ob-metaflow 2.13.2.1__py2.py3-none-any.whl → 2.13.6.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ob-metaflow might be problematic. Click here for more details.
- metaflow/cards.py +1 -0
- metaflow/cli.py +5 -0
- metaflow/cli_components/run_cmds.py +2 -0
- metaflow/cli_components/step_cmd.py +0 -13
- metaflow/decorators.py +2 -2
- metaflow/parameters.py +3 -1
- metaflow/plugins/__init__.py +5 -0
- metaflow/plugins/argo/argo_workflows.py +24 -8
- metaflow/plugins/cards/card_cli.py +7 -2
- metaflow/plugins/cards/card_decorator.py +53 -20
- metaflow/plugins/cards/card_modules/basic.py +56 -5
- metaflow/plugins/cards/card_modules/card.py +16 -1
- metaflow/plugins/cards/card_modules/components.py +64 -16
- metaflow/plugins/cards/card_modules/main.js +27 -25
- metaflow/plugins/cards/card_modules/test_cards.py +4 -4
- metaflow/plugins/cards/component_serializer.py +1 -1
- metaflow/plugins/events_decorator.py +120 -149
- metaflow/plugins/kubernetes/kubernetes.py +0 -9
- metaflow/plugins/kubernetes/kubernetes_cli.py +1 -1
- metaflow/plugins/kubernetes/kubernetes_decorator.py +8 -0
- metaflow/plugins/kubernetes/spot_metadata_cli.py +69 -0
- metaflow/plugins/kubernetes/spot_monitor_sidecar.py +109 -0
- metaflow/plugins/project_decorator.py +33 -5
- metaflow/user_configs/config_parameters.py +23 -9
- metaflow/version.py +1 -1
- {ob_metaflow-2.13.2.1.dist-info → ob_metaflow-2.13.6.1.dist-info}/METADATA +11 -3
- {ob_metaflow-2.13.2.1.dist-info → ob_metaflow-2.13.6.1.dist-info}/RECORD +31 -29
- {ob_metaflow-2.13.2.1.dist-info → ob_metaflow-2.13.6.1.dist-info}/WHEEL +1 -1
- {ob_metaflow-2.13.2.1.dist-info → ob_metaflow-2.13.6.1.dist-info}/LICENSE +0 -0
- {ob_metaflow-2.13.2.1.dist-info → ob_metaflow-2.13.6.1.dist-info}/entry_points.txt +0 -0
- {ob_metaflow-2.13.2.1.dist-info → ob_metaflow-2.13.6.1.dist-info}/top_level.txt +0 -0
|
@@ -38,7 +38,7 @@ class TestEditableCard(MetaflowCard):
|
|
|
38
38
|
|
|
39
39
|
ALLOW_USER_COMPONENTS = True
|
|
40
40
|
|
|
41
|
-
def __init__(self,
|
|
41
|
+
def __init__(self, components=[], **kwargs):
|
|
42
42
|
self._components = components
|
|
43
43
|
|
|
44
44
|
def render(self, task):
|
|
@@ -52,7 +52,7 @@ class TestEditableCard2(MetaflowCard):
|
|
|
52
52
|
|
|
53
53
|
ALLOW_USER_COMPONENTS = True
|
|
54
54
|
|
|
55
|
-
def __init__(self,
|
|
55
|
+
def __init__(self, components=[], **kwargs):
|
|
56
56
|
self._components = components
|
|
57
57
|
|
|
58
58
|
def render(self, task):
|
|
@@ -64,7 +64,7 @@ class TestNonEditableCard(MetaflowCard):
|
|
|
64
64
|
|
|
65
65
|
seperator = "$&#!!@*"
|
|
66
66
|
|
|
67
|
-
def __init__(self,
|
|
67
|
+
def __init__(self, components=[], **kwargs):
|
|
68
68
|
self._components = components
|
|
69
69
|
|
|
70
70
|
def render(self, task):
|
|
@@ -193,7 +193,7 @@ class TestRefreshComponentCard(MetaflowCard):
|
|
|
193
193
|
|
|
194
194
|
type = "test_component_refresh_card"
|
|
195
195
|
|
|
196
|
-
def __init__(self,
|
|
196
|
+
def __init__(self, components=[], **kwargs):
|
|
197
197
|
self._components = components
|
|
198
198
|
|
|
199
199
|
def render(self, task) -> str:
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
from .card_modules import MetaflowCardComponent
|
|
2
|
+
from .card_modules.card import create_component_id
|
|
2
3
|
from .card_modules.basic import ErrorComponent, SectionComponent
|
|
3
4
|
from .card_modules.components import (
|
|
4
5
|
UserComponent,
|
|
5
|
-
create_component_id,
|
|
6
6
|
StubComponent,
|
|
7
7
|
)
|
|
8
8
|
from .exception import ComponentOverwriteNotSupportedException
|
|
@@ -362,11 +362,7 @@ class TriggerOnFinishDecorator(FlowDecorator):
|
|
|
362
362
|
"""
|
|
363
363
|
|
|
364
364
|
name = "trigger_on_finish"
|
|
365
|
-
|
|
366
|
-
"flow": None, # flow_name or project_flow_name
|
|
367
|
-
"flows": [], # flow_names or project_flow_names
|
|
368
|
-
"options": {},
|
|
369
|
-
}
|
|
365
|
+
|
|
370
366
|
options = {
|
|
371
367
|
"trigger": dict(
|
|
372
368
|
multiple=True,
|
|
@@ -374,6 +370,14 @@ class TriggerOnFinishDecorator(FlowDecorator):
|
|
|
374
370
|
help="Specify run pathspec for testing @trigger_on_finish locally.",
|
|
375
371
|
),
|
|
376
372
|
}
|
|
373
|
+
defaults = {
|
|
374
|
+
"flow": None, # flow_name or project_flow_name
|
|
375
|
+
"flows": [], # flow_names or project_flow_names
|
|
376
|
+
"options": {},
|
|
377
|
+
# Re-enable if you want to support TL options directly in the decorator like
|
|
378
|
+
# for @project decorator
|
|
379
|
+
# **{k: v["default"] for k, v in options.items()},
|
|
380
|
+
}
|
|
377
381
|
|
|
378
382
|
def flow_init(
|
|
379
383
|
self,
|
|
@@ -394,111 +398,23 @@ class TriggerOnFinishDecorator(FlowDecorator):
|
|
|
394
398
|
)
|
|
395
399
|
elif self.attributes["flow"]:
|
|
396
400
|
# flow supports the format @trigger_on_finish(flow='FooFlow')
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
{
|
|
400
|
-
"fq_name": self.attributes["flow"],
|
|
401
|
-
}
|
|
402
|
-
)
|
|
403
|
-
elif isinstance(self.attributes["flow"], dict):
|
|
404
|
-
if "name" not in self.attributes["flow"]:
|
|
405
|
-
raise MetaflowException(
|
|
406
|
-
"The *flow* attribute for *@trigger_on_finish* is missing the "
|
|
407
|
-
"*name* key."
|
|
408
|
-
)
|
|
409
|
-
flow_name = self.attributes["flow"]["name"]
|
|
410
|
-
|
|
411
|
-
if not is_stringish(flow_name) or "." in flow_name:
|
|
412
|
-
raise MetaflowException(
|
|
413
|
-
"The *name* attribute of the *flow* is not a valid string"
|
|
414
|
-
)
|
|
415
|
-
result = {"fq_name": flow_name}
|
|
416
|
-
if "project" in self.attributes["flow"]:
|
|
417
|
-
if is_stringish(self.attributes["flow"]["project"]):
|
|
418
|
-
result["project"] = self.attributes["flow"]["project"]
|
|
419
|
-
else:
|
|
420
|
-
raise MetaflowException(
|
|
421
|
-
"The *project* attribute of the *flow* is not a string"
|
|
422
|
-
)
|
|
423
|
-
if "project_branch" in self.attributes["flow"]:
|
|
424
|
-
if is_stringish(self.attributes["flow"]["project_branch"]):
|
|
425
|
-
result["branch"] = self.attributes["flow"]["project_branch"]
|
|
426
|
-
else:
|
|
427
|
-
raise MetaflowException(
|
|
428
|
-
"The *project_branch* attribute of the *flow* is not a string"
|
|
429
|
-
)
|
|
430
|
-
self.triggers.append(result)
|
|
431
|
-
elif callable(self.attributes["flow"]) and not isinstance(
|
|
401
|
+
flow = self.attributes["flow"]
|
|
402
|
+
if callable(flow) and not isinstance(
|
|
432
403
|
self.attributes["flow"], DeployTimeField
|
|
433
404
|
):
|
|
434
|
-
trig = DeployTimeField(
|
|
435
|
-
"fq_name", [str, dict], None, self.attributes["flow"], False
|
|
436
|
-
)
|
|
405
|
+
trig = DeployTimeField("fq_name", [str, dict], None, flow, False)
|
|
437
406
|
self.triggers.append(trig)
|
|
438
407
|
else:
|
|
439
|
-
|
|
440
|
-
"Incorrect type for *flow* attribute in *@trigger_on_finish* "
|
|
441
|
-
" decorator. Supported type is string or Dict[str, str] - \n"
|
|
442
|
-
"@trigger_on_finish(flow='FooFlow') or "
|
|
443
|
-
"@trigger_on_finish(flow={'name':'FooFlow', 'project_branch': 'branch'})"
|
|
444
|
-
)
|
|
408
|
+
self.triggers.extend(self._parse_static_triggers([flow]))
|
|
445
409
|
elif self.attributes["flows"]:
|
|
446
410
|
# flows attribute supports the following formats -
|
|
447
411
|
# 1. flows=['FooFlow', 'BarFlow']
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
self.triggers.append(
|
|
452
|
-
{
|
|
453
|
-
"fq_name": flow,
|
|
454
|
-
}
|
|
455
|
-
)
|
|
456
|
-
elif isinstance(flow, dict):
|
|
457
|
-
if "name" not in flow:
|
|
458
|
-
raise MetaflowException(
|
|
459
|
-
"One or more flows in the *flows* attribute for "
|
|
460
|
-
"*@trigger_on_finish* is missing the "
|
|
461
|
-
"*name* key."
|
|
462
|
-
)
|
|
463
|
-
flow_name = flow["name"]
|
|
464
|
-
|
|
465
|
-
if not is_stringish(flow_name) or "." in flow_name:
|
|
466
|
-
raise MetaflowException(
|
|
467
|
-
"The *name* attribute '%s' is not a valid string"
|
|
468
|
-
% str(flow_name)
|
|
469
|
-
)
|
|
470
|
-
result = {"fq_name": flow_name}
|
|
471
|
-
if "project" in flow:
|
|
472
|
-
if is_stringish(flow["project"]):
|
|
473
|
-
result["project"] = flow["project"]
|
|
474
|
-
else:
|
|
475
|
-
raise MetaflowException(
|
|
476
|
-
"The *project* attribute of the *flow* '%s' is not "
|
|
477
|
-
"a string" % flow_name
|
|
478
|
-
)
|
|
479
|
-
if "project_branch" in flow:
|
|
480
|
-
if is_stringish(flow["project_branch"]):
|
|
481
|
-
result["branch"] = flow["project_branch"]
|
|
482
|
-
else:
|
|
483
|
-
raise MetaflowException(
|
|
484
|
-
"The *project_branch* attribute of the *flow* %s "
|
|
485
|
-
"is not a string" % flow_name
|
|
486
|
-
)
|
|
487
|
-
self.triggers.append(result)
|
|
488
|
-
else:
|
|
489
|
-
raise MetaflowException(
|
|
490
|
-
"One or more flows in *flows* attribute in "
|
|
491
|
-
"*@trigger_on_finish* decorator have an incorrect type. "
|
|
492
|
-
"Supported type is string or Dict[str, str]- \n"
|
|
493
|
-
"@trigger_on_finish(flows=['FooFlow', 'BarFlow']"
|
|
494
|
-
)
|
|
495
|
-
elif callable(self.attributes["flows"]) and not isinstance(
|
|
496
|
-
self.attributes["flows"], DeployTimeField
|
|
497
|
-
):
|
|
498
|
-
trig = DeployTimeField(
|
|
499
|
-
"flows", list, None, self.attributes["flows"], False
|
|
500
|
-
)
|
|
412
|
+
flows = self.attributes["flows"]
|
|
413
|
+
if callable(flows) and not isinstance(flows, DeployTimeField):
|
|
414
|
+
trig = DeployTimeField("flows", list, None, flows, False)
|
|
501
415
|
self.triggers.append(trig)
|
|
416
|
+
elif isinstance(flows, list):
|
|
417
|
+
self.triggers.extend(self._parse_static_triggers(flows))
|
|
502
418
|
else:
|
|
503
419
|
raise MetaflowException(
|
|
504
420
|
"Incorrect type for *flows* attribute in *@trigger_on_finish* "
|
|
@@ -515,37 +431,48 @@ class TriggerOnFinishDecorator(FlowDecorator):
|
|
|
515
431
|
for trigger in self.triggers:
|
|
516
432
|
if isinstance(trigger, DeployTimeField):
|
|
517
433
|
continue
|
|
518
|
-
|
|
519
|
-
# fully qualified name is just the flow name
|
|
520
|
-
trigger["flow"] = trigger["fq_name"]
|
|
521
|
-
elif trigger["fq_name"].count(".") >= 2:
|
|
522
|
-
# fully qualified name is of the format - project.branch.flow_name
|
|
523
|
-
trigger["project"], tail = trigger["fq_name"].split(".", maxsplit=1)
|
|
524
|
-
trigger["branch"], trigger["flow"] = tail.rsplit(".", maxsplit=1)
|
|
525
|
-
else:
|
|
526
|
-
raise MetaflowException(
|
|
527
|
-
"Incorrect format for *flow* in *@trigger_on_finish* "
|
|
528
|
-
"decorator. Specify either just the *flow_name* or a fully "
|
|
529
|
-
"qualified name like *project_name.branch_name.flow_name*."
|
|
530
|
-
)
|
|
531
|
-
# TODO: Also sanity check project and branch names
|
|
532
|
-
if not re.match(r"^[A-Za-z0-9_]+$", trigger["flow"]):
|
|
533
|
-
raise MetaflowException(
|
|
534
|
-
"Invalid flow name *%s* in *@trigger_on_finish* "
|
|
535
|
-
"decorator. Only alphanumeric characters and "
|
|
536
|
-
"underscores(_) are allowed." % trigger["flow"]
|
|
537
|
-
)
|
|
434
|
+
self._parse_fq_name(trigger)
|
|
538
435
|
|
|
539
436
|
self.options = self.attributes["options"]
|
|
540
437
|
|
|
541
438
|
# Handle scenario for local testing using --trigger.
|
|
439
|
+
|
|
440
|
+
# Re-enable this code if you want to support passing trigger directly in the
|
|
441
|
+
# decorator in a way similar to how production and branch are passed in the
|
|
442
|
+
# project decorator.
|
|
443
|
+
|
|
444
|
+
# # This is overkill since default is None for all options but adding this code
|
|
445
|
+
# # to make it safe if other non None-default options are added in the future.
|
|
446
|
+
# for op in options:
|
|
447
|
+
# if (
|
|
448
|
+
# op in self._user_defined_attributes
|
|
449
|
+
# and options[op] != self.defaults[op]
|
|
450
|
+
# and self.attributes[op] != options[op]
|
|
451
|
+
# ):
|
|
452
|
+
# # Exception if:
|
|
453
|
+
# # - the user provides a value in the attributes field
|
|
454
|
+
# # - AND the user provided a value in the command line (non default)
|
|
455
|
+
# # - AND the values are different
|
|
456
|
+
# # Note that this won't raise an error if the user provided the default
|
|
457
|
+
# # value in the command line and provided one in attribute but although
|
|
458
|
+
# # slightly inconsistent, it is not incorrect.
|
|
459
|
+
# raise MetaflowException(
|
|
460
|
+
# "You cannot pass %s as both a command-line argument and an attribute "
|
|
461
|
+
# "of the @trigger_on_finish decorator." % op
|
|
462
|
+
# )
|
|
463
|
+
|
|
464
|
+
# if "trigger" in self._user_defined_attributes:
|
|
465
|
+
# trigger_option = self.attributes["trigger"]
|
|
466
|
+
# else:
|
|
467
|
+
trigger_option = options["trigger"]
|
|
468
|
+
|
|
542
469
|
self._option_values = options
|
|
543
|
-
if
|
|
470
|
+
if trigger_option:
|
|
544
471
|
from metaflow import Run
|
|
545
472
|
from metaflow.events import Trigger
|
|
546
473
|
|
|
547
474
|
run_objs = []
|
|
548
|
-
for run_pathspec in
|
|
475
|
+
for run_pathspec in trigger_option:
|
|
549
476
|
if len(run_pathspec.split("/")) != 2:
|
|
550
477
|
raise MetaflowException(
|
|
551
478
|
"Incorrect format for run pathspec for *--trigger*. "
|
|
@@ -559,9 +486,67 @@ class TriggerOnFinishDecorator(FlowDecorator):
|
|
|
559
486
|
run_objs.append(run_obj)
|
|
560
487
|
current._update_env({"trigger": Trigger.from_runs(run_objs)})
|
|
561
488
|
|
|
489
|
+
@staticmethod
|
|
490
|
+
def _parse_static_triggers(flows):
|
|
491
|
+
results = []
|
|
492
|
+
for flow in flows:
|
|
493
|
+
if is_stringish(flow):
|
|
494
|
+
results.append(
|
|
495
|
+
{
|
|
496
|
+
"fq_name": flow,
|
|
497
|
+
}
|
|
498
|
+
)
|
|
499
|
+
elif isinstance(flow, dict):
|
|
500
|
+
if "name" not in flow:
|
|
501
|
+
if len(flows) > 1:
|
|
502
|
+
raise MetaflowException(
|
|
503
|
+
"One or more flows in the *flows* attribute for "
|
|
504
|
+
"*@trigger_on_finish* is missing the "
|
|
505
|
+
"*name* key."
|
|
506
|
+
)
|
|
507
|
+
raise MetaflowException(
|
|
508
|
+
"The *flow* attribute for *@trigger_on_finish* is missing the "
|
|
509
|
+
"*name* key."
|
|
510
|
+
)
|
|
511
|
+
flow_name = flow["name"]
|
|
512
|
+
|
|
513
|
+
if not is_stringish(flow_name) or "." in flow_name:
|
|
514
|
+
raise MetaflowException(
|
|
515
|
+
f"The *name* attribute of the *flow* {flow_name} is not a valid string"
|
|
516
|
+
)
|
|
517
|
+
result = {"fq_name": flow_name}
|
|
518
|
+
if "project" in flow:
|
|
519
|
+
if is_stringish(flow["project"]):
|
|
520
|
+
result["project"] = flow["project"]
|
|
521
|
+
else:
|
|
522
|
+
raise MetaflowException(
|
|
523
|
+
f"The *project* attribute of the *flow* {flow_name} is not a string"
|
|
524
|
+
)
|
|
525
|
+
if "project_branch" in flow:
|
|
526
|
+
if is_stringish(flow["project_branch"]):
|
|
527
|
+
result["branch"] = flow["project_branch"]
|
|
528
|
+
else:
|
|
529
|
+
raise MetaflowException(
|
|
530
|
+
f"The *project_branch* attribute of the *flow* {flow_name} is not a string"
|
|
531
|
+
)
|
|
532
|
+
results.append(result)
|
|
533
|
+
else:
|
|
534
|
+
if len(flows) > 1:
|
|
535
|
+
raise MetaflowException(
|
|
536
|
+
"One or more flows in the *flows* attribute for "
|
|
537
|
+
"*@trigger_on_finish* decorator have an incorrect type. "
|
|
538
|
+
"Supported type is string or Dict[str, str]- \n"
|
|
539
|
+
"@trigger_on_finish(flows=['FooFlow', 'BarFlow']"
|
|
540
|
+
)
|
|
541
|
+
raise MetaflowException(
|
|
542
|
+
"Incorrect type for *flow* attribute in *@trigger_on_finish* "
|
|
543
|
+
" decorator. Supported type is string or Dict[str, str] - \n"
|
|
544
|
+
"@trigger_on_finish(flow='FooFlow') or "
|
|
545
|
+
"@trigger_on_finish(flow={'name':'FooFlow', 'project_branch': 'branch'})"
|
|
546
|
+
)
|
|
547
|
+
return results
|
|
548
|
+
|
|
562
549
|
def _parse_fq_name(self, trigger):
|
|
563
|
-
if isinstance(trigger, DeployTimeField):
|
|
564
|
-
trigger["fq_name"] = deploy_time_eval(trigger["fq_name"])
|
|
565
550
|
if trigger["fq_name"].count(".") == 0:
|
|
566
551
|
# fully qualified name is just the flow name
|
|
567
552
|
trigger["flow"] = trigger["fq_name"]
|
|
@@ -581,32 +566,18 @@ class TriggerOnFinishDecorator(FlowDecorator):
|
|
|
581
566
|
"decorator. Only alphanumeric characters and "
|
|
582
567
|
"underscores(_) are allowed." % trigger["flow"]
|
|
583
568
|
)
|
|
584
|
-
return trigger
|
|
585
569
|
|
|
586
570
|
def format_deploytime_value(self):
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
deploy_value = deploy_time_eval(trigger)
|
|
592
|
-
if isinstance(deploy_value, list):
|
|
593
|
-
self.triggers = deploy_value
|
|
571
|
+
if len(self.triggers) == 1 and isinstance(self.triggers[0], DeployTimeField):
|
|
572
|
+
deploy_value = deploy_time_eval(self.triggers[0])
|
|
573
|
+
if isinstance(deploy_value, list):
|
|
574
|
+
self.triggers = deploy_value
|
|
594
575
|
else:
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
trigger = deploy_time_eval(trigger)
|
|
601
|
-
if isinstance(trigger, dict):
|
|
602
|
-
trigger["fq_name"] = trigger.get("name")
|
|
603
|
-
trigger["project"] = trigger.get("project")
|
|
604
|
-
trigger["branch"] = trigger.get("project_branch")
|
|
605
|
-
# We also added this bc it won't be formatted yet
|
|
606
|
-
if isinstance(trigger, str):
|
|
607
|
-
trigger = {"fq_name": trigger}
|
|
608
|
-
trigger = self._parse_fq_name(trigger)
|
|
609
|
-
self.triggers[self.triggers.index(old_trig)] = trigger
|
|
576
|
+
self.triggers = [deploy_value]
|
|
577
|
+
triggers = self._parse_static_triggers(self.triggers)
|
|
578
|
+
for trigger in triggers:
|
|
579
|
+
self._parse_fq_name(trigger)
|
|
580
|
+
self.triggers = triggers
|
|
610
581
|
|
|
611
582
|
def get_top_level_options(self):
|
|
612
583
|
return list(self._option_values.items())
|
|
@@ -685,15 +685,6 @@ class Kubernetes(object):
|
|
|
685
685
|
for name, value in system_annotations.items():
|
|
686
686
|
job.annotation(name, value)
|
|
687
687
|
|
|
688
|
-
(
|
|
689
|
-
job.annotation("metaflow/run_id", run_id)
|
|
690
|
-
.annotation("metaflow/step_name", step_name)
|
|
691
|
-
.annotation("metaflow/task_id", task_id)
|
|
692
|
-
.annotation("metaflow/attempt", attempt)
|
|
693
|
-
.label("app.kubernetes.io/name", "metaflow-task")
|
|
694
|
-
.label("app.kubernetes.io/part-of", "metaflow")
|
|
695
|
-
)
|
|
696
|
-
|
|
697
688
|
return job
|
|
698
689
|
|
|
699
690
|
def create_k8sjob(self, job):
|
|
@@ -190,7 +190,7 @@ def step(
|
|
|
190
190
|
executable = ctx.obj.environment.executable(step_name, executable)
|
|
191
191
|
|
|
192
192
|
# Set environment
|
|
193
|
-
env = {}
|
|
193
|
+
env = {"METAFLOW_FLOW_FILENAME": os.path.basename(sys.argv[0])}
|
|
194
194
|
env_deco = [deco for deco in node.decorators if deco.name == "environment"]
|
|
195
195
|
if env_deco:
|
|
196
196
|
env = env_deco[0].attributes["vars"]
|
|
@@ -562,6 +562,13 @@ class KubernetesDecorator(StepDecorator):
|
|
|
562
562
|
self._save_logs_sidecar = Sidecar("save_logs_periodically")
|
|
563
563
|
self._save_logs_sidecar.start()
|
|
564
564
|
|
|
565
|
+
# Start spot termination monitor sidecar.
|
|
566
|
+
current._update_env(
|
|
567
|
+
{"spot_termination_notice": "/tmp/spot_termination_notice"}
|
|
568
|
+
)
|
|
569
|
+
self._spot_monitor_sidecar = Sidecar("spot_termination_monitor")
|
|
570
|
+
self._spot_monitor_sidecar.start()
|
|
571
|
+
|
|
565
572
|
num_parallel = None
|
|
566
573
|
if hasattr(flow, "_parallel_ubf_iter"):
|
|
567
574
|
num_parallel = flow._parallel_ubf_iter.num_parallel
|
|
@@ -620,6 +627,7 @@ class KubernetesDecorator(StepDecorator):
|
|
|
620
627
|
|
|
621
628
|
try:
|
|
622
629
|
self._save_logs_sidecar.terminate()
|
|
630
|
+
self._spot_monitor_sidecar.terminate()
|
|
623
631
|
except:
|
|
624
632
|
# Best effort kill
|
|
625
633
|
pass
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
from metaflow._vendor import click
|
|
2
|
+
from datetime import datetime, timezone
|
|
3
|
+
from metaflow.tagging_util import validate_tags
|
|
4
|
+
from metaflow.metadata_provider import MetaDatum
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@click.group()
|
|
8
|
+
def cli():
|
|
9
|
+
pass
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@cli.group(help="Commands related to spot metadata.")
|
|
13
|
+
def spot_metadata():
|
|
14
|
+
pass
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@spot_metadata.command(help="Record spot termination metadata for a task.")
|
|
18
|
+
@click.option(
|
|
19
|
+
"--run-id",
|
|
20
|
+
required=True,
|
|
21
|
+
help="Run ID for which metadata is to be recorded.",
|
|
22
|
+
)
|
|
23
|
+
@click.option(
|
|
24
|
+
"--step-name",
|
|
25
|
+
required=True,
|
|
26
|
+
help="Step Name for which metadata is to be recorded.",
|
|
27
|
+
)
|
|
28
|
+
@click.option(
|
|
29
|
+
"--task-id",
|
|
30
|
+
required=True,
|
|
31
|
+
help="Task ID for which metadata is to be recorded.",
|
|
32
|
+
)
|
|
33
|
+
@click.option(
|
|
34
|
+
"--termination-notice-time",
|
|
35
|
+
required=True,
|
|
36
|
+
help="Spot termination notice time.",
|
|
37
|
+
)
|
|
38
|
+
@click.option(
|
|
39
|
+
"--tag",
|
|
40
|
+
"tags",
|
|
41
|
+
multiple=True,
|
|
42
|
+
required=False,
|
|
43
|
+
default=None,
|
|
44
|
+
help="List of tags.",
|
|
45
|
+
)
|
|
46
|
+
@click.pass_obj
|
|
47
|
+
def record(obj, run_id, step_name, task_id, termination_notice_time, tags=None):
|
|
48
|
+
validate_tags(tags)
|
|
49
|
+
|
|
50
|
+
tag_list = list(tags) if tags else []
|
|
51
|
+
|
|
52
|
+
entries = [
|
|
53
|
+
MetaDatum(
|
|
54
|
+
field="spot-termination-received-at",
|
|
55
|
+
value=datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
|
|
56
|
+
type="spot-termination-received-at",
|
|
57
|
+
tags=tag_list,
|
|
58
|
+
),
|
|
59
|
+
MetaDatum(
|
|
60
|
+
field="spot-termination-time",
|
|
61
|
+
value=termination_notice_time,
|
|
62
|
+
type="spot-termination-time",
|
|
63
|
+
tags=tag_list,
|
|
64
|
+
),
|
|
65
|
+
]
|
|
66
|
+
|
|
67
|
+
obj.metadata.register_metadata(
|
|
68
|
+
run_id=run_id, step_name=step_name, task_id=task_id, metadata=entries
|
|
69
|
+
)
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
import time
|
|
4
|
+
import signal
|
|
5
|
+
import requests
|
|
6
|
+
import subprocess
|
|
7
|
+
from multiprocessing import Process
|
|
8
|
+
from datetime import datetime, timezone
|
|
9
|
+
from metaflow.sidecar import MessageTypes
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class SpotTerminationMonitorSidecar(object):
|
|
13
|
+
EC2_TYPE_URL = "http://169.254.169.254/latest/meta-data/instance-life-cycle"
|
|
14
|
+
METADATA_URL = "http://169.254.169.254/latest/meta-data/spot/termination-time"
|
|
15
|
+
TOKEN_URL = "http://169.254.169.254/latest/api/token"
|
|
16
|
+
POLL_INTERVAL = 5 # seconds
|
|
17
|
+
|
|
18
|
+
def __init__(self):
|
|
19
|
+
self.is_alive = True
|
|
20
|
+
self._process = None
|
|
21
|
+
self._token = None
|
|
22
|
+
self._token_expiry = 0
|
|
23
|
+
|
|
24
|
+
if self._is_aws_spot_instance():
|
|
25
|
+
self._process = Process(target=self._monitor_loop)
|
|
26
|
+
self._process.start()
|
|
27
|
+
|
|
28
|
+
def process_message(self, msg):
|
|
29
|
+
if msg.msg_type == MessageTypes.SHUTDOWN:
|
|
30
|
+
self.is_alive = False
|
|
31
|
+
if self._process:
|
|
32
|
+
self._process.terminate()
|
|
33
|
+
|
|
34
|
+
@classmethod
|
|
35
|
+
def get_worker(cls):
|
|
36
|
+
return cls
|
|
37
|
+
|
|
38
|
+
def _get_imds_token(self):
|
|
39
|
+
current_time = time.time()
|
|
40
|
+
if current_time >= self._token_expiry - 60: # Refresh 60s before expiry
|
|
41
|
+
try:
|
|
42
|
+
response = requests.put(
|
|
43
|
+
url=self.TOKEN_URL,
|
|
44
|
+
headers={"X-aws-ec2-metadata-token-ttl-seconds": "300"},
|
|
45
|
+
timeout=1,
|
|
46
|
+
)
|
|
47
|
+
if response.status_code == 200:
|
|
48
|
+
self._token = response.text
|
|
49
|
+
self._token_expiry = current_time + 240 # Slightly less than TTL
|
|
50
|
+
except requests.exceptions.RequestException:
|
|
51
|
+
pass
|
|
52
|
+
return self._token
|
|
53
|
+
|
|
54
|
+
def _make_ec2_request(self, url, timeout):
|
|
55
|
+
token = self._get_imds_token()
|
|
56
|
+
headers = {"X-aws-ec2-metadata-token": token} if token else {}
|
|
57
|
+
response = requests.get(url=url, headers=headers, timeout=timeout)
|
|
58
|
+
return response
|
|
59
|
+
|
|
60
|
+
def _is_aws_spot_instance(self):
|
|
61
|
+
try:
|
|
62
|
+
response = self._make_ec2_request(url=self.EC2_TYPE_URL, timeout=1)
|
|
63
|
+
return response.status_code == 200 and response.text == "spot"
|
|
64
|
+
except (requests.exceptions.RequestException, requests.exceptions.Timeout):
|
|
65
|
+
return False
|
|
66
|
+
|
|
67
|
+
def _monitor_loop(self):
|
|
68
|
+
while self.is_alive:
|
|
69
|
+
try:
|
|
70
|
+
response = self._make_ec2_request(url=self.METADATA_URL, timeout=1)
|
|
71
|
+
if response.status_code == 200:
|
|
72
|
+
termination_time = response.text
|
|
73
|
+
self._emit_termination_metadata(termination_time)
|
|
74
|
+
os.kill(os.getppid(), signal.SIGTERM)
|
|
75
|
+
break
|
|
76
|
+
except (requests.exceptions.RequestException, requests.exceptions.Timeout):
|
|
77
|
+
pass
|
|
78
|
+
time.sleep(self.POLL_INTERVAL)
|
|
79
|
+
|
|
80
|
+
def _emit_termination_metadata(self, termination_time):
|
|
81
|
+
flow_filename = os.getenv("METAFLOW_FLOW_FILENAME")
|
|
82
|
+
pathspec = os.getenv("MF_PATHSPEC")
|
|
83
|
+
_, run_id, step_name, task_id = pathspec.split("/")
|
|
84
|
+
retry_count = os.getenv("MF_ATTEMPT")
|
|
85
|
+
|
|
86
|
+
with open("/tmp/spot_termination_notice", "w") as fp:
|
|
87
|
+
fp.write(termination_time)
|
|
88
|
+
|
|
89
|
+
command = [
|
|
90
|
+
sys.executable,
|
|
91
|
+
f"/metaflow/{flow_filename}",
|
|
92
|
+
"spot-metadata",
|
|
93
|
+
"record",
|
|
94
|
+
"--run-id",
|
|
95
|
+
run_id,
|
|
96
|
+
"--step-name",
|
|
97
|
+
step_name,
|
|
98
|
+
"--task-id",
|
|
99
|
+
task_id,
|
|
100
|
+
"--termination-notice-time",
|
|
101
|
+
termination_time,
|
|
102
|
+
"--tag",
|
|
103
|
+
"attempt_id:{}".format(retry_count),
|
|
104
|
+
]
|
|
105
|
+
|
|
106
|
+
result = subprocess.run(command, capture_output=True, text=True)
|
|
107
|
+
|
|
108
|
+
if result.returncode != 0:
|
|
109
|
+
print(f"Failed to record spot termination metadata: {result.stderr}")
|
|
@@ -72,7 +72,6 @@ class ProjectDecorator(FlowDecorator):
|
|
|
72
72
|
"""
|
|
73
73
|
|
|
74
74
|
name = "project"
|
|
75
|
-
defaults = {"name": None}
|
|
76
75
|
|
|
77
76
|
options = {
|
|
78
77
|
"production": dict(
|
|
@@ -91,19 +90,48 @@ class ProjectDecorator(FlowDecorator):
|
|
|
91
90
|
),
|
|
92
91
|
}
|
|
93
92
|
|
|
93
|
+
defaults = {"name": None, **{k: v["default"] for k, v in options.items()}}
|
|
94
|
+
|
|
94
95
|
def flow_init(
|
|
95
96
|
self, flow, graph, environment, flow_datastore, metadata, logger, echo, options
|
|
96
97
|
):
|
|
97
98
|
self._option_values = options
|
|
98
99
|
project_name = self.attributes.get("name")
|
|
100
|
+
for op in options:
|
|
101
|
+
if (
|
|
102
|
+
op in self._user_defined_attributes
|
|
103
|
+
and options[op] != self.defaults[op]
|
|
104
|
+
and self.attributes[op] != options[op]
|
|
105
|
+
):
|
|
106
|
+
# Exception if:
|
|
107
|
+
# - the user provides a value in the attributes field
|
|
108
|
+
# - AND the user provided a value in the command line (non default)
|
|
109
|
+
# - AND the values are different
|
|
110
|
+
# Note that this won't raise an error if the user provided the default
|
|
111
|
+
# value in the command line and provided one in attribute but although
|
|
112
|
+
# slightly inconsistent, it is not incorrect.
|
|
113
|
+
raise MetaflowException(
|
|
114
|
+
"You cannot pass %s as both a command-line argument and an attribute "
|
|
115
|
+
"of the @project decorator." % op
|
|
116
|
+
)
|
|
117
|
+
if "branch" in self._user_defined_attributes:
|
|
118
|
+
project_branch = self.attributes["branch"]
|
|
119
|
+
else:
|
|
120
|
+
project_branch = options["branch"]
|
|
121
|
+
|
|
122
|
+
if "production" in self._user_defined_attributes:
|
|
123
|
+
project_production = self.attributes["production"]
|
|
124
|
+
else:
|
|
125
|
+
project_production = options["production"]
|
|
126
|
+
|
|
99
127
|
project_flow_name, branch_name = format_name(
|
|
100
128
|
flow.name,
|
|
101
129
|
project_name,
|
|
102
|
-
|
|
103
|
-
|
|
130
|
+
project_production,
|
|
131
|
+
project_branch,
|
|
104
132
|
get_username(),
|
|
105
133
|
)
|
|
106
|
-
is_user_branch =
|
|
134
|
+
is_user_branch = project_branch is None and not project_production
|
|
107
135
|
echo(
|
|
108
136
|
"Project: *%s*, Branch: *%s*" % (project_name, branch_name),
|
|
109
137
|
fg="magenta",
|
|
@@ -114,7 +142,7 @@ class ProjectDecorator(FlowDecorator):
|
|
|
114
142
|
"project_name": project_name,
|
|
115
143
|
"branch_name": branch_name,
|
|
116
144
|
"is_user_branch": is_user_branch,
|
|
117
|
-
"is_production":
|
|
145
|
+
"is_production": project_production,
|
|
118
146
|
"project_flow_name": project_flow_name,
|
|
119
147
|
}
|
|
120
148
|
)
|