metaflow 2.13.4__py2.py3-none-any.whl → 2.13.5__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/plugins/__init__.py +5 -0
- metaflow/plugins/argo/argo_workflows.py +1 -0
- metaflow/plugins/kubernetes/kubernetes_cli.py +1 -1
- metaflow/plugins/kubernetes/kubernetes_decorator.py +8 -0
- metaflow/plugins/kubernetes/spot_metadata_cli.py +69 -0
- metaflow/plugins/kubernetes/spot_monitor_sidecar.py +109 -0
- metaflow/version.py +1 -1
- {metaflow-2.13.4.dist-info → metaflow-2.13.5.dist-info}/METADATA +2 -2
- {metaflow-2.13.4.dist-info → metaflow-2.13.5.dist-info}/RECORD +13 -11
- {metaflow-2.13.4.dist-info → metaflow-2.13.5.dist-info}/LICENSE +0 -0
- {metaflow-2.13.4.dist-info → metaflow-2.13.5.dist-info}/WHEEL +0 -0
- {metaflow-2.13.4.dist-info → metaflow-2.13.5.dist-info}/entry_points.txt +0 -0
- {metaflow-2.13.4.dist-info → metaflow-2.13.5.dist-info}/top_level.txt +0 -0
metaflow/plugins/__init__.py
CHANGED
@@ -16,6 +16,7 @@ CLIS_DESC = [
|
|
16
16
|
("argo-workflows", ".argo.argo_workflows_cli.cli"),
|
17
17
|
("card", ".cards.card_cli.cli"),
|
18
18
|
("tag", ".tag_cli.cli"),
|
19
|
+
("spot-metadata", ".kubernetes.spot_metadata_cli.cli"),
|
19
20
|
("logs", ".logs_cli.cli"),
|
20
21
|
]
|
21
22
|
|
@@ -104,6 +105,10 @@ SIDECARS_DESC = [
|
|
104
105
|
"save_logs_periodically",
|
105
106
|
"..mflog.save_logs_periodically.SaveLogsPeriodicallySidecar",
|
106
107
|
),
|
108
|
+
(
|
109
|
+
"spot_termination_monitor",
|
110
|
+
".kubernetes.spot_monitor_sidecar.SpotTerminationMonitorSidecar",
|
111
|
+
),
|
107
112
|
("heartbeat", "metaflow.metadata_provider.heartbeat.MetadataHeartBeat"),
|
108
113
|
]
|
109
114
|
|
@@ -1705,6 +1705,7 @@ class ArgoWorkflows(object):
|
|
1705
1705
|
},
|
1706
1706
|
**{
|
1707
1707
|
# Some optional values for bookkeeping
|
1708
|
+
"METAFLOW_FLOW_FILENAME": os.path.basename(sys.argv[0]),
|
1708
1709
|
"METAFLOW_FLOW_NAME": self.flow.name,
|
1709
1710
|
"METAFLOW_STEP_NAME": node.name,
|
1710
1711
|
"METAFLOW_RUN_ID": run_id,
|
@@ -190,7 +190,7 @@ def step(
|
|
190
190
|
executable = ctx.obj.environment.executable(step_name, executable)
|
191
191
|
|
192
192
|
# Set environment
|
193
|
-
env = {}
|
193
|
+
env = {"METAFLOW_FLOW_FILENAME": os.path.basename(sys.argv[0])}
|
194
194
|
env_deco = [deco for deco in node.decorators if deco.name == "environment"]
|
195
195
|
if env_deco:
|
196
196
|
env = env_deco[0].attributes["vars"]
|
@@ -547,6 +547,13 @@ class KubernetesDecorator(StepDecorator):
|
|
547
547
|
self._save_logs_sidecar = Sidecar("save_logs_periodically")
|
548
548
|
self._save_logs_sidecar.start()
|
549
549
|
|
550
|
+
# Start spot termination monitor sidecar.
|
551
|
+
current._update_env(
|
552
|
+
{"spot_termination_notice": "/tmp/spot_termination_notice"}
|
553
|
+
)
|
554
|
+
self._spot_monitor_sidecar = Sidecar("spot_termination_monitor")
|
555
|
+
self._spot_monitor_sidecar.start()
|
556
|
+
|
550
557
|
num_parallel = None
|
551
558
|
if hasattr(flow, "_parallel_ubf_iter"):
|
552
559
|
num_parallel = flow._parallel_ubf_iter.num_parallel
|
@@ -605,6 +612,7 @@ class KubernetesDecorator(StepDecorator):
|
|
605
612
|
|
606
613
|
try:
|
607
614
|
self._save_logs_sidecar.terminate()
|
615
|
+
self._spot_monitor_sidecar.terminate()
|
608
616
|
except:
|
609
617
|
# Best effort kill
|
610
618
|
pass
|
@@ -0,0 +1,69 @@
|
|
1
|
+
from metaflow._vendor import click
|
2
|
+
from datetime import datetime, timezone
|
3
|
+
from metaflow.tagging_util import validate_tags
|
4
|
+
from metaflow.metadata_provider import MetaDatum
|
5
|
+
|
6
|
+
|
7
|
+
@click.group()
def cli():
    # Root click group that the spot-metadata commands attach to. It is kept
    # without a docstring on purpose: click would otherwise use the docstring
    # as the group's help text.
    pass
|
10
|
+
|
11
|
+
|
12
|
+
@cli.group(help="Commands related to spot metadata.")
def spot_metadata():
    """Group the spot-metadata subcommands under the top-level CLI.

    The user-visible help text comes from the explicit ``help=`` argument of
    the decorator, not from this docstring.
    """
|
15
|
+
|
16
|
+
|
17
|
+
@spot_metadata.command(help="Record spot termination metadata for a task.")
@click.option(
    "--run-id",
    required=True,
    help="Run ID for which metadata is to be recorded.",
)
@click.option(
    "--step-name",
    required=True,
    help="Step Name for which metadata is to be recorded.",
)
@click.option(
    "--task-id",
    required=True,
    help="Task ID for which metadata is to be recorded.",
)
@click.option(
    "--termination-notice-time",
    required=True,
    help="Spot termination notice time.",
)
@click.option(
    "--tag",
    "tags",
    multiple=True,
    required=False,
    default=None,
    help="List of tags.",
)
@click.pass_obj
def record(obj, run_id, step_name, task_id, termination_notice_time, tags=None):
    """Register spot termination metadata for a single task.

    Two metadata entries are registered through ``obj.metadata``:

    * ``spot-termination-received-at``: the current UTC time, formatted as
      ``%Y-%m-%dT%H:%M:%SZ``, taken when this command runs.
    * ``spot-termination-time``: ``termination_notice_time`` exactly as
      passed on the command line.

    Both entries carry the same tag list. ``tags`` is validated with
    ``validate_tags`` before anything is registered.
    """
    validate_tags(tags)

    shared_tags = [] if not tags else list(tags)
    received_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # Each entry uses its name as both the metadata field and the type.
    observations = (
        ("spot-termination-received-at", received_at),
        ("spot-termination-time", termination_notice_time),
    )
    entries = [
        MetaDatum(field=name, value=value, type=name, tags=shared_tags)
        for name, value in observations
    ]

    obj.metadata.register_metadata(
        run_id=run_id, step_name=step_name, task_id=task_id, metadata=entries
    )
|
@@ -0,0 +1,109 @@
|
|
1
|
+
import os
|
2
|
+
import sys
|
3
|
+
import time
|
4
|
+
import signal
|
5
|
+
import requests
|
6
|
+
import subprocess
|
7
|
+
from multiprocessing import Process
|
8
|
+
from datetime import datetime, timezone
|
9
|
+
from metaflow.sidecar import MessageTypes
|
10
|
+
|
11
|
+
|
12
|
+
class SpotTerminationMonitorSidecar(object):
    """Sidecar that polls EC2 instance metadata for a spot termination notice.

    On construction the sidecar asks the instance metadata service whether
    the instance life cycle is ``spot``. If it is, a child process is started
    that polls the spot termination-time endpoint every ``POLL_INTERVAL``
    seconds. When a notice is seen, the child writes the termination time to
    ``TERMINATION_NOTICE_PATH``, tries to record it as task metadata through
    the flow's ``spot-metadata record`` command, and then sends ``SIGTERM``
    to its parent process.
    """

    EC2_TYPE_URL = "http://169.254.169.254/latest/meta-data/instance-life-cycle"
    METADATA_URL = "http://169.254.169.254/latest/meta-data/spot/termination-time"
    TOKEN_URL = "http://169.254.169.254/latest/api/token"
    POLL_INTERVAL = 5  # seconds
    # File that receives the termination time once a notice is observed.
    TERMINATION_NOTICE_PATH = "/tmp/spot_termination_notice"

    def __init__(self):
        self.is_alive = True
        self._process = None
        self._token = None
        self._token_expiry = 0

        # Only spend a process on polling when running on a spot instance.
        if self._is_aws_spot_instance():
            self._process = Process(target=self._monitor_loop)
            self._process.start()

    def process_message(self, msg):
        """Stop the monitor process when a SHUTDOWN message arrives."""
        if msg.msg_type == MessageTypes.SHUTDOWN:
            self.is_alive = False
            if self._process:
                self._process.terminate()

    @classmethod
    def get_worker(cls):
        """Return the class itself as the sidecar worker."""
        return cls

    def _get_imds_token(self):
        """Return a cached metadata-service token, refreshing it when due.

        A refresh is attempted once the cached token is within 60 seconds of
        its recorded expiry. If the refresh fails, the previously cached
        token (possibly ``None``) is returned.
        """
        current_time = time.time()
        if current_time >= self._token_expiry - 60:  # Refresh 60s before expiry
            try:
                response = requests.put(
                    url=self.TOKEN_URL,
                    headers={"X-aws-ec2-metadata-token-ttl-seconds": "300"},
                    timeout=1,
                )
                if response.status_code == 200:
                    self._token = response.text
                    self._token_expiry = current_time + 240  # Slightly less than TTL
            except requests.exceptions.RequestException:
                pass
        return self._token

    def _make_ec2_request(self, url, timeout):
        """GET ``url``, attaching the metadata token header when one is held."""
        token = self._get_imds_token()
        headers = {"X-aws-ec2-metadata-token": token} if token else {}
        return requests.get(url=url, headers=headers, timeout=timeout)

    def _is_aws_spot_instance(self):
        """Return True only if the metadata service reports life cycle ``spot``."""
        try:
            response = self._make_ec2_request(url=self.EC2_TYPE_URL, timeout=1)
        except requests.exceptions.RequestException:
            # Timeout is a RequestException subclass, so this covers it too.
            return False
        return response.status_code == 200 and response.text == "spot"

    def _monitor_loop(self):
        """Poll for a termination notice and signal the parent once it is seen."""
        while self.is_alive:
            try:
                response = self._make_ec2_request(url=self.METADATA_URL, timeout=1)
            except requests.exceptions.RequestException:
                response = None

            if response is not None and response.status_code == 200:
                termination_time = response.text
                try:
                    self._emit_termination_metadata(termination_time)
                except Exception as exc:
                    # Recording the notice is best effort: a failure here must
                    # not prevent the parent from being signalled below.
                    print(f"Failed to record spot termination metadata: {exc}")
                os.kill(os.getppid(), signal.SIGTERM)
                break

            time.sleep(self.POLL_INTERVAL)

    @staticmethod
    def _parse_task_pathspec(pathspec):
        """Split ``flow/run/step/task`` into ``(run_id, step_name, task_id)``.

        Returns ``None`` when ``pathspec`` is missing or does not have exactly
        four ``/``-separated components.
        """
        parts = pathspec.split("/") if pathspec else []
        if len(parts) != 4:
            return None
        return tuple(parts[1:])

    def _emit_termination_metadata(self, termination_time):
        """Write the notice file and record the notice as task metadata.

        The notice file is written first so that it exists even when the
        metadata cannot be recorded. Recording is skipped, with a message,
        when ``METAFLOW_FLOW_FILENAME`` is unset or ``MF_PATHSPEC`` is unset
        or malformed.
        """
        with open(self.TERMINATION_NOTICE_PATH, "w") as fp:
            fp.write(termination_time)

        flow_filename = os.getenv("METAFLOW_FLOW_FILENAME")
        task = self._parse_task_pathspec(os.getenv("MF_PATHSPEC"))
        if not flow_filename or task is None:
            print(
                "Failed to record spot termination metadata: "
                "METAFLOW_FLOW_FILENAME or MF_PATHSPEC is missing or malformed"
            )
            return

        run_id, step_name, task_id = task
        retry_count = os.getenv("MF_ATTEMPT")

        command = [
            sys.executable,
            f"/metaflow/{flow_filename}",
            "spot-metadata",
            "record",
            "--run-id",
            run_id,
            "--step-name",
            step_name,
            "--task-id",
            task_id,
            "--termination-notice-time",
            termination_time,
            "--tag",
            "attempt_id:{}".format(retry_count),
        ]

        result = subprocess.run(command, capture_output=True, text=True)

        if result.returncode != 0:
            print(f"Failed to record spot termination metadata: {result.stderr}")
|
metaflow/version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
metaflow_version = "2.13.4"
|
1
|
+
metaflow_version = "2.13.5"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: metaflow
|
3
|
-
Version: 2.13.4
|
3
|
+
Version: 2.13.5
|
4
4
|
Summary: Metaflow: More Data Science, Less Engineering
|
5
5
|
Author: Metaflow Developers
|
6
6
|
Author-email: help@metaflow.org
|
@@ -26,7 +26,7 @@ License-File: LICENSE
|
|
26
26
|
Requires-Dist: requests
|
27
27
|
Requires-Dist: boto3
|
28
28
|
Provides-Extra: stubs
|
29
|
-
Requires-Dist: metaflow-stubs==2.13.4; extra == "stubs"
|
29
|
+
Requires-Dist: metaflow-stubs==2.13.5; extra == "stubs"
|
30
30
|
Dynamic: author
|
31
31
|
Dynamic: author-email
|
32
32
|
Dynamic: classifier
|
@@ -36,7 +36,7 @@ metaflow/tuple_util.py,sha256=_G5YIEhuugwJ_f6rrZoelMFak3DqAR2tt_5CapS1XTY,830
|
|
36
36
|
metaflow/unbounded_foreach.py,sha256=p184WMbrMJ3xKYHwewj27ZhRUsSj_kw1jlye5gA9xJk,387
|
37
37
|
metaflow/util.py,sha256=hKjHl6NYJkKBSU2tzdVbddfOX1zWK73T4GCO42A0XB4,14666
|
38
38
|
metaflow/vendor.py,sha256=FchtA9tH22JM-eEtJ2c9FpUdMn8sSb1VHuQS56EcdZk,5139
|
39
|
-
metaflow/version.py,sha256=
|
39
|
+
metaflow/version.py,sha256=3DskisNYGb1X_79clFayeGjmYoYe6Nydsy4YBI-w0Ew,28
|
40
40
|
metaflow/_vendor/__init__.py,sha256=y_CiwUD3l4eAKvTVDZeqgVujMy31cAM1qjAB-HfI-9s,353
|
41
41
|
metaflow/_vendor/typing_extensions.py,sha256=0nUs5p1A_UrZigrAVBoOEM6TxU37zzPDUtiij1ZwpNc,110417
|
42
42
|
metaflow/_vendor/zipp.py,sha256=ajztOH-9I7KA_4wqDYygtHa6xUBVZgFpmZ8FE74HHHI,8425
|
@@ -149,7 +149,7 @@ metaflow/mflog/mflog.py,sha256=VebXxqitOtNAs7VJixnNfziO_i_urG7bsJ5JiB5IXgY,4370
|
|
149
149
|
metaflow/mflog/save_logs.py,sha256=ZBAF4BMukw4FMAC7odpr9OI2BC_2petPtDX0ca6srC4,2352
|
150
150
|
metaflow/mflog/save_logs_periodically.py,sha256=2Uvk9hi-zlCqXxOQoXmmjH1SCugfw6eG6w70WgfI-ho,1256
|
151
151
|
metaflow/mflog/tee.py,sha256=wTER15qeHuiRpCkOqo-bd-r3Gj-EVlf3IvWRCA4beW4,887
|
152
|
-
metaflow/plugins/__init__.py,sha256=
|
152
|
+
metaflow/plugins/__init__.py,sha256=Lr7i7ssJI_-czorJYjMFcRhGspqArobNoXUl9T1p3MY,8055
|
153
153
|
metaflow/plugins/catch_decorator.py,sha256=UOM2taN_OL2RPpuJhwEOA9ZALm0-hHD0XS2Hn2GUev0,4061
|
154
154
|
metaflow/plugins/debug_logger.py,sha256=mcF5HYzJ0NQmqCMjyVUk3iAP-heroHRIiVWQC6Ha2-I,879
|
155
155
|
metaflow/plugins/debug_monitor.py,sha256=Md5X_sDOSssN9pt2D8YcaIjTK5JaQD55UAYTcF6xYF0,1099
|
@@ -181,7 +181,7 @@ metaflow/plugins/airflow/sensors/s3_sensor.py,sha256=iDReG-7FKnumrtQg-HY6cCUAAqN
|
|
181
181
|
metaflow/plugins/argo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
182
182
|
metaflow/plugins/argo/argo_client.py,sha256=PS_cYGnPw9h4X7TP_plObDH3clMw4reOsBLkkGPTd0Y,16282
|
183
183
|
metaflow/plugins/argo/argo_events.py,sha256=_C1KWztVqgi3zuH57pInaE9OzABc2NnncC-zdwOMZ-w,5909
|
184
|
-
metaflow/plugins/argo/argo_workflows.py,sha256=
|
184
|
+
metaflow/plugins/argo/argo_workflows.py,sha256=HgreJyYibFiWScq9mvd0p0bM8NJPX49n0gdnhUVGoHI,175591
|
185
185
|
metaflow/plugins/argo/argo_workflows_cli.py,sha256=11_8l4IrtkwviKsijInTZPt7YK5TZzClREnw_Cf4D5o,36706
|
186
186
|
metaflow/plugins/argo/argo_workflows_decorator.py,sha256=ogCSBmwsC2C3eusydrgjuAJd4qK18f1sI4jJwA4Fd-o,7800
|
187
187
|
metaflow/plugins/argo/argo_workflows_deployer.py,sha256=6kHxEnYXJwzNCM9swI8-0AckxtPWqwhZLerYkX8fxUM,4444
|
@@ -289,11 +289,13 @@ metaflow/plugins/gcp/includefile_support.py,sha256=OQO0IVWv4ObboL0VqEZwcDOyj9ORL
|
|
289
289
|
metaflow/plugins/kubernetes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
290
290
|
metaflow/plugins/kubernetes/kube_utils.py,sha256=jdFMGbEmIow-oli26v31W9CmbZXigx06b3D_xIobpk0,4140
|
291
291
|
metaflow/plugins/kubernetes/kubernetes.py,sha256=7yaa1TL3TcC-Js6_kAi0HGFLbXesMw3WiKWPlN9yIxo,30028
|
292
|
-
metaflow/plugins/kubernetes/kubernetes_cli.py,sha256=
|
292
|
+
metaflow/plugins/kubernetes/kubernetes_cli.py,sha256=o_o0BDEJFpTuga7txRmkvZH8OIuTb5kI4UaG6xbzf84,13929
|
293
293
|
metaflow/plugins/kubernetes/kubernetes_client.py,sha256=tuvXP-QKpdeSmzVolB2R_TaacOr5DIb0j642eKcjsiM,6491
|
294
|
-
metaflow/plugins/kubernetes/kubernetes_decorator.py,sha256=
|
294
|
+
metaflow/plugins/kubernetes/kubernetes_decorator.py,sha256=OwIuB9MAeO_fmTv3_IurDnbL_szXH7et3TwEva4PCfc,30853
|
295
295
|
metaflow/plugins/kubernetes/kubernetes_job.py,sha256=pO9ExyAVCDoAoWFn9oFcos2aa0MQk4_D61O-T4E10E8,31826
|
296
296
|
metaflow/plugins/kubernetes/kubernetes_jobsets.py,sha256=9kU43eE5IvIa7y-POzBdxnJOazWsedKhwQ51Tu1HN_A,42471
|
297
|
+
metaflow/plugins/kubernetes/spot_metadata_cli.py,sha256=an0nWCxgflmqIPBCBrlb4m3DereDFFJBLt-KKhqcHc8,1670
|
298
|
+
metaflow/plugins/kubernetes/spot_monitor_sidecar.py,sha256=zrWU-smQwPnL6MBHmzTxWyEA00R6iKKQbhhy50xFwQ8,3832
|
297
299
|
metaflow/plugins/metadata_providers/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
298
300
|
metaflow/plugins/metadata_providers/local.py,sha256=9UAxe9caN6kU1lkSlIoJbRGgTqsMa62cBTnyMwhqiaA,22446
|
299
301
|
metaflow/plugins/metadata_providers/service.py,sha256=NKZfFMamx6upP6aFRJfXlfYIhySgFNzz6kbp1yPD7LA,20222
|
@@ -358,9 +360,9 @@ metaflow/user_configs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
|
|
358
360
|
metaflow/user_configs/config_decorators.py,sha256=Tj0H88UT8Q6pylXxHXgiA6cqnNlw4d3mR7M8J9g3ZUg,20139
|
359
361
|
metaflow/user_configs/config_options.py,sha256=Knpiax_YGmYAdR3zKmaepN8puW1MyL9g6-eMGAkcylo,20942
|
360
362
|
metaflow/user_configs/config_parameters.py,sha256=T0Zz18o9zKEV7mMcKotFWvXixhJpotLRBVrKx6ENErQ,15416
|
361
|
-
metaflow-2.13.
|
362
|
-
metaflow-2.13.
|
363
|
-
metaflow-2.13.
|
364
|
-
metaflow-2.13.
|
365
|
-
metaflow-2.13.
|
366
|
-
metaflow-2.13.
|
363
|
+
metaflow-2.13.5.dist-info/LICENSE,sha256=nl_Lt5v9VvJ-5lWJDT4ddKAG-VZ-2IaLmbzpgYDz2hU,11343
|
364
|
+
metaflow-2.13.5.dist-info/METADATA,sha256=8_zAjFu6yf75apsgNeoWmncpzpEhuji4C-JYe3w8HBg,6121
|
365
|
+
metaflow-2.13.5.dist-info/WHEEL,sha256=9Hm2OB-j1QcCUq9Jguht7ayGIIZBRTdOXD1qg9cCgPM,109
|
366
|
+
metaflow-2.13.5.dist-info/entry_points.txt,sha256=IKwTN1T3I5eJL3uo_vnkyxVffcgnRdFbKwlghZfn27k,57
|
367
|
+
metaflow-2.13.5.dist-info/top_level.txt,sha256=v1pDHoWaSaKeuc5fKTRSfsXCKSdW1zvNVmvA-i0if3o,9
|
368
|
+
metaflow-2.13.5.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|