metaflow 2.13.4__py2.py3-none-any.whl → 2.13.5__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,6 +16,7 @@ CLIS_DESC = [
16
16
  ("argo-workflows", ".argo.argo_workflows_cli.cli"),
17
17
  ("card", ".cards.card_cli.cli"),
18
18
  ("tag", ".tag_cli.cli"),
19
+ ("spot-metadata", ".kubernetes.spot_metadata_cli.cli"),
19
20
  ("logs", ".logs_cli.cli"),
20
21
  ]
21
22
 
@@ -104,6 +105,10 @@ SIDECARS_DESC = [
104
105
  "save_logs_periodically",
105
106
  "..mflog.save_logs_periodically.SaveLogsPeriodicallySidecar",
106
107
  ),
108
+ (
109
+ "spot_termination_monitor",
110
+ ".kubernetes.spot_monitor_sidecar.SpotTerminationMonitorSidecar",
111
+ ),
107
112
  ("heartbeat", "metaflow.metadata_provider.heartbeat.MetadataHeartBeat"),
108
113
  ]
109
114
 
@@ -1705,6 +1705,7 @@ class ArgoWorkflows(object):
1705
1705
  },
1706
1706
  **{
1707
1707
  # Some optional values for bookkeeping
1708
+ "METAFLOW_FLOW_FILENAME": os.path.basename(sys.argv[0]),
1708
1709
  "METAFLOW_FLOW_NAME": self.flow.name,
1709
1710
  "METAFLOW_STEP_NAME": node.name,
1710
1711
  "METAFLOW_RUN_ID": run_id,
@@ -190,7 +190,7 @@ def step(
190
190
  executable = ctx.obj.environment.executable(step_name, executable)
191
191
 
192
192
  # Set environment
193
- env = {}
193
+ env = {"METAFLOW_FLOW_FILENAME": os.path.basename(sys.argv[0])}
194
194
  env_deco = [deco for deco in node.decorators if deco.name == "environment"]
195
195
  if env_deco:
196
196
  env = env_deco[0].attributes["vars"]
@@ -547,6 +547,13 @@ class KubernetesDecorator(StepDecorator):
547
547
  self._save_logs_sidecar = Sidecar("save_logs_periodically")
548
548
  self._save_logs_sidecar.start()
549
549
 
550
+ # Start spot termination monitor sidecar.
551
+ current._update_env(
552
+ {"spot_termination_notice": "/tmp/spot_termination_notice"}
553
+ )
554
+ self._spot_monitor_sidecar = Sidecar("spot_termination_monitor")
555
+ self._spot_monitor_sidecar.start()
556
+
550
557
  num_parallel = None
551
558
  if hasattr(flow, "_parallel_ubf_iter"):
552
559
  num_parallel = flow._parallel_ubf_iter.num_parallel
@@ -605,6 +612,7 @@ class KubernetesDecorator(StepDecorator):
605
612
 
606
613
  try:
607
614
  self._save_logs_sidecar.terminate()
615
+ self._spot_monitor_sidecar.terminate()
608
616
  except:
609
617
  # Best effort kill
610
618
  pass
@@ -0,0 +1,69 @@
1
+ from metaflow._vendor import click
2
+ from datetime import datetime, timezone
3
+ from metaflow.tagging_util import validate_tags
4
+ from metaflow.metadata_provider import MetaDatum
5
+
6
+
7
# Root click group for this plugin CLI module; it performs no work itself and
# exists only so that sub-groups can be attached to it.
# NOTE: kept free of a docstring on purpose - no explicit ``help`` is passed to
# ``click.group()`` here, so a docstring could end up in the rendered help text.
@click.group()
def cli():
    pass
10
+
11
+
12
# Sub-group registered on ``cli``; the commands attached to it are invoked as
# ``spot-metadata <command>``. The user-facing help text is supplied
# explicitly through ``help=``.
@cli.group(help="Commands related to spot metadata.")
def spot_metadata():
    pass
15
+
16
+
17
@spot_metadata.command(help="Record spot termination metadata for a task.")
@click.option(
    "--run-id",
    required=True,
    help="Run ID for which metadata is to be recorded.",
)
@click.option(
    "--step-name",
    required=True,
    help="Step Name for which metadata is to be recorded.",
)
@click.option(
    "--task-id",
    required=True,
    help="Task ID for which metadata is to be recorded.",
)
@click.option(
    "--termination-notice-time",
    required=True,
    help="Spot termination notice time.",
)
@click.option(
    "--tag",
    "tags",
    multiple=True,
    required=False,
    default=None,
    help="List of tags.",
)
@click.pass_obj
def record(obj, run_id, step_name, task_id, termination_notice_time, tags=None):
    """Register two spot-termination metadata entries for a task.

    The entries are ``spot-termination-received-at`` (the current UTC time,
    formatted as ``%Y-%m-%dT%H:%M:%SZ``) and ``spot-termination-time`` (the
    value passed in ``termination_notice_time``). Both carry the same tags and
    are handed to ``obj.metadata.register_metadata`` in that order.

    Parameters
    ----------
    obj
        Object passed in by ``click.pass_obj``; its ``metadata`` attribute is
        used to register the entries.
    run_id, step_name, task_id
        Identify the task the metadata is recorded against.
    termination_notice_time
        Termination time to store, recorded verbatim.
    tags
        Optional tags (``--tag`` may be repeated); validated with
        ``validate_tags`` before anything is recorded.
    """
    validate_tags(tags)

    # Both entries share one tag list, exactly as collected from the CLI.
    shared_tags = [] if not tags else list(tags)

    received_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # Each entry uses the same string for its ``field`` and its ``type``.
    named_values = (
        ("spot-termination-received-at", received_at),
        ("spot-termination-time", termination_notice_time),
    )
    entries = [
        MetaDatum(field=name, value=value, type=name, tags=shared_tags)
        for name, value in named_values
    ]

    obj.metadata.register_metadata(
        run_id=run_id, step_name=step_name, task_id=task_id, metadata=entries
    )
@@ -0,0 +1,109 @@
1
+ import os
2
+ import sys
3
+ import time
4
+ import signal
5
+ import requests
6
+ import subprocess
7
+ from multiprocessing import Process
8
+ from datetime import datetime, timezone
9
+ from metaflow.sidecar import MessageTypes
10
+
11
+
12
class SpotTerminationMonitorSidecar(object):
    """Sidecar that polls the EC2 metadata endpoint for a spot termination time.

    On construction the sidecar asks the metadata endpoint for the instance
    life cycle. Only when the answer is ``"spot"`` does it start a separate
    ``multiprocessing.Process`` that polls ``METADATA_URL`` every
    ``POLL_INTERVAL`` seconds. When that URL answers with HTTP 200, the
    monitor:

    1. writes the returned termination time to ``TERMINATION_NOTICE_PATH``,
    2. runs the flow's ``spot-metadata record`` command in a subprocess, and
    3. sends ``SIGTERM`` to its parent process.

    Steps 1 and 2 are best effort: a failure there is reported on stdout and
    never prevents step 3.
    """

    EC2_TYPE_URL = "http://169.254.169.254/latest/meta-data/instance-life-cycle"
    METADATA_URL = "http://169.254.169.254/latest/meta-data/spot/termination-time"
    TOKEN_URL = "http://169.254.169.254/latest/api/token"
    POLL_INTERVAL = 5  # seconds
    # File that receives the termination time once a notice has been observed.
    TERMINATION_NOTICE_PATH = "/tmp/spot_termination_notice"

    def __init__(self):
        self.is_alive = True
        self._process = None
        self._token = None
        self._token_expiry = 0

        # Only spend a process on polling when running on a spot instance.
        if self._is_aws_spot_instance():
            self._process = Process(target=self._monitor_loop)
            self._process.start()

    def process_message(self, msg):
        """Handle a sidecar message; ``SHUTDOWN`` stops the monitor process."""
        if msg.msg_type == MessageTypes.SHUTDOWN:
            # NOTE(review): the monitor loop runs in a separate Process, so it
            # presumably does not observe this flag change; terminate() below
            # is what actually stops it.
            self.is_alive = False
            if self._process:
                self._process.terminate()

    @classmethod
    def get_worker(cls):
        """Return the class that implements the sidecar worker (this class)."""
        return cls

    def _get_imds_token(self):
        """Return a metadata-service token, refreshing it when close to expiry.

        The last known token (possibly ``None``) is returned when a refresh
        attempt fails or does not answer with HTTP 200.
        """
        current_time = time.time()
        if current_time >= self._token_expiry - 60:  # Refresh 60s before expiry
            try:
                response = requests.put(
                    url=self.TOKEN_URL,
                    headers={"X-aws-ec2-metadata-token-ttl-seconds": "300"},
                    timeout=1,
                )
                if response.status_code == 200:
                    self._token = response.text
                    self._token_expiry = current_time + 240  # Slightly less than TTL
            except requests.exceptions.RequestException:
                # Best effort: fall through and return whatever token we have.
                pass
        return self._token

    def _make_ec2_request(self, url, timeout):
        """GET ``url``, attaching the metadata token header when one is available."""
        token = self._get_imds_token()
        headers = {"X-aws-ec2-metadata-token": token} if token else {}
        return requests.get(url=url, headers=headers, timeout=timeout)

    def _is_aws_spot_instance(self):
        """Return True only if the metadata endpoint reports life cycle ``spot``."""
        try:
            response = self._make_ec2_request(url=self.EC2_TYPE_URL, timeout=1)
        except requests.exceptions.RequestException:
            # Covers timeouts as well (Timeout derives from RequestException).
            return False
        return response.status_code == 200 and response.text == "spot"

    def _monitor_loop(self):
        """Poll for a termination notice; on notice, record it and SIGTERM the parent."""
        while self.is_alive:
            try:
                response = self._make_ec2_request(url=self.METADATA_URL, timeout=1)
            except requests.exceptions.RequestException:
                response = None

            if response is not None and response.status_code == 200:
                try:
                    self._emit_termination_metadata(response.text)
                except Exception as exc:
                    # Recording is best effort: whatever goes wrong here must
                    # not stop the parent from being signalled below.
                    print(f"Failed to record spot termination metadata: {exc}")
                os.kill(os.getppid(), signal.SIGTERM)
                break

            time.sleep(self.POLL_INTERVAL)

    def _emit_termination_metadata(self, termination_time):
        """Persist the termination time and record it as task metadata.

        The termination time is first written to ``TERMINATION_NOTICE_PATH``.
        The flow file named by ``METAFLOW_FLOW_FILENAME`` is then invoked with
        ``spot-metadata record`` for the task identified by ``MF_PATHSPEC``
        (expected to have four ``/``-separated parts, the last three being run
        id, step name and task id), tagged with the ``MF_ATTEMPT`` value.

        If the required environment variables are missing or malformed, the
        metadata command is skipped and a message is printed instead of
        raising.
        """
        # Write the notice file first so it exists even if recording fails.
        with open(self.TERMINATION_NOTICE_PATH, "w") as fp:
            fp.write(termination_time)

        flow_filename = os.getenv("METAFLOW_FLOW_FILENAME")
        pathspec = os.getenv("MF_PATHSPEC")
        retry_count = os.getenv("MF_ATTEMPT")

        parts = pathspec.split("/") if pathspec else []
        if not flow_filename or len(parts) != 4:
            print(
                "Failed to record spot termination metadata: "
                "METAFLOW_FLOW_FILENAME or MF_PATHSPEC is missing or malformed"
            )
            return
        _, run_id, step_name, task_id = parts

        command = [
            sys.executable,
            f"/metaflow/{flow_filename}",
            "spot-metadata",
            "record",
            "--run-id",
            run_id,
            "--step-name",
            step_name,
            "--task-id",
            task_id,
            "--termination-notice-time",
            termination_time,
            "--tag",
            "attempt_id:{}".format(retry_count),
        ]

        result = subprocess.run(command, capture_output=True, text=True)

        if result.returncode != 0:
            print(f"Failed to record spot termination metadata: {result.stderr}")
metaflow/version.py CHANGED
@@ -1 +1 @@
1
- metaflow_version = "2.13.4"
1
+ metaflow_version = "2.13.5"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: metaflow
3
- Version: 2.13.4
3
+ Version: 2.13.5
4
4
  Summary: Metaflow: More Data Science, Less Engineering
5
5
  Author: Metaflow Developers
6
6
  Author-email: help@metaflow.org
@@ -26,7 +26,7 @@ License-File: LICENSE
26
26
  Requires-Dist: requests
27
27
  Requires-Dist: boto3
28
28
  Provides-Extra: stubs
29
- Requires-Dist: metaflow-stubs==2.13.4; extra == "stubs"
29
+ Requires-Dist: metaflow-stubs==2.13.5; extra == "stubs"
30
30
  Dynamic: author
31
31
  Dynamic: author-email
32
32
  Dynamic: classifier
@@ -36,7 +36,7 @@ metaflow/tuple_util.py,sha256=_G5YIEhuugwJ_f6rrZoelMFak3DqAR2tt_5CapS1XTY,830
36
36
  metaflow/unbounded_foreach.py,sha256=p184WMbrMJ3xKYHwewj27ZhRUsSj_kw1jlye5gA9xJk,387
37
37
  metaflow/util.py,sha256=hKjHl6NYJkKBSU2tzdVbddfOX1zWK73T4GCO42A0XB4,14666
38
38
  metaflow/vendor.py,sha256=FchtA9tH22JM-eEtJ2c9FpUdMn8sSb1VHuQS56EcdZk,5139
39
- metaflow/version.py,sha256=drr-g04woqZf71pNYGX7pz8CSPausyn9M8cgyFIvKlE,28
39
+ metaflow/version.py,sha256=3DskisNYGb1X_79clFayeGjmYoYe6Nydsy4YBI-w0Ew,28
40
40
  metaflow/_vendor/__init__.py,sha256=y_CiwUD3l4eAKvTVDZeqgVujMy31cAM1qjAB-HfI-9s,353
41
41
  metaflow/_vendor/typing_extensions.py,sha256=0nUs5p1A_UrZigrAVBoOEM6TxU37zzPDUtiij1ZwpNc,110417
42
42
  metaflow/_vendor/zipp.py,sha256=ajztOH-9I7KA_4wqDYygtHa6xUBVZgFpmZ8FE74HHHI,8425
@@ -149,7 +149,7 @@ metaflow/mflog/mflog.py,sha256=VebXxqitOtNAs7VJixnNfziO_i_urG7bsJ5JiB5IXgY,4370
149
149
  metaflow/mflog/save_logs.py,sha256=ZBAF4BMukw4FMAC7odpr9OI2BC_2petPtDX0ca6srC4,2352
150
150
  metaflow/mflog/save_logs_periodically.py,sha256=2Uvk9hi-zlCqXxOQoXmmjH1SCugfw6eG6w70WgfI-ho,1256
151
151
  metaflow/mflog/tee.py,sha256=wTER15qeHuiRpCkOqo-bd-r3Gj-EVlf3IvWRCA4beW4,887
152
- metaflow/plugins/__init__.py,sha256=NXlwhFvhLYhAVhjCyRJZMIpTBBBJlzFupM7MgDKNYv0,7872
152
+ metaflow/plugins/__init__.py,sha256=Lr7i7ssJI_-czorJYjMFcRhGspqArobNoXUl9T1p3MY,8055
153
153
  metaflow/plugins/catch_decorator.py,sha256=UOM2taN_OL2RPpuJhwEOA9ZALm0-hHD0XS2Hn2GUev0,4061
154
154
  metaflow/plugins/debug_logger.py,sha256=mcF5HYzJ0NQmqCMjyVUk3iAP-heroHRIiVWQC6Ha2-I,879
155
155
  metaflow/plugins/debug_monitor.py,sha256=Md5X_sDOSssN9pt2D8YcaIjTK5JaQD55UAYTcF6xYF0,1099
@@ -181,7 +181,7 @@ metaflow/plugins/airflow/sensors/s3_sensor.py,sha256=iDReG-7FKnumrtQg-HY6cCUAAqN
181
181
  metaflow/plugins/argo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
182
182
  metaflow/plugins/argo/argo_client.py,sha256=PS_cYGnPw9h4X7TP_plObDH3clMw4reOsBLkkGPTd0Y,16282
183
183
  metaflow/plugins/argo/argo_events.py,sha256=_C1KWztVqgi3zuH57pInaE9OzABc2NnncC-zdwOMZ-w,5909
184
- metaflow/plugins/argo/argo_workflows.py,sha256=CdZoBZU8aSkza1wRw60VogJv9QdF4CLbfD4o8XPAf5o,175510
184
+ metaflow/plugins/argo/argo_workflows.py,sha256=HgreJyYibFiWScq9mvd0p0bM8NJPX49n0gdnhUVGoHI,175591
185
185
  metaflow/plugins/argo/argo_workflows_cli.py,sha256=11_8l4IrtkwviKsijInTZPt7YK5TZzClREnw_Cf4D5o,36706
186
186
  metaflow/plugins/argo/argo_workflows_decorator.py,sha256=ogCSBmwsC2C3eusydrgjuAJd4qK18f1sI4jJwA4Fd-o,7800
187
187
  metaflow/plugins/argo/argo_workflows_deployer.py,sha256=6kHxEnYXJwzNCM9swI8-0AckxtPWqwhZLerYkX8fxUM,4444
@@ -289,11 +289,13 @@ metaflow/plugins/gcp/includefile_support.py,sha256=OQO0IVWv4ObboL0VqEZwcDOyj9ORL
289
289
  metaflow/plugins/kubernetes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
290
290
  metaflow/plugins/kubernetes/kube_utils.py,sha256=jdFMGbEmIow-oli26v31W9CmbZXigx06b3D_xIobpk0,4140
291
291
  metaflow/plugins/kubernetes/kubernetes.py,sha256=7yaa1TL3TcC-Js6_kAi0HGFLbXesMw3WiKWPlN9yIxo,30028
292
- metaflow/plugins/kubernetes/kubernetes_cli.py,sha256=A6hI6KZ6sadPAOAyGhjwITMfnabr6voBXLRlDDxylcg,13874
292
+ metaflow/plugins/kubernetes/kubernetes_cli.py,sha256=o_o0BDEJFpTuga7txRmkvZH8OIuTb5kI4UaG6xbzf84,13929
293
293
  metaflow/plugins/kubernetes/kubernetes_client.py,sha256=tuvXP-QKpdeSmzVolB2R_TaacOr5DIb0j642eKcjsiM,6491
294
- metaflow/plugins/kubernetes/kubernetes_decorator.py,sha256=5NfrCZaGf2a2oQK4CeJExcizbojynCnEXzXqSN5Hoz0,30500
294
+ metaflow/plugins/kubernetes/kubernetes_decorator.py,sha256=OwIuB9MAeO_fmTv3_IurDnbL_szXH7et3TwEva4PCfc,30853
295
295
  metaflow/plugins/kubernetes/kubernetes_job.py,sha256=pO9ExyAVCDoAoWFn9oFcos2aa0MQk4_D61O-T4E10E8,31826
296
296
  metaflow/plugins/kubernetes/kubernetes_jobsets.py,sha256=9kU43eE5IvIa7y-POzBdxnJOazWsedKhwQ51Tu1HN_A,42471
297
+ metaflow/plugins/kubernetes/spot_metadata_cli.py,sha256=an0nWCxgflmqIPBCBrlb4m3DereDFFJBLt-KKhqcHc8,1670
298
+ metaflow/plugins/kubernetes/spot_monitor_sidecar.py,sha256=zrWU-smQwPnL6MBHmzTxWyEA00R6iKKQbhhy50xFwQ8,3832
297
299
  metaflow/plugins/metadata_providers/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
298
300
  metaflow/plugins/metadata_providers/local.py,sha256=9UAxe9caN6kU1lkSlIoJbRGgTqsMa62cBTnyMwhqiaA,22446
299
301
  metaflow/plugins/metadata_providers/service.py,sha256=NKZfFMamx6upP6aFRJfXlfYIhySgFNzz6kbp1yPD7LA,20222
@@ -358,9 +360,9 @@ metaflow/user_configs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
358
360
  metaflow/user_configs/config_decorators.py,sha256=Tj0H88UT8Q6pylXxHXgiA6cqnNlw4d3mR7M8J9g3ZUg,20139
359
361
  metaflow/user_configs/config_options.py,sha256=Knpiax_YGmYAdR3zKmaepN8puW1MyL9g6-eMGAkcylo,20942
360
362
  metaflow/user_configs/config_parameters.py,sha256=T0Zz18o9zKEV7mMcKotFWvXixhJpotLRBVrKx6ENErQ,15416
361
- metaflow-2.13.4.dist-info/LICENSE,sha256=nl_Lt5v9VvJ-5lWJDT4ddKAG-VZ-2IaLmbzpgYDz2hU,11343
362
- metaflow-2.13.4.dist-info/METADATA,sha256=PBmkd2ZuaamBlNTJCyBWZWWVO8xs8J1anqV4YV_P4kQ,6121
363
- metaflow-2.13.4.dist-info/WHEEL,sha256=9Hm2OB-j1QcCUq9Jguht7ayGIIZBRTdOXD1qg9cCgPM,109
364
- metaflow-2.13.4.dist-info/entry_points.txt,sha256=IKwTN1T3I5eJL3uo_vnkyxVffcgnRdFbKwlghZfn27k,57
365
- metaflow-2.13.4.dist-info/top_level.txt,sha256=v1pDHoWaSaKeuc5fKTRSfsXCKSdW1zvNVmvA-i0if3o,9
366
- metaflow-2.13.4.dist-info/RECORD,,
363
+ metaflow-2.13.5.dist-info/LICENSE,sha256=nl_Lt5v9VvJ-5lWJDT4ddKAG-VZ-2IaLmbzpgYDz2hU,11343
364
+ metaflow-2.13.5.dist-info/METADATA,sha256=8_zAjFu6yf75apsgNeoWmncpzpEhuji4C-JYe3w8HBg,6121
365
+ metaflow-2.13.5.dist-info/WHEEL,sha256=9Hm2OB-j1QcCUq9Jguht7ayGIIZBRTdOXD1qg9cCgPM,109
366
+ metaflow-2.13.5.dist-info/entry_points.txt,sha256=IKwTN1T3I5eJL3uo_vnkyxVffcgnRdFbKwlghZfn27k,57
367
+ metaflow-2.13.5.dist-info/top_level.txt,sha256=v1pDHoWaSaKeuc5fKTRSfsXCKSdW1zvNVmvA-i0if3o,9
368
+ metaflow-2.13.5.dist-info/RECORD,,