metaflow 2.12.33__py2.py3-none-any.whl → 2.12.35__py2.py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
@@ -377,6 +377,8 @@ KUBERNETES_PORT = from_conf("KUBERNETES_PORT", None)
 KUBERNETES_CPU = from_conf("KUBERNETES_CPU", None)
 KUBERNETES_MEMORY = from_conf("KUBERNETES_MEMORY", None)
 KUBERNETES_DISK = from_conf("KUBERNETES_DISK", None)
+# Default kubernetes QoS class
+KUBERNETES_QOS = from_conf("KUBERNETES_QOS", "burstable")
 
 ARGO_WORKFLOWS_KUBERNETES_SECRETS = from_conf("ARGO_WORKFLOWS_KUBERNETES_SECRETS", "")
 ARGO_WORKFLOWS_ENV_VARS_TO_SKIP = from_conf("ARGO_WORKFLOWS_ENV_VARS_TO_SKIP", "")
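
The new KUBERNETES_QOS knob feeds the qos_requests_and_limits helper that the Airflow and Argo Workflows hunks below import from metaflow.plugins.kubernetes.kube_utils. That helper is not part of this diff, so the following is only a hedged sketch of the mapping it presumably performs, inferred from its call sites and from how Kubernetes QoS classes work (Guaranteed requires limits equal to requests, Burstable only needs requests). The request-dict shape mirrors the code removed below; everything else is an assumption.

    # Hypothetical sketch -- not the actual metaflow.plugins.kubernetes.kube_utils code.
    from typing import Dict, Optional, Tuple


    def qos_requests_and_limits(
        qos: str, cpu: Optional[int], memory: Optional[int], disk: Optional[int]
    ) -> Tuple[Dict[str, str], Dict[str, str]]:
        # Requests are always derived from the decorator attributes (same shape as the
        # dict the Airflow/Argo code used to build inline, see the hunks below).
        requests = {
            "cpu": str(cpu),
            "memory": "%sM" % str(memory),
            "ephemeral-storage": "%sM" % str(disk),
        }
        # "guaranteed" pods must have limits == requests; "burstable" (the default
        # above) only sets requests.
        limits = dict(requests) if qos.lower() == "guaranteed" else {}
        return requests, limits
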
@@ -6,7 +6,18 @@ from tempfile import NamedTemporaryFile
 import time
 import metaflow.tracing as tracing
 
-from typing import Any, Callable, Iterable, Iterator, List, Optional
+from typing import (
+    Any,
+    Callable,
+    Iterable,
+    Iterator,
+    List,
+    Optional,
+    NoReturn,
+    Tuple,
+    TypeVar,
+    Union,
+)
 
 try:
     # Python 2
@@ -30,7 +41,13 @@ class MulticoreException(Exception):
     pass
 
 
-def _spawn(func, arg, dir):
+_A = TypeVar("_A")
+_R = TypeVar("_R")
+
+
+def _spawn(
+    func: Callable[[_A], _R], arg: _A, dir: Optional[str]
+) -> Union[Tuple[int, str], NoReturn]:
     with NamedTemporaryFile(prefix="parallel_map_", dir=dir, delete=False) as tmpfile:
         output_file = tmpfile.name
 
@@ -63,11 +80,11 @@ def _spawn(func, arg, dir):
 
 
 def parallel_imap_unordered(
-    func: Callable[[Any], Any],
-    iterable: Iterable[Any],
+    func: Callable[[_A], _R],
+    iterable: Iterable[_A],
     max_parallel: Optional[int] = None,
     dir: Optional[str] = None,
-) -> Iterator[Any]:
+) -> Iterator[_R]:
     """
     Parallelizes execution of a function using multiprocessing. The result
     order is not guaranteed.
@@ -79,9 +96,9 @@ def parallel_imap_unordered(
     iterable : Iterable[Any]
         Iterable over arguments to pass to fun
     max_parallel int, optional, default None
-        Maximum parallelism. If not specified, uses the number of CPUs
+        Maximum parallelism. If not specified, it uses the number of CPUs
     dir : str, optional, default None
-        If specified, directory where temporary files are created
+        If specified, it's the directory where temporary files are created
 
     Yields
     ------
@@ -121,14 +138,14 @@ def parallel_imap_unordered(
 
 
 def parallel_map(
-    func: Callable[[Any], Any],
-    iterable: Iterable[Any],
+    func: Callable[[_A], _R],
+    iterable: Iterable[_A],
     max_parallel: Optional[int] = None,
     dir: Optional[str] = None,
-) -> List[Any]:
+) -> List[_R]:
     """
     Parallelizes execution of a function using multiprocessing. The result
-    order is that of the arguments in `iterable`
+    order is that of the arguments in `iterable`.
 
     Parameters
     ----------
@@ -137,9 +154,9 @@ def parallel_map(
     iterable : Iterable[Any]
         Iterable over arguments to pass to fun
     max_parallel int, optional, default None
-        Maximum parallelism. If not specified, uses the number of CPUs
+        Maximum parallelism. If not specified, it uses the number of CPUs
     dir : str, optional, default None
-        If specified, directory where temporary files are created
+        If specified, it's the directory where temporary files are created
 
     Returns
     -------
@@ -155,4 +172,4 @@ def parallel_map(
     res = parallel_imap_unordered(
         wrapper, enumerate(iterable), max_parallel=max_parallel, dir=dir
     )
-    return [r for idx, r in sorted(res)]
+    return [r for _, r in sorted(res)]
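
The typing changes above are annotation-only and do not change runtime behavior. For reference, a small usage sketch of the two helpers based on the docstrings in these hunks, assuming they live in metaflow's multicore_utils module and run on a platform with fork-based multiprocessing:

    from metaflow.multicore_utils import parallel_imap_unordered, parallel_map

    # parallel_map preserves the order of the arguments in `iterable`.
    squares = parallel_map(lambda x: x * x, [1, 2, 3, 4], max_parallel=2)
    assert squares == [1, 4, 9, 16]

    # parallel_imap_unordered yields results as workers finish, in no particular order.
    for result in parallel_imap_unordered(lambda x: x * x, range(4)):
        print(result)
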
metaflow/parameters.py CHANGED
@@ -151,6 +151,7 @@ class DeployTimeField(object):
         return self._check_type(val, deploy_time)
 
     def _check_type(self, val, deploy_time):
+
         # it is easy to introduce a deploy-time function that accidentally
         # returns a value whose type is not compatible with what is defined
         # in Parameter. Let's catch those mistakes early here, instead of
@@ -158,7 +159,7 @@ class DeployTimeField(object):
 
         # note: this doesn't work with long in Python2 or types defined as
         # click types, e.g. click.INT
-        TYPES = {bool: "bool", int: "int", float: "float", list: "list"}
+        TYPES = {bool: "bool", int: "int", float: "float", list: "list", dict: "dict"}
 
         msg = (
             "The value returned by the deploy-time function for "
@@ -166,7 +167,12 @@ class DeployTimeField(object):
             % (self.parameter_name, self.field)
         )
 
-        if self.parameter_type in TYPES:
+        if isinstance(self.parameter_type, list):
+            if not any(isinstance(val, x) for x in self.parameter_type):
+                msg += "Expected one of the following %s." % TYPES[self.parameter_type]
+                raise ParameterFieldTypeMismatch(msg)
+            return str(val) if self.return_str else val
+        elif self.parameter_type in TYPES:
             if type(val) != self.parameter_type:
                 msg += "Expected a %s." % TYPES[self.parameter_type]
                 raise ParameterFieldTypeMismatch(msg)
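
The new branch handles the case where parameter_type is a list of accepted types (and dict joins the TYPES table). A standalone sketch of the check the new branch performs, with the surrounding class stripped away; the names below are illustrative only:

    # Illustrative only: mirrors the list-of-types branch added above.
    accepted_types = [int, float]   # e.g. self.parameter_type
    val = 3.14                      # value returned by a deploy-time function

    if not any(isinstance(val, x) for x in accepted_types):
        raise ValueError("Expected one of %s" % accepted_types)
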
@@ -46,6 +46,7 @@ from metaflow.parameters import (
 # TODO: Move chevron to _vendor
 from metaflow.plugins.cards.card_modules import chevron
 from metaflow.plugins.kubernetes.kubernetes import Kubernetes
+from metaflow.plugins.kubernetes.kube_utils import qos_requests_and_limits
 from metaflow.plugins.timeout_decorator import get_run_time_limit_for_task
 from metaflow.util import compress_list, dict_to_cli_options, get_username
 
@@ -428,25 +429,25 @@ class Airflow(object):
             if k8s_deco.attributes["namespace"] is not None
             else "default"
         )
-
+        qos_requests, qos_limits = qos_requests_and_limits(
+            k8s_deco.attributes["qos"],
+            k8s_deco.attributes["cpu"],
+            k8s_deco.attributes["memory"],
+            k8s_deco.attributes["disk"],
+        )
         resources = dict(
-            requests={
-                "cpu": k8s_deco.attributes["cpu"],
-                "memory": "%sM" % str(k8s_deco.attributes["memory"]),
-                "ephemeral-storage": str(k8s_deco.attributes["disk"]),
-            }
+            requests=qos_requests,
+            limits={
+                **qos_limits,
+                **{
+                    "%s.com/gpu".lower()
+                    % k8s_deco.attributes["gpu_vendor"]: str(k8s_deco.attributes["gpu"])
+                    for k in [0]
+                    # Don't set GPU limits if gpu isn't specified.
+                    if k8s_deco.attributes["gpu"] is not None
+                },
+            },
         )
-        if k8s_deco.attributes["gpu"] is not None:
-            resources.update(
-                dict(
-                    limits={
-                        "%s.com/gpu".lower()
-                        % k8s_deco.attributes["gpu_vendor"]: str(
-                            k8s_deco.attributes["gpu"]
-                        )
-                    }
-                )
-            )
 
         annotations = {
             "metaflow/production_token": self.production_token,
@@ -53,6 +53,7 @@ from metaflow.metaflow_config import (
 from metaflow.metaflow_config_funcs import config_values
 from metaflow.mflog import BASH_SAVE_LOGS, bash_capture_logs, export_mflog_env_vars
 from metaflow.parameters import deploy_time_eval
+from metaflow.plugins.kubernetes.kube_utils import qos_requests_and_limits
 from metaflow.plugins.kubernetes.kubernetes import (
     parse_kube_keyvalue_list,
     validate_kube_labels,
@@ -522,7 +523,9 @@ class ArgoWorkflows(object):
             params = set(
                 [param.name.lower() for var, param in self.flow._get_parameters()]
             )
-            for event in self.flow._flow_decorators.get("trigger")[0].triggers:
+            trigger_deco = self.flow._flow_decorators.get("trigger")[0]
+            trigger_deco.format_deploytime_value()
+            for event in trigger_deco.triggers:
                 parameters = {}
                 # TODO: Add a check to guard against names starting with numerals(?)
                 if not re.match(r"^[A-Za-z0-9_.-]+$", event["name"]):
@@ -562,9 +565,11 @@ class ArgoWorkflows(object):
 
         # @trigger_on_finish decorator
         if self.flow._flow_decorators.get("trigger_on_finish"):
-            for event in self.flow._flow_decorators.get("trigger_on_finish")[
-                0
-            ].triggers:
+            trigger_on_finish_deco = self.flow._flow_decorators.get(
+                "trigger_on_finish"
+            )[0]
+            trigger_on_finish_deco.format_deploytime_value()
+            for event in trigger_on_finish_deco.triggers:
                 # Actual filters are deduced here since we don't have access to
                 # the current object in the @trigger_on_finish decorator.
                 triggers.append(
@@ -1838,6 +1843,13 @@ class ArgoWorkflows(object):
         if tmpfs_enabled and tmpfs_tempdir:
             env["METAFLOW_TEMPDIR"] = tmpfs_path
 
+        qos_requests, qos_limits = qos_requests_and_limits(
+            resources["qos"],
+            resources["cpu"],
+            resources["memory"],
+            resources["disk"],
+        )
+
         # Create a ContainerTemplate for this node. Ideally, we would have
         # liked to inline this ContainerTemplate and avoid scanning the workflow
         # twice, but due to issues with variable substitution, we will have to
@@ -1901,6 +1913,7 @@ class ArgoWorkflows(object):
                 persistent_volume_claims=resources["persistent_volume_claims"],
                 shared_memory=shared_memory,
                 port=port,
+                qos=resources["qos"],
             )
 
             for k, v in env.items():
@@ -2086,17 +2099,17 @@ class ArgoWorkflows(object):
                        image=resources["image"],
                        image_pull_policy=resources["image_pull_policy"],
                        resources=kubernetes_sdk.V1ResourceRequirements(
-                            requests={
-                                "cpu": str(resources["cpu"]),
-                                "memory": "%sM" % str(resources["memory"]),
-                                "ephemeral-storage": "%sM"
-                                % str(resources["disk"]),
-                            },
+                            requests=qos_requests,
                             limits={
-                                "%s.com/gpu".lower()
-                                % resources["gpu_vendor"]: str(resources["gpu"])
-                                for k in [0]
-                                if resources["gpu"] is not None
+                                **qos_limits,
+                                **{
+                                    "%s.com/gpu".lower()
+                                    % resources["gpu_vendor"]: str(
+                                        resources["gpu"]
+                                    )
+                                    for k in [0]
+                                    if resources["gpu"] is not None
+                                },
                             },
                        ),
                        # Configure secrets
@@ -2333,7 +2346,7 @@ class ArgoWorkflows(object):
                            "memory": "500Mi",
                        },
                    ),
-                )
+                ).to_dict()
            )
        ),
        Template("capture-error-hook-fn-preflight").steps(
@@ -2684,7 +2697,7 @@ class ArgoWorkflows(object):
                            },
                        ),
                    )
-                )
+                ).to_dict()
            )
        )
 
@@ -2854,7 +2867,7 @@ class ArgoWorkflows(object):
                            "memory": "250Mi",
                        },
                    ),
-                )
+                ).to_dict()
            )
        )
        .service_account_name(ARGO_EVENTS_SERVICE_ACCOUNT)
@@ -10,7 +10,7 @@ from metaflow.metaflow_config import KUBERNETES_NAMESPACE
 from metaflow.plugins.argo.argo_workflows import ArgoWorkflows
 from metaflow.runner.deployer import Deployer, DeployedFlow, TriggeredRun
 
-from metaflow.runner.utils import get_lower_level_group, handle_timeout
+from metaflow.runner.utils import get_lower_level_group, handle_timeout, temporary_fifo
 
 
 def generate_fake_flow_file_contents(
@@ -341,18 +341,14 @@ class ArgoWorkflowsDeployedFlow(DeployedFlow):
         Exception
             If there is an error during the trigger process.
         """
-        with tempfile.TemporaryDirectory() as temp_dir:
-            tfp_runner_attribute = tempfile.NamedTemporaryFile(
-                dir=temp_dir, delete=False
-            )
-
+        with temporary_fifo() as (attribute_file_path, attribute_file_fd):
            # every subclass needs to have `self.deployer_kwargs`
            command = get_lower_level_group(
                self.deployer.api,
                self.deployer.top_level_kwargs,
                self.deployer.TYPE,
                self.deployer.deployer_kwargs,
-            ).trigger(deployer_attribute_file=tfp_runner_attribute.name, **kwargs)
+            ).trigger(deployer_attribute_file=attribute_file_path, **kwargs)
 
            pid = self.deployer.spm.run_command(
                [sys.executable, *command],
@@ -363,7 +359,7 @@ class ArgoWorkflowsDeployedFlow(DeployedFlow):
 
            command_obj = self.deployer.spm.get(pid)
            content = handle_timeout(
-                tfp_runner_attribute, command_obj, self.deployer.file_read_timeout
+                attribute_file_fd, command_obj, self.deployer.file_read_timeout
            )
 
            if command_obj.process.returncode == 0:
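
Both deployer objects (this one and the Step Functions one below) now hand the trigger subprocess a named pipe instead of a regular temporary file. The temporary_fifo helper itself is not included in this diff; judging from the call sites it is a context manager that yields a path plus an already-opened read descriptor, roughly along the lines of the sketch below. This is an assumption for illustration, not the actual metaflow.runner.utils implementation.

    import os
    import tempfile
    from contextlib import contextmanager
    from typing import Iterator, Tuple


    @contextmanager
    def temporary_fifo() -> Iterator[Tuple[str, int]]:
        # Create a named pipe the subprocess can write its attributes to.
        with tempfile.TemporaryDirectory() as temp_dir:
            path = os.path.join(temp_dir, "fifo")
            os.mkfifo(path)
            # Non-blocking open so we don't wait for a writer to connect.
            fd = os.open(path, os.O_RDONLY | os.O_NONBLOCK)
            try:
                yield path, fd
            finally:
                os.close(fd)
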
@@ -6,7 +6,7 @@ from typing import ClassVar, Optional, List
 from metaflow.plugins.aws.step_functions.step_functions import StepFunctions
 from metaflow.runner.deployer import DeployedFlow, TriggeredRun
 
-from metaflow.runner.utils import get_lower_level_group, handle_timeout
+from metaflow.runner.utils import get_lower_level_group, handle_timeout, temporary_fifo
 
 
 class StepFunctionsTriggeredRun(TriggeredRun):
@@ -196,18 +196,14 @@ class StepFunctionsDeployedFlow(DeployedFlow):
         Exception
             If there is an error during the trigger process.
         """
-        with tempfile.TemporaryDirectory() as temp_dir:
-            tfp_runner_attribute = tempfile.NamedTemporaryFile(
-                dir=temp_dir, delete=False
-            )
-
+        with temporary_fifo() as (attribute_file_path, attribute_file_fd):
            # every subclass needs to have `self.deployer_kwargs`
            command = get_lower_level_group(
                self.deployer.api,
                self.deployer.top_level_kwargs,
                self.deployer.TYPE,
                self.deployer.deployer_kwargs,
-            ).trigger(deployer_attribute_file=tfp_runner_attribute.name, **kwargs)
+            ).trigger(deployer_attribute_file=attribute_file_path, **kwargs)
 
            pid = self.deployer.spm.run_command(
                [sys.executable, *command],
@@ -218,7 +214,7 @@ class StepFunctionsDeployedFlow(DeployedFlow):
 
            command_obj = self.deployer.spm.get(pid)
            content = handle_timeout(
-                tfp_runner_attribute, command_obj, self.deployer.file_read_timeout
+                attribute_file_fd, command_obj, self.deployer.file_read_timeout
            )
 
            if command_obj.process.returncode == 0:
@@ -600,7 +600,9 @@ class S3(object):
         # returned are Unicode.
         key = getattr(key_value, "key", key_value)
         if self._s3root is None:
-            parsed = urlparse(to_unicode(key))
+            # NOTE: S3 allows fragments as part of object names, e.g. /dataset #1/data.txt
+            # Without allow_fragments=False the parsed.path for an object name with fragments is incomplete.
+            parsed = urlparse(to_unicode(key), allow_fragments=False)
             if parsed.scheme == "s3" and parsed.path:
                 return key
             else:
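
The NOTE comments added in this and the following hunks describe stock urllib behavior: unless allow_fragments=False is passed, urlparse splits the URL at the first '#', so an S3 key containing '#' loses its tail to the fragment. A quick illustration:

    from urllib.parse import urlparse

    url = "s3://my-bucket/dataset #1/data.txt"

    print(urlparse(url).path)
    # '/dataset '  -- everything after '#' ends up in .fragment
    print(urlparse(url, allow_fragments=False).path)
    # '/dataset #1/data.txt'  -- the full key is preserved
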
@@ -765,7 +767,9 @@ class S3(object):
         """
 
         url = self._url(key)
-        src = urlparse(url)
+        # NOTE: S3 allows fragments as part of object names, e.g. /dataset #1/data.txt
+        # Without allow_fragments=False the parsed src.path for an object name with fragments is incomplete.
+        src = urlparse(url, allow_fragments=False)
 
         def _info(s3, tmp):
             resp = s3.head_object(Bucket=src.netloc, Key=src.path.lstrip('/"'))
@@ -891,7 +895,9 @@ class S3(object):
         DOWNLOAD_MAX_CHUNK = 2 * 1024 * 1024 * 1024 - 1
 
         url, r = self._url_and_range(key)
-        src = urlparse(url)
+        # NOTE: S3 allows fragments as part of object names, e.g. /dataset #1/data.txt
+        # Without allow_fragments=False the parsed src.path for an object name with fragments is incomplete.
+        src = urlparse(url, allow_fragments=False)
 
         def _download(s3, tmp):
             if r:
@@ -1173,7 +1179,9 @@ class S3(object):
             blob.close = lambda: None
 
         url = self._url(key)
-        src = urlparse(url)
+        # NOTE: S3 allows fragments as part of object names, e.g. /dataset #1/data.txt
+        # Without allow_fragments=False the parsed src.path for an object name with fragments is incomplete.
+        src = urlparse(url, allow_fragments=False)
         extra_args = None
         if content_type or metadata or self._encryption:
             extra_args = {}