metaflow 2.11.15__py2.py3-none-any.whl → 2.12.0__py2.py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (73)
  1. metaflow/__init__.py +8 -0
  2. metaflow/_vendor/importlib_metadata/__init__.py +1063 -0
  3. metaflow/_vendor/importlib_metadata/_adapters.py +68 -0
  4. metaflow/_vendor/importlib_metadata/_collections.py +30 -0
  5. metaflow/_vendor/importlib_metadata/_compat.py +71 -0
  6. metaflow/_vendor/importlib_metadata/_functools.py +104 -0
  7. metaflow/_vendor/importlib_metadata/_itertools.py +73 -0
  8. metaflow/_vendor/importlib_metadata/_meta.py +48 -0
  9. metaflow/_vendor/importlib_metadata/_text.py +99 -0
  10. metaflow/_vendor/importlib_metadata/py.typed +0 -0
  11. metaflow/_vendor/typeguard/__init__.py +48 -0
  12. metaflow/_vendor/typeguard/_checkers.py +906 -0
  13. metaflow/_vendor/typeguard/_config.py +108 -0
  14. metaflow/_vendor/typeguard/_decorators.py +237 -0
  15. metaflow/_vendor/typeguard/_exceptions.py +42 -0
  16. metaflow/_vendor/typeguard/_functions.py +307 -0
  17. metaflow/_vendor/typeguard/_importhook.py +213 -0
  18. metaflow/_vendor/typeguard/_memo.py +48 -0
  19. metaflow/_vendor/typeguard/_pytest_plugin.py +100 -0
  20. metaflow/_vendor/typeguard/_suppression.py +88 -0
  21. metaflow/_vendor/typeguard/_transformer.py +1193 -0
  22. metaflow/_vendor/typeguard/_union_transformer.py +54 -0
  23. metaflow/_vendor/typeguard/_utils.py +169 -0
  24. metaflow/_vendor/typeguard/py.typed +0 -0
  25. metaflow/_vendor/typing_extensions.py +3053 -0
  26. metaflow/cli.py +48 -36
  27. metaflow/clone_util.py +6 -0
  28. metaflow/cmd/develop/stubs.py +2 -0
  29. metaflow/extension_support/__init__.py +2 -0
  30. metaflow/extension_support/plugins.py +2 -0
  31. metaflow/metaflow_config.py +24 -0
  32. metaflow/metaflow_environment.py +2 -2
  33. metaflow/parameters.py +1 -0
  34. metaflow/plugins/__init__.py +19 -0
  35. metaflow/plugins/airflow/airflow.py +7 -0
  36. metaflow/plugins/argo/argo_workflows.py +17 -0
  37. metaflow/plugins/aws/batch/batch_decorator.py +3 -3
  38. metaflow/plugins/azure/__init__.py +3 -0
  39. metaflow/plugins/azure/azure_credential.py +53 -0
  40. metaflow/plugins/azure/azure_exceptions.py +1 -1
  41. metaflow/plugins/azure/azure_secret_manager_secrets_provider.py +240 -0
  42. metaflow/plugins/azure/azure_utils.py +2 -35
  43. metaflow/plugins/azure/blob_service_client_factory.py +4 -2
  44. metaflow/plugins/datastores/azure_storage.py +6 -6
  45. metaflow/plugins/datatools/s3/s3.py +1 -1
  46. metaflow/plugins/gcp/__init__.py +1 -0
  47. metaflow/plugins/gcp/gcp_secret_manager_secrets_provider.py +169 -0
  48. metaflow/plugins/gcp/gs_storage_client_factory.py +52 -1
  49. metaflow/plugins/kubernetes/kubernetes.py +85 -8
  50. metaflow/plugins/kubernetes/kubernetes_cli.py +24 -1
  51. metaflow/plugins/kubernetes/kubernetes_client.py +4 -1
  52. metaflow/plugins/kubernetes/kubernetes_decorator.py +49 -4
  53. metaflow/plugins/kubernetes/kubernetes_job.py +208 -206
  54. metaflow/plugins/kubernetes/kubernetes_jobsets.py +784 -0
  55. metaflow/plugins/timeout_decorator.py +2 -1
  56. metaflow/runner/__init__.py +0 -0
  57. metaflow/runner/click_api.py +406 -0
  58. metaflow/runner/metaflow_runner.py +452 -0
  59. metaflow/runner/nbrun.py +246 -0
  60. metaflow/runner/subprocess_manager.py +552 -0
  61. metaflow/task.py +1 -12
  62. metaflow/tuple_util.py +27 -0
  63. metaflow/util.py +0 -15
  64. metaflow/vendor.py +0 -1
  65. metaflow/version.py +1 -1
  66. {metaflow-2.11.15.dist-info → metaflow-2.12.0.dist-info}/METADATA +2 -2
  67. {metaflow-2.11.15.dist-info → metaflow-2.12.0.dist-info}/RECORD +72 -39
  68. metaflow/_vendor/v3_7/__init__.py +0 -1
  69. /metaflow/_vendor/{v3_7/zipp.py → zipp.py} +0 -0
  70. {metaflow-2.11.15.dist-info → metaflow-2.12.0.dist-info}/LICENSE +0 -0
  71. {metaflow-2.11.15.dist-info → metaflow-2.12.0.dist-info}/WHEEL +0 -0
  72. {metaflow-2.11.15.dist-info → metaflow-2.12.0.dist-info}/entry_points.txt +0 -0
  73. {metaflow-2.11.15.dist-info → metaflow-2.12.0.dist-info}/top_level.txt +0 -0
metaflow/runner/subprocess_manager.py ADDED
@@ -0,0 +1,552 @@
+ import asyncio
+ import os
+ import shutil
+ import signal
+ import subprocess
+ import sys
+ import tempfile
+ import threading
+ import time
+ from typing import Callable, Dict, Iterator, List, Optional, Tuple
+
+
+ def kill_process_and_descendants(pid, termination_timeout):
+     try:
+         subprocess.check_call(["pkill", "-TERM", "-P", str(pid)])
+     except subprocess.CalledProcessError:
+         pass
+
+     time.sleep(termination_timeout)
+
+     try:
+         subprocess.check_call(["pkill", "-KILL", "-P", str(pid)])
+     except subprocess.CalledProcessError:
+         pass
+
+
+ class LogReadTimeoutError(Exception):
+     """Exception raised when reading logs times out."""
+
+
+ class SubprocessManager(object):
+     """
+     A manager for subprocesses. The subprocess manager manages one or more
+     CommandManager objects, each of which manages an individual subprocess.
+     """
+
+     def __init__(self):
+         self.commands: Dict[int, CommandManager] = {}
+
+     async def __aenter__(self) -> "SubprocessManager":
+         return self
+
+     async def __aexit__(self, exc_type, exc_value, traceback):
+         self.cleanup()
+
+     def run_command(
+         self,
+         command: List[str],
+         env: Optional[Dict[str, str]] = None,
+         cwd: Optional[str] = None,
+         show_output: bool = False,
+     ) -> int:
+         """
+         Run a command synchronously and return its process ID.
+
+         Parameters
+         ----------
+         command : List[str]
+             The command to run, in list form.
+         env : Optional[Dict[str, str]], default None
+             Environment variables to set for the subprocess; if not specified,
+             the current environment variables are used.
+         cwd : Optional[str], default None
+             The directory to run the subprocess in; if not specified, the current
+             directory is used.
+         show_output : bool, default False
+             Whether to stream 'stdout' and 'stderr' to the console; both are
+             suppressed by default. They can be accessed later by reading the
+             files present in the CommandManager object:
+                 - command_obj.log_files["stdout"]
+                 - command_obj.log_files["stderr"]
+
+         Returns
+         -------
+         int
+             The process ID of the subprocess.
+         """
+
+         command_obj = CommandManager(command, env, cwd)
+         pid = command_obj.run(show_output=show_output)
+         self.commands[pid] = command_obj
+         return pid
+
+     async def async_run_command(
+         self,
+         command: List[str],
+         env: Optional[Dict[str, str]] = None,
+         cwd: Optional[str] = None,
+     ) -> int:
+         """
+         Run a command asynchronously and return its process ID.
+
+         Parameters
+         ----------
+         command : List[str]
+             The command to run, in list form.
+         env : Optional[Dict[str, str]], default None
+             Environment variables to set for the subprocess; if not specified,
+             the current environment variables are used.
+         cwd : Optional[str], default None
+             The directory to run the subprocess in; if not specified, the current
+             directory is used.
+
+         Returns
+         -------
+         int
+             The process ID of the subprocess.
+         """
+
+         command_obj = CommandManager(command, env, cwd)
+         pid = await command_obj.async_run()
+         self.commands[pid] = command_obj
+         return pid
+
+     def get(self, pid: int) -> Optional["CommandManager"]:
+         """
+         Get one of the CommandManagers managed by this SubprocessManager.
+
+         Parameters
+         ----------
+         pid : int
+             The process ID of the subprocess (returned by run_command or async_run_command).
+
+         Returns
+         -------
+         Optional[CommandManager]
+             The CommandManager object for the given process ID, or None if not found.
+         """
+         return self.commands.get(pid, None)
+
+     def cleanup(self) -> None:
+         """Clean up log files for all running subprocesses."""
+
+         for v in self.commands.values():
+             v.cleanup()
+
+
+ class CommandManager(object):
+     """A manager for an individual subprocess."""
+
+     def __init__(
+         self,
+         command: List[str],
+         env: Optional[Dict[str, str]] = None,
+         cwd: Optional[str] = None,
+     ):
+         """
+         Create a new CommandManager object.
+         This does not run the process itself but sets it up.
+
+         Parameters
+         ----------
+         command : List[str]
+             The command to run, in list form.
+         env : Optional[Dict[str, str]], default None
+             Environment variables to set for the subprocess; if not specified,
+             the current environment variables are used.
+         cwd : Optional[str], default None
+             The directory to run the subprocess in; if not specified, the current
+             directory is used.
+         """
+         self.command = command
+
+         self.env = env if env is not None else os.environ.copy()
+         self.cwd = cwd if cwd is not None else os.getcwd()
+
+         self.process = None
+         self.run_called: bool = False
+         self.log_files: Dict[str, str] = {}
+
+         signal.signal(signal.SIGINT, self._handle_sigint)
+
+     async def __aenter__(self) -> "CommandManager":
+         return self
+
+     async def __aexit__(self, exc_type, exc_value, traceback):
+         self.cleanup()
+
+     async def wait(
+         self, timeout: Optional[float] = None, stream: Optional[str] = None
+     ) -> None:
+         """
+         Wait for the subprocess to finish, optionally with a timeout
+         and optionally streaming its output.
+
+         You can only call `wait` if `async_run` has already been called.
+
+         Parameters
+         ----------
+         timeout : Optional[float], default None
+             The maximum time to wait for the subprocess to finish.
+             If the timeout is reached, the subprocess is killed.
+         stream : Optional[str], default None
+             If specified, that stream is printed to stdout. `stream` can
+             be one of `stdout` or `stderr`.
+         """
+
+         if not self.run_called:
+             raise RuntimeError("No command run yet to wait for...")
+
+         if timeout is None:
+             if stream is None:
+                 await self.process.wait()
+             else:
+                 await self.emit_logs(stream)
+         else:
+             try:
+                 if stream is None:
+                     await asyncio.wait_for(self.process.wait(), timeout)
+                 else:
+                     await asyncio.wait_for(self.emit_logs(stream), timeout)
+             except asyncio.TimeoutError:
+                 command_string = " ".join(self.command)
+                 await self.kill()
+                 print(
+                     "Timeout: The process (PID %d; command: '%s') did not complete "
+                     "within %s seconds." % (self.process.pid, command_string, timeout)
+                 )
+
+     def run(self, show_output: bool = False):
+         """
+         Run the subprocess synchronously. This can only be called once.
+
+         This also waits on the process implicitly.
+
+         Parameters
+         ----------
+         show_output : bool, default False
+             Whether to stream 'stdout' and 'stderr' to the console; both are
+             suppressed by default. They can be accessed later by reading the
+             files present in:
+                 - self.log_files["stdout"]
+                 - self.log_files["stderr"]
+         """
+
+         if not self.run_called:
+             self.temp_dir = tempfile.mkdtemp()
+             stdout_logfile = os.path.join(self.temp_dir, "stdout.log")
+             stderr_logfile = os.path.join(self.temp_dir, "stderr.log")
+
+             def stream_to_stdout_and_file(pipe, log_file):
+                 with open(log_file, "w") as file:
+                     for line in iter(pipe.readline, ""):
+                         if show_output:
+                             sys.stdout.write(line)
+                         file.write(line)
+                 pipe.close()
+
+             try:
+                 self.process = subprocess.Popen(
+                     self.command,
+                     cwd=self.cwd,
+                     env=self.env,
+                     stdout=subprocess.PIPE,
+                     stderr=subprocess.PIPE,
+                     bufsize=1,
+                     universal_newlines=True,
+                 )
+
+                 self.log_files["stdout"] = stdout_logfile
+                 self.log_files["stderr"] = stderr_logfile
+
+                 self.run_called = True
+
+                 stdout_thread = threading.Thread(
+                     target=stream_to_stdout_and_file,
+                     args=(self.process.stdout, stdout_logfile),
+                 )
+                 stderr_thread = threading.Thread(
+                     target=stream_to_stdout_and_file,
+                     args=(self.process.stderr, stderr_logfile),
+                 )
+
+                 stdout_thread.start()
+                 stderr_thread.start()
+
+                 self.process.wait()
+
+                 stdout_thread.join()
+                 stderr_thread.join()
+
+                 return self.process.pid
+             except Exception as e:
+                 print("Error starting subprocess: %s" % e)
+                 self.cleanup()
+         else:
+             command_string = " ".join(self.command)
+             print(
+                 "Command '%s' has already been called. Please create another "
+                 "CommandManager object." % command_string
+             )
+
+     async def async_run(self):
+         """
+         Run the subprocess asynchronously. This can only be called once.
+
+         Once this is called, you can then wait on the process (using `wait`), stream
+         logs (using `stream_log`) or kill it (using `kill`).
+         """
+
+         if not self.run_called:
+             self.temp_dir = tempfile.mkdtemp()
+             stdout_logfile = os.path.join(self.temp_dir, "stdout.log")
+             stderr_logfile = os.path.join(self.temp_dir, "stderr.log")
+
+             try:
+                 # returns when the process has been started,
+                 # not when it is finished...
+                 self.process = await asyncio.create_subprocess_exec(
+                     *self.command,
+                     cwd=self.cwd,
+                     env=self.env,
+                     stdout=open(stdout_logfile, "w", encoding="utf-8"),
+                     stderr=open(stderr_logfile, "w", encoding="utf-8"),
+                 )
+
+                 self.log_files["stdout"] = stdout_logfile
+                 self.log_files["stderr"] = stderr_logfile
+
+                 self.run_called = True
+                 return self.process.pid
+             except Exception as e:
+                 print("Error starting subprocess: %s" % e)
+                 self.cleanup()
+         else:
+             command_string = " ".join(self.command)
+             print(
+                 "Command '%s' has already been called. Please create another "
+                 "CommandManager object." % command_string
+             )
+
+     async def stream_log(
+         self,
+         stream: str,
+         position: Optional[int] = None,
+         timeout_per_line: Optional[float] = None,
+         log_write_delay: float = 0.01,
+     ) -> Iterator[Tuple[int, str]]:
+         """
+         Stream logs from the subprocess line by line.
+
+         Parameters
+         ----------
+         stream : str
+             The stream to stream logs from. Can be one of "stdout" or "stderr".
+         position : Optional[int], default None
+             The position in the log file to start streaming from. If None, streaming
+             starts from the beginning of the log file. This allows resuming streaming
+             from a previously known position.
+         timeout_per_line : Optional[float], default None
+             The time to wait for a line to be read from the log file. If None, it
+             waits indefinitely. If the timeout is reached, a LogReadTimeoutError
+             is raised. Note that this timeout is *per line* and not cumulative, so
+             this function may take significantly more time than `timeout_per_line`.
+         log_write_delay : float, default 0.01
+             Improves the probability of getting whole lines. This setting is for
+             advanced use cases.
+
+         Yields
+         ------
+         Tuple[int, str]
+             A tuple containing the position in the log file and the line read. The
+             position returned can be used to feed into another `stream_log` call,
+             for example.
+         """
+
+         if not self.run_called:
+             raise RuntimeError("No command run yet to get the logs for...")
+
+         if stream not in self.log_files:
+             raise ValueError(
+                 "No log file found for '%s', valid values are: %s"
+                 % (stream, ", ".join(self.log_files.keys()))
+             )
+
+         log_file = self.log_files[stream]
+
+         with open(log_file, mode="r", encoding="utf-8") as f:
+             if position is not None:
+                 f.seek(position)
+
+             while True:
+                 # wait a short time for complete lines to be written to the file;
+                 # otherwise, there's a possibility that a line may not be completely
+                 # written when attempting to read it.
+                 # This is not a problem, but improves readability.
+                 await asyncio.sleep(log_write_delay)
+
+                 try:
+                     if timeout_per_line is None:
+                         line = f.readline()
+                     else:
+                         line = await asyncio.wait_for(f.readline(), timeout_per_line)
+                 except asyncio.TimeoutError as e:
+                     raise LogReadTimeoutError(
+                         "Timeout while reading a line from the log file for the "
+                         "stream: %s" % stream
+                     ) from e
+
+                 # when we encounter an empty line
+                 if not line:
+                     # either the process has terminated, in which case we want to break
+                     # and stop reading the log file since no more logs
+                     # will be written to it
+                     if self.process.returncode is not None:
+                         break
+                     # or the process is still running and more logs could be written to
+                     # the file, in which case we continue reading the log file
+                     else:
+                         continue
+
+                 position = f.tell()
+                 yield position, line.rstrip()
+
+     async def emit_logs(
+         self, stream: str = "stdout", custom_logger: Callable[..., None] = print
+     ):
+         """
+         Helper function to easily emit all the logs for a given stream.
+
+         This function only terminates once all the logs have been printed.
+
+         Parameters
+         ----------
+         stream : str, default "stdout"
+             The stream to emit logs for. Can be one of "stdout" or "stderr".
+         custom_logger : Callable[..., None], default print
+             A custom logger function that takes in a string and "emits" it. By default,
+             the log is printed to stdout.
+         """
+
+         async for _, line in self.stream_log(stream):
+             custom_logger(line)
+
+     def cleanup(self):
+         """Clean up log files for a running subprocess."""
+
+         if self.run_called:
+             shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+     async def kill(self, termination_timeout: float = 1):
+         """
+         Kill the subprocess and its descendants.
+
+         Parameters
+         ----------
+         termination_timeout : float, default 1
+             The time to wait after sending a SIGTERM to the process and its descendants
+             before sending a SIGKILL.
+         """
+
+         if self.process is not None:
+             kill_process_and_descendants(self.process.pid, termination_timeout)
+         else:
+             print("No process to kill.")
+
+     def _handle_sigint(self, signum, frame):
+         asyncio.create_task(self.kill())
+
+
+ async def main():
+     flow_file = "../try.py"
+     from metaflow.cli import start
+     from metaflow.runner.click_api import MetaflowAPI
+
+     api = MetaflowAPI.from_cli(flow_file, start)
+     command = api().run(alpha=5)
+     cmd = [sys.executable, *command]
+
+     async with SubprocessManager() as spm:
+         # returns immediately
+         pid = await spm.async_run_command(cmd)
+         command_obj = spm.get(pid)
+
+         print(pid)
+
+         # this is None since the process has not completed yet
+         print(command_obj.process.returncode)
+
+         # wait / do some other processing while the process runs in the background.
+         # if the process finishes before this sleep period, the calls to `wait`
+         # below are instantaneous since it has already ended..
+         # time.sleep(10)
+
+         # wait for the process to finish
+         await command_obj.wait()
+
+         # wait for the process to finish with a timeout, kill if the timeout expires before completion
+         await command_obj.wait(timeout=2)
+
+         # wait for the process to finish while streaming logs
+         await command_obj.wait(stream="stdout")
+
+         # wait for the process to finish with a timeout while streaming logs
+         await command_obj.wait(stream="stdout", timeout=3)
+
+         # stream logs line by line and check for the existence of a string, noting down the position
+         interesting_position = 0
+         async for position, line in command_obj.stream_log(stream="stdout"):
+             print(line)
+             if "alpha is" in line:
+                 interesting_position = position
+                 break
+
+         print("ended streaming at: %s" % interesting_position)
+
+         # wait / do some other processing while the process runs in the background.
+         # if the process finishes before this sleep period, the streaming of logs
+         # below is instantaneous since it has already ended..
+         # time.sleep(10)
+
+         # this blocks till the process completes unless we uncomment the `time.sleep` above..
+         print(
+             "resuming streaming from: %s while process is still running..."
+             % interesting_position
+         )
+         async for position, line in command_obj.stream_log(
+             stream="stdout", position=interesting_position
+         ):
+             print(line)
+
+         # this will be instantaneous since the process has finished and we just read from the log file
+         print("process has ended by now... streaming again from scratch..")
+         async for position, line in command_obj.stream_log(stream="stdout"):
+             print(line)
+
+         # this will be instantaneous since the process has finished and we just read from the log file
+         print(
+             "process has ended by now... streaming again but from position of choice.."
+         )
+         async for position, line in command_obj.stream_log(
+             stream="stdout", position=interesting_position
+         ):
+             print(line)
+
+         # two parallel streams for stdout
+         tasks = [
+             command_obj.emit_logs(
+                 stream="stdout", custom_logger=lambda x: print("[STREAM A]: %s" % x)
+             ),
+             # this can be another 'command_obj' too, in which case
+             # we stream logs from 2 different subprocesses in parallel :)
+             command_obj.emit_logs(
+                 stream="stdout", custom_logger=lambda x: print("[STREAM B]: %s" % x)
+             ),
+         ]
+         await asyncio.gather(*tasks)
+
+         # get the location of log files..
+         print(command_obj.log_files)
+
+
+ if __name__ == "__main__":
+     asyncio.run(main())
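The `main()` function above already exercises the asynchronous path end to end. As a complement, here is a minimal sketch of the synchronous path via `SubprocessManager.run_command` (the `python -c` command is a stand-in, not something from this diff):

    import sys
    from metaflow.runner.subprocess_manager import SubprocessManager

    spm = SubprocessManager()
    # run_command blocks until the command finishes; output is always captured
    # to temporary log files, and echoed to the console only if show_output=True
    pid = spm.run_command([sys.executable, "-c", "print('hello')"], show_output=True)
    command_obj = spm.get(pid)
    # the captured output can be read back from the recorded log files
    with open(command_obj.log_files["stdout"]) as f:
        print(f.read())
    spm.cleanup()  # removes the temporary log directories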
metaflow/task.py CHANGED
@@ -23,18 +23,7 @@ from .util import all_equal, get_username, resolve_identity, unicode_type
  from .clone_util import clone_task_helper
  from .metaflow_current import current
  from metaflow.tracing import get_trace_id
- from metaflow.util import namedtuple_with_defaults
-
- foreach_frame_field_list = [
-     ("step", str),
-     ("var", str),
-     ("num_splits", int),
-     ("index", int),
-     ("value", str),
- ]
- ForeachFrame = namedtuple_with_defaults(
-     "ForeachFrame", foreach_frame_field_list, (None,) * (len(foreach_frame_field_list))
- )
+ from metaflow.tuple_util import ForeachFrame
 
  # Maximum number of characters of the foreach path that we store in the metadata.
  MAX_FOREACH_PATH_LENGTH = 256
metaflow/tuple_util.py ADDED
@@ -0,0 +1,27 @@
+ # Keep this file minimal in dependencies, as it is imported by metaflow at bootup.
+ def namedtuple_with_defaults(typename, field_descr, defaults=()):
+     from typing import NamedTuple
+
+     T = NamedTuple(typename, field_descr)
+     T.__new__.__defaults__ = tuple(defaults)
+
+     # Adding the following to ensure the named tuple can be (un)pickled correctly.
+     import __main__
+
+     setattr(__main__, T.__name__, T)
+     T.__module__ = "__main__"
+     return T
+
+
+ # Define namedtuples with defaults here if they need to be accessible in the client
+ # (and without a real flow).
+ foreach_frame_field_list = [
+     ("step", str),
+     ("var", str),
+     ("num_splits", int),
+     ("index", int),
+     ("value", str),
+ ]
+ ForeachFrame = namedtuple_with_defaults(
+     "ForeachFrame", foreach_frame_field_list, (None,) * (len(foreach_frame_field_list))
+ )
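To make the helper's behavior concrete, a small usage sketch (illustrative, not part of the diff): fields left unspecified fall back to the `None` defaults, and the `__main__` registration is what lets the dynamically created class round-trip through pickle:

    import pickle
    from metaflow.tuple_util import ForeachFrame

    frame = ForeachFrame("start", "items")
    # prints: ForeachFrame(step='start', var='items', num_splits=None, index=None, value=None)
    print(frame)
    # pickle resolves the class as __main__.ForeachFrame, which
    # namedtuple_with_defaults registered explicitly
    assert pickle.loads(pickle.dumps(frame)) == frame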
metaflow/util.py CHANGED
@@ -51,21 +51,6 @@ except NameError:
  from shlex import quote as _quote
 
 
- from typing import NamedTuple
-
-
- def namedtuple_with_defaults(typename, field_descr, defaults=()):
-     T = NamedTuple(typename, field_descr)
-     T.__new__.__defaults__ = tuple(defaults)
-
-     # Adding the following to ensure the named tuple can be (un)pickled correctly.
-     import __main__
-
-     setattr(__main__, T.__name__, T)
-     T.__module__ = "__main__"
-     return T
-
-
  class TempDir(object):
      # Provide a temporary directory since Python 2.7 does not have it inbuilt
      def __enter__(self):
metaflow/vendor.py CHANGED
@@ -13,7 +13,6 @@ WHITELIST = {
      "vendor_any.txt",
      "vendor_v3_5.txt",
      "vendor_v3_6.txt",
-     "vendor_v3_7.txt",
      "pip.LICENSE",
  }
 
metaflow/version.py CHANGED
@@ -1 +1 @@
- metaflow_version = "2.11.15"
+ metaflow_version = "2.12.0"
{metaflow-2.11.15.dist-info → metaflow-2.12.0.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: metaflow
- Version: 2.11.15
+ Version: 2.12.0
  Summary: Metaflow: More Data Science, Less Engineering
  Author: Metaflow Developers
  Author-email: help@metaflow.org
@@ -26,7 +26,7 @@ License-File: LICENSE
  Requires-Dist: requests
  Requires-Dist: boto3
  Provides-Extra: stubs
- Requires-Dist: metaflow-stubs ==2.11.15 ; extra == 'stubs'
+ Requires-Dist: metaflow-stubs ==2.12.0 ; extra == 'stubs'
 
  ![Metaflow_Logo_Horizontal_FullColor_Ribbon_Dark_RGB](https://user-images.githubusercontent.com/763451/89453116-96a57e00-d713-11ea-9fa6-82b29d4d6eff.png)