metaflow 2.18.12__py2.py3-none-any.whl → 2.19.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. metaflow/__init__.py +1 -0
  2. metaflow/cli.py +78 -13
  3. metaflow/cli_components/run_cmds.py +182 -39
  4. metaflow/cli_components/step_cmd.py +160 -4
  5. metaflow/client/__init__.py +1 -0
  6. metaflow/client/core.py +162 -99
  7. metaflow/client/filecache.py +59 -32
  8. metaflow/cmd/code/__init__.py +2 -1
  9. metaflow/datastore/__init__.py +1 -0
  10. metaflow/datastore/content_addressed_store.py +40 -9
  11. metaflow/datastore/datastore_set.py +10 -1
  12. metaflow/datastore/flow_datastore.py +123 -4
  13. metaflow/datastore/spin_datastore.py +91 -0
  14. metaflow/datastore/task_datastore.py +86 -2
  15. metaflow/decorators.py +75 -6
  16. metaflow/extension_support/__init__.py +372 -305
  17. metaflow/flowspec.py +3 -2
  18. metaflow/graph.py +2 -2
  19. metaflow/metaflow_config.py +41 -0
  20. metaflow/metaflow_profile.py +18 -0
  21. metaflow/packaging_sys/utils.py +2 -39
  22. metaflow/packaging_sys/v1.py +63 -16
  23. metaflow/plugins/__init__.py +2 -0
  24. metaflow/plugins/argo/argo_workflows.py +20 -25
  25. metaflow/plugins/argo/param_val.py +19 -0
  26. metaflow/plugins/cards/card_datastore.py +13 -13
  27. metaflow/plugins/cards/card_decorator.py +1 -0
  28. metaflow/plugins/cards/card_modules/basic.py +9 -3
  29. metaflow/plugins/datastores/local_storage.py +12 -6
  30. metaflow/plugins/datastores/spin_storage.py +12 -0
  31. metaflow/plugins/datatools/s3/s3.py +29 -10
  32. metaflow/plugins/datatools/s3/s3op.py +90 -62
  33. metaflow/plugins/metadata_providers/local.py +76 -82
  34. metaflow/plugins/metadata_providers/spin.py +16 -0
  35. metaflow/runner/click_api.py +4 -2
  36. metaflow/runner/metaflow_runner.py +210 -19
  37. metaflow/runtime.py +348 -21
  38. metaflow/task.py +61 -12
  39. metaflow/user_configs/config_parameters.py +2 -4
  40. metaflow/user_decorators/mutable_flow.py +1 -1
  41. metaflow/user_decorators/user_step_decorator.py +10 -1
  42. metaflow/util.py +191 -1
  43. metaflow/version.py +1 -1
  44. {metaflow-2.18.12.data → metaflow-2.19.0.data}/data/share/metaflow/devtools/Makefile +10 -0
  45. {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/METADATA +2 -4
  46. {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/RECORD +52 -48
  47. {metaflow-2.18.12.data → metaflow-2.19.0.data}/data/share/metaflow/devtools/Tiltfile +0 -0
  48. {metaflow-2.18.12.data → metaflow-2.19.0.data}/data/share/metaflow/devtools/pick_services.sh +0 -0
  49. {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/WHEEL +0 -0
  50. {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/entry_points.txt +0 -0
  51. {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/licenses/LICENSE +0 -0
  52. {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/top_level.txt +0 -0
@@ -18,6 +18,14 @@ from metaflow.tagging_util import MAX_USER_TAG_SET_SIZE, validate_tags
18
18
 
19
19
  class LocalMetadataProvider(MetadataProvider):
20
20
  TYPE = "local"
21
+ DATASTORE_DIR = DATASTORE_LOCAL_DIR # ".metaflow"
22
+
23
+ @classmethod
24
+ def _get_storage_class(cls):
25
+ # This method is meant to be overridden
26
+ from metaflow.plugins.datastores.local_storage import LocalStorage
27
+
28
+ return LocalStorage
21
29
 
22
30
  def __init__(self, environment, flow, event_logger, monitor):
23
31
  super(LocalMetadataProvider, self).__init__(
@@ -26,30 +34,28 @@ class LocalMetadataProvider(MetadataProvider):
26
34
 
27
35
  @classmethod
28
36
  def compute_info(cls, val):
29
- from metaflow.plugins.datastores.local_storage import LocalStorage
37
+ storage_class = cls._get_storage_class()
30
38
 
31
- v = os.path.realpath(os.path.join(val, DATASTORE_LOCAL_DIR))
39
+ v = os.path.realpath(os.path.join(val, cls.DATASTORE_DIR))
32
40
  if os.path.isdir(v):
33
- LocalStorage.datastore_root = v
41
+ storage_class.datastore_root = v
34
42
  return val
35
43
  raise ValueError(
36
- "Could not find directory %s in directory %s" % (DATASTORE_LOCAL_DIR, val)
44
+ "Could not find directory %s in directory %s" % (cls.DATASTORE_DIR, val)
37
45
  )
38
46
 
39
47
  @classmethod
40
48
  def default_info(cls):
41
- from metaflow.plugins.datastores.local_storage import LocalStorage
49
+ storage_class = cls._get_storage_class()
42
50
 
43
51
  def print_clean(line, **kwargs):
44
52
  print(line)
45
53
 
46
- v = LocalStorage.get_datastore_root_from_config(
54
+ v = storage_class.get_datastore_root_from_config(
47
55
  print_clean, create_on_absent=False
48
56
  )
49
57
  if v is None:
50
- return (
51
- "<No %s directory found in current working tree>" % DATASTORE_LOCAL_DIR
52
- )
58
+ return "<No %s directory found in current working tree>" % cls.DATASTORE_DIR
53
59
  return os.path.dirname(v)
54
60
 
55
61
  def version(self):
@@ -102,7 +108,7 @@ class LocalMetadataProvider(MetadataProvider):
102
108
  def register_data_artifacts(
103
109
  self, run_id, step_name, task_id, attempt_id, artifacts
104
110
  ):
105
- meta_dir = self._create_and_get_metadir(
111
+ meta_dir = self.__class__._create_and_get_metadir(
106
112
  self._flow_name, run_id, step_name, task_id
107
113
  )
108
114
  artlist = self._artifacts_to_json(
@@ -112,7 +118,7 @@ class LocalMetadataProvider(MetadataProvider):
112
118
  self._save_meta(meta_dir, artdict)
113
119
 
114
120
  def register_metadata(self, run_id, step_name, task_id, metadata):
115
- meta_dir = self._create_and_get_metadir(
121
+ meta_dir = self.__class__._create_and_get_metadir(
116
122
  self._flow_name, run_id, step_name, task_id
117
123
  )
118
124
  metalist = self._metadata_to_json(run_id, step_name, task_id, metadata)
@@ -132,9 +138,7 @@ class LocalMetadataProvider(MetadataProvider):
132
138
 
133
139
  def _optimistically_mutate():
134
140
  # get existing tags
135
- run = LocalMetadataProvider.get_object(
136
- "run", "self", {}, None, flow_id, run_id
137
- )
141
+ run = cls.get_object("run", "self", {}, None, flow_id, run_id)
138
142
  if not run:
139
143
  raise MetaflowTaggingError(
140
144
  msg="Run not found (%s, %s)" % (flow_id, run_id)
@@ -167,15 +171,13 @@ class LocalMetadataProvider(MetadataProvider):
167
171
  validate_tags(next_user_tags_set, existing_tags=existing_user_tag_set)
168
172
 
169
173
  # write new tag set to file system
170
- LocalMetadataProvider._persist_tags_for_run(
174
+ cls._persist_tags_for_run(
171
175
  flow_id, run_id, next_user_tags_set, existing_system_tag_set
172
176
  )
173
177
 
174
178
  # read tags back from file system to see if our optimism is misplaced
175
179
  # I.e. did a concurrent mutate overwrite our change
176
- run = LocalMetadataProvider.get_object(
177
- "run", "self", {}, None, flow_id, run_id
178
- )
180
+ run = cls.get_object("run", "self", {}, None, flow_id, run_id)
179
181
  if not run:
180
182
  raise MetaflowTaggingError(
181
183
  msg="Run not found for read-back check (%s, %s)" % (flow_id, run_id)
@@ -279,8 +281,6 @@ class LocalMetadataProvider(MetadataProvider):
279
281
  if obj_type not in ("root", "flow", "run", "step", "task", "artifact"):
280
282
  raise MetaflowInternalError(msg="Unexpected object type %s" % obj_type)
281
283
 
282
- from metaflow.plugins.datastores.local_storage import LocalStorage
283
-
284
284
  if obj_type == "artifact":
285
285
  # Artifacts are actually part of the tasks in the filesystem
286
286
  # E.g. we get here for (obj_type, sub_type) == (artifact, self)
@@ -307,13 +307,13 @@ class LocalMetadataProvider(MetadataProvider):
307
307
 
308
308
  # Special handling of self, artifact, and metadata
309
309
  if sub_type == "self":
310
- meta_path = LocalMetadataProvider._get_metadir(*args[:obj_order])
310
+ meta_path = cls._get_metadir(*args[:obj_order])
311
311
  if meta_path is None:
312
312
  return None
313
313
  self_file = os.path.join(meta_path, "_self.json")
314
314
  if os.path.isfile(self_file):
315
315
  obj = MetadataProvider._apply_filter(
316
- [LocalMetadataProvider._read_json_file(self_file)], filters
316
+ [cls._read_json_file(self_file)], filters
317
317
  )[0]
318
318
  # For non-descendants of a run, we are done
319
319
 
@@ -324,7 +324,7 @@ class LocalMetadataProvider(MetadataProvider):
324
324
  raise MetaflowInternalError(
325
325
  msg="Unexpected object type %s" % obj_type
326
326
  )
327
- run = LocalMetadataProvider.get_object(
327
+ run = cls.get_object(
328
328
  "run", "self", {}, None, *args[:RUN_ORDER] # *[flow_id, run_id]
329
329
  )
330
330
  if not run:
@@ -341,7 +341,7 @@ class LocalMetadataProvider(MetadataProvider):
341
341
  if obj_type not in ("root", "flow", "run", "step", "task"):
342
342
  raise MetaflowInternalError(msg="Unexpected object type %s" % obj_type)
343
343
 
344
- meta_path = LocalMetadataProvider._get_metadir(*args[:obj_order])
344
+ meta_path = cls._get_metadir(*args[:obj_order])
345
345
  result = []
346
346
  if meta_path is None:
347
347
  return result
@@ -352,9 +352,7 @@ class LocalMetadataProvider(MetadataProvider):
352
352
  attempts_done = sorted(glob.iglob(attempt_done_files))
353
353
  if attempts_done:
354
354
  successful_attempt = int(
355
- LocalMetadataProvider._read_json_file(attempts_done[-1])[
356
- "value"
357
- ]
355
+ cls._read_json_file(attempts_done[-1])["value"]
358
356
  )
359
357
  if successful_attempt is not None:
360
358
  which_artifact = "*"
@@ -365,10 +363,10 @@ class LocalMetadataProvider(MetadataProvider):
365
363
  "%d_artifact_%s.json" % (successful_attempt, which_artifact),
366
364
  )
367
365
  for obj in glob.iglob(artifact_files):
368
- result.append(LocalMetadataProvider._read_json_file(obj))
366
+ result.append(cls._read_json_file(obj))
369
367
 
370
368
  # We are getting artifacts. We should overlay with ancestral run's tags
371
- run = LocalMetadataProvider.get_object(
369
+ run = cls.get_object(
372
370
  "run", "self", {}, None, *args[:RUN_ORDER] # *[flow_id, run_id]
373
371
  )
374
372
  if not run:
@@ -388,12 +386,12 @@ class LocalMetadataProvider(MetadataProvider):
388
386
  if obj_type not in ("root", "flow", "run", "step", "task"):
389
387
  raise MetaflowInternalError(msg="Unexpected object type %s" % obj_type)
390
388
  result = []
391
- meta_path = LocalMetadataProvider._get_metadir(*args[:obj_order])
389
+ meta_path = cls._get_metadir(*args[:obj_order])
392
390
  if meta_path is None:
393
391
  return result
394
392
  files = os.path.join(meta_path, "sysmeta_*")
395
393
  for obj in glob.iglob(files):
396
- result.append(LocalMetadataProvider._read_json_file(obj))
394
+ result.append(cls._read_json_file(obj))
397
395
  return result
398
396
 
399
397
  # For the other types, we locate all the objects we need to find and return them
@@ -401,14 +399,13 @@ class LocalMetadataProvider(MetadataProvider):
401
399
  raise MetaflowInternalError(msg="Unexpected object type %s" % obj_type)
402
400
  if sub_type not in ("flow", "run", "step", "task"):
403
401
  raise MetaflowInternalError(msg="unexpected sub type %s" % sub_type)
404
- obj_path = LocalMetadataProvider._make_path(
405
- *args[:obj_order], create_on_absent=False
406
- )
402
+ obj_path = cls._make_path(*args[:obj_order], create_on_absent=False)
407
403
  result = []
408
404
  if obj_path is None:
409
405
  return result
410
406
  skip_dirs = "*/" * (sub_order - obj_order)
411
- all_meta = os.path.join(obj_path, skip_dirs, LocalStorage.METADATA_DIR)
407
+ storage_class = cls._get_storage_class()
408
+ all_meta = os.path.join(obj_path, skip_dirs, storage_class.METADATA_DIR)
412
409
  SelfInfo = collections.namedtuple("SelfInfo", ["filepath", "run_id"])
413
410
  self_infos = []
414
411
  for meta_path in glob.iglob(all_meta):
@@ -418,9 +415,7 @@ class LocalMetadataProvider(MetadataProvider):
418
415
  run_id = None
419
416
  # flow and run do not need info from ancestral run
420
417
  if sub_type in ("step", "task"):
421
- run_id = LocalMetadataProvider._deduce_run_id_from_meta_dir(
422
- meta_path, sub_type
423
- )
418
+ run_id = cls._deduce_run_id_from_meta_dir(meta_path, sub_type)
424
419
  # obj_type IS run, or more granular than run, let's do sanity check vs args
425
420
  if obj_order >= RUN_ORDER:
426
421
  if run_id != args[RUN_ORDER - 1]:
@@ -430,10 +425,10 @@ class LocalMetadataProvider(MetadataProvider):
430
425
  self_infos.append(SelfInfo(filepath=self_file, run_id=run_id))
431
426
 
432
427
  for self_info in self_infos:
433
- obj = LocalMetadataProvider._read_json_file(self_info.filepath)
428
+ obj = cls._read_json_file(self_info.filepath)
434
429
  if self_info.run_id:
435
430
  flow_id_from_args = args[0]
436
- run = LocalMetadataProvider.get_object(
431
+ run = cls.get_object(
437
432
  "run",
438
433
  "self",
439
434
  {},
@@ -452,8 +447,8 @@ class LocalMetadataProvider(MetadataProvider):
452
447
 
453
448
  return MetadataProvider._apply_filter(result, filters)
454
449
 
455
- @staticmethod
456
- def _deduce_run_id_from_meta_dir(meta_dir_path, sub_type):
450
+ @classmethod
451
+ def _deduce_run_id_from_meta_dir(cls, meta_dir_path, sub_type):
457
452
  curr_order = ObjectOrder.type_to_order(sub_type)
458
453
  levels_to_ascend = curr_order - ObjectOrder.type_to_order("run")
459
454
  if levels_to_ascend < 0:
@@ -468,8 +463,8 @@ class LocalMetadataProvider(MetadataProvider):
468
463
  )
469
464
  return run_id
470
465
 
471
- @staticmethod
472
- def _makedirs(path):
466
+ @classmethod
467
+ def _makedirs(cls, path):
473
468
  # this is for python2 compatibility.
474
469
  # Python3 has os.makedirs(exist_ok=True).
475
470
  try:
@@ -481,17 +476,15 @@ class LocalMetadataProvider(MetadataProvider):
481
476
  else:
482
477
  raise
483
478
 
484
- @staticmethod
485
- def _persist_tags_for_run(flow_id, run_id, tags, system_tags):
486
- subpath = LocalMetadataProvider._create_and_get_metadir(
487
- flow_name=flow_id, run_id=run_id
488
- )
479
+ @classmethod
480
+ def _persist_tags_for_run(cls, flow_id, run_id, tags, system_tags):
481
+ subpath = cls._create_and_get_metadir(flow_name=flow_id, run_id=run_id)
489
482
  selfname = os.path.join(subpath, "_self.json")
490
483
  if not os.path.isfile(selfname):
491
484
  raise MetaflowInternalError(
492
485
  msg="Could not verify Run existence on disk - missing %s" % selfname
493
486
  )
494
- LocalMetadataProvider._save_meta(
487
+ cls._save_meta(
495
488
  subpath,
496
489
  {
497
490
  "_self": MetadataProvider._run_to_json_static(
@@ -508,11 +501,11 @@ class LocalMetadataProvider(MetadataProvider):
508
501
  tags = set()
509
502
  if sys_tags is None:
510
503
  sys_tags = set()
511
- subpath = self._create_and_get_metadir(
504
+ subpath = self.__class__._create_and_get_metadir(
512
505
  self._flow_name, run_id, step_name, task_id
513
506
  )
514
507
  selfname = os.path.join(subpath, "_self.json")
515
- self._makedirs(subpath)
508
+ self.__class__._makedirs(subpath)
516
509
  if os.path.isfile(selfname):
517
510
  # There is a race here, but we are not aiming to make this as solid as
518
511
  # the metadata service. This is used primarily for concurrent resumes,
@@ -549,26 +542,31 @@ class LocalMetadataProvider(MetadataProvider):
549
542
  self._register_system_metadata(run_id, step_name, task_id, attempt)
550
543
  return to_return
551
544
 
552
- @staticmethod
545
+ @classmethod
553
546
  def _make_path(
554
- flow_name=None, run_id=None, step_name=None, task_id=None, create_on_absent=True
547
+ cls,
548
+ flow_name=None,
549
+ run_id=None,
550
+ step_name=None,
551
+ task_id=None,
552
+ create_on_absent=True,
555
553
  ):
556
554
 
557
- from metaflow.plugins.datastores.local_storage import LocalStorage
555
+ storage_class = cls._get_storage_class()
558
556
 
559
- if LocalStorage.datastore_root is None:
557
+ if storage_class.datastore_root is None:
560
558
 
561
559
  def print_clean(line, **kwargs):
562
560
  print(line)
563
561
 
564
- LocalStorage.datastore_root = LocalStorage.get_datastore_root_from_config(
562
+ storage_class.datastore_root = storage_class.get_datastore_root_from_config(
565
563
  print_clean, create_on_absent=create_on_absent
566
564
  )
567
- if LocalStorage.datastore_root is None:
565
+ if storage_class.datastore_root is None:
568
566
  return None
569
567
 
570
568
  if flow_name is None:
571
- return LocalStorage.datastore_root
569
+ return storage_class.datastore_root
572
570
  components = []
573
571
  if flow_name:
574
572
  components.append(flow_name)
@@ -578,37 +576,35 @@ class LocalMetadataProvider(MetadataProvider):
578
576
  components.append(step_name)
579
577
  if task_id:
580
578
  components.append(task_id)
581
- return LocalStorage().full_uri(LocalStorage.path_join(*components))
579
+ return storage_class().full_uri(storage_class.path_join(*components))
582
580
 
583
- @staticmethod
581
+ @classmethod
584
582
  def _create_and_get_metadir(
585
- flow_name=None, run_id=None, step_name=None, task_id=None
583
+ cls, flow_name=None, run_id=None, step_name=None, task_id=None
586
584
  ):
587
- from metaflow.plugins.datastores.local_storage import LocalStorage
585
+ storage_class = cls._get_storage_class()
588
586
 
589
- root_path = LocalMetadataProvider._make_path(
590
- flow_name, run_id, step_name, task_id
591
- )
592
- subpath = os.path.join(root_path, LocalStorage.METADATA_DIR)
593
- LocalMetadataProvider._makedirs(subpath)
587
+ root_path = cls._make_path(flow_name, run_id, step_name, task_id)
588
+ subpath = os.path.join(root_path, storage_class.METADATA_DIR)
589
+ cls._makedirs(subpath)
594
590
  return subpath
595
591
 
596
- @staticmethod
597
- def _get_metadir(flow_name=None, run_id=None, step_name=None, task_id=None):
598
- from metaflow.plugins.datastores.local_storage import LocalStorage
592
+ @classmethod
593
+ def _get_metadir(cls, flow_name=None, run_id=None, step_name=None, task_id=None):
594
+ storage_class = cls._get_storage_class()
599
595
 
600
- root_path = LocalMetadataProvider._make_path(
596
+ root_path = cls._make_path(
601
597
  flow_name, run_id, step_name, task_id, create_on_absent=False
602
598
  )
603
599
  if root_path is None:
604
600
  return None
605
- subpath = os.path.join(root_path, LocalStorage.METADATA_DIR)
601
+ subpath = os.path.join(root_path, storage_class.METADATA_DIR)
606
602
  if os.path.isdir(subpath):
607
603
  return subpath
608
604
  return None
609
605
 
610
- @staticmethod
611
- def _dump_json_to_file(filepath, data, allow_overwrite=False):
606
+ @classmethod
607
+ def _dump_json_to_file(cls, filepath, data, allow_overwrite=False):
612
608
  if os.path.isfile(filepath) and not allow_overwrite:
613
609
  return
614
610
  try:
@@ -622,15 +618,13 @@ class LocalMetadataProvider(MetadataProvider):
622
618
  if f and os.path.isfile(f.name):
623
619
  os.remove(f.name)
624
620
 
625
- @staticmethod
626
- def _read_json_file(filepath):
621
+ @classmethod
622
+ def _read_json_file(cls, filepath):
627
623
  with open(filepath, "r") as f:
628
624
  return json.load(f)
629
625
 
630
- @staticmethod
631
- def _save_meta(root_dir, metadict, allow_overwrite=False):
626
+ @classmethod
627
+ def _save_meta(cls, root_dir, metadict, allow_overwrite=False):
632
628
  for name, datum in metadict.items():
633
629
  filename = os.path.join(root_dir, "%s.json" % name)
634
- LocalMetadataProvider._dump_json_to_file(
635
- filename, datum, allow_overwrite=allow_overwrite
636
- )
630
+ cls._dump_json_to_file(filename, datum, allow_overwrite=allow_overwrite)
@@ -0,0 +1,16 @@
1
+ from metaflow.plugins.metadata_providers.local import LocalMetadataProvider
2
+ from metaflow.metaflow_config import DATASTORE_SPIN_LOCAL_DIR
3
+
4
+
5
+ class SpinMetadataProvider(LocalMetadataProvider):
6
+ TYPE = "spin"
7
+ DATASTORE_DIR = DATASTORE_SPIN_LOCAL_DIR # ".metaflow_spin"
8
+
9
+ @classmethod
10
+ def _get_storage_class(cls):
11
+ from metaflow.plugins.datastores.spin_storage import SpinStorage
12
+
13
+ return SpinStorage
14
+
15
+ def version(self):
16
+ return "spin"
@@ -351,8 +351,10 @@ class MetaflowAPI(object):
351
351
  class_dict = {
352
352
  "__module__": "metaflow",
353
353
  "_API_NAME": flow_file,
354
- "_internal_getattr": functools.partial(
355
- _lazy_load_command, cli_collection, "_compute_flow_parameters"
354
+ "_internal_getattr": staticmethod(
355
+ functools.partial(
356
+ _lazy_load_command, cli_collection, "_compute_flow_parameters"
357
+ )
356
358
  ),
357
359
  "__getattr__": getattr_wrapper,
358
360
  }