ob-metaflow 2.15.13.1__py2.py3-none-any.whl → 2.19.7.1rc0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169)
  1. metaflow/__init__.py +10 -3
  2. metaflow/_vendor/imghdr/__init__.py +186 -0
  3. metaflow/_vendor/yaml/__init__.py +427 -0
  4. metaflow/_vendor/yaml/composer.py +139 -0
  5. metaflow/_vendor/yaml/constructor.py +748 -0
  6. metaflow/_vendor/yaml/cyaml.py +101 -0
  7. metaflow/_vendor/yaml/dumper.py +62 -0
  8. metaflow/_vendor/yaml/emitter.py +1137 -0
  9. metaflow/_vendor/yaml/error.py +75 -0
  10. metaflow/_vendor/yaml/events.py +86 -0
  11. metaflow/_vendor/yaml/loader.py +63 -0
  12. metaflow/_vendor/yaml/nodes.py +49 -0
  13. metaflow/_vendor/yaml/parser.py +589 -0
  14. metaflow/_vendor/yaml/reader.py +185 -0
  15. metaflow/_vendor/yaml/representer.py +389 -0
  16. metaflow/_vendor/yaml/resolver.py +227 -0
  17. metaflow/_vendor/yaml/scanner.py +1435 -0
  18. metaflow/_vendor/yaml/serializer.py +111 -0
  19. metaflow/_vendor/yaml/tokens.py +104 -0
  20. metaflow/cards.py +4 -0
  21. metaflow/cli.py +125 -21
  22. metaflow/cli_components/init_cmd.py +1 -0
  23. metaflow/cli_components/run_cmds.py +204 -40
  24. metaflow/cli_components/step_cmd.py +160 -4
  25. metaflow/client/__init__.py +1 -0
  26. metaflow/client/core.py +198 -130
  27. metaflow/client/filecache.py +59 -32
  28. metaflow/cmd/code/__init__.py +2 -1
  29. metaflow/cmd/develop/stub_generator.py +49 -18
  30. metaflow/cmd/develop/stubs.py +9 -27
  31. metaflow/cmd/make_wrapper.py +30 -0
  32. metaflow/datastore/__init__.py +1 -0
  33. metaflow/datastore/content_addressed_store.py +40 -9
  34. metaflow/datastore/datastore_set.py +10 -1
  35. metaflow/datastore/flow_datastore.py +124 -4
  36. metaflow/datastore/spin_datastore.py +91 -0
  37. metaflow/datastore/task_datastore.py +92 -6
  38. metaflow/debug.py +5 -0
  39. metaflow/decorators.py +331 -82
  40. metaflow/extension_support/__init__.py +414 -356
  41. metaflow/extension_support/_empty_file.py +2 -2
  42. metaflow/flowspec.py +322 -82
  43. metaflow/graph.py +178 -15
  44. metaflow/includefile.py +25 -3
  45. metaflow/lint.py +94 -3
  46. metaflow/meta_files.py +13 -0
  47. metaflow/metadata_provider/metadata.py +13 -2
  48. metaflow/metaflow_config.py +66 -4
  49. metaflow/metaflow_environment.py +91 -25
  50. metaflow/metaflow_profile.py +18 -0
  51. metaflow/metaflow_version.py +16 -1
  52. metaflow/package/__init__.py +673 -0
  53. metaflow/packaging_sys/__init__.py +880 -0
  54. metaflow/packaging_sys/backend.py +128 -0
  55. metaflow/packaging_sys/distribution_support.py +153 -0
  56. metaflow/packaging_sys/tar_backend.py +99 -0
  57. metaflow/packaging_sys/utils.py +54 -0
  58. metaflow/packaging_sys/v1.py +527 -0
  59. metaflow/parameters.py +6 -2
  60. metaflow/plugins/__init__.py +6 -0
  61. metaflow/plugins/airflow/airflow.py +11 -1
  62. metaflow/plugins/airflow/airflow_cli.py +16 -5
  63. metaflow/plugins/argo/argo_client.py +42 -20
  64. metaflow/plugins/argo/argo_events.py +6 -6
  65. metaflow/plugins/argo/argo_workflows.py +1023 -344
  66. metaflow/plugins/argo/argo_workflows_cli.py +396 -94
  67. metaflow/plugins/argo/argo_workflows_decorator.py +9 -0
  68. metaflow/plugins/argo/argo_workflows_deployer_objects.py +75 -49
  69. metaflow/plugins/argo/capture_error.py +5 -2
  70. metaflow/plugins/argo/conditional_input_paths.py +35 -0
  71. metaflow/plugins/argo/exit_hooks.py +209 -0
  72. metaflow/plugins/argo/param_val.py +19 -0
  73. metaflow/plugins/aws/aws_client.py +6 -0
  74. metaflow/plugins/aws/aws_utils.py +33 -1
  75. metaflow/plugins/aws/batch/batch.py +72 -5
  76. metaflow/plugins/aws/batch/batch_cli.py +24 -3
  77. metaflow/plugins/aws/batch/batch_decorator.py +57 -6
  78. metaflow/plugins/aws/step_functions/step_functions.py +28 -3
  79. metaflow/plugins/aws/step_functions/step_functions_cli.py +49 -4
  80. metaflow/plugins/aws/step_functions/step_functions_deployer.py +3 -0
  81. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +30 -0
  82. metaflow/plugins/cards/card_cli.py +20 -1
  83. metaflow/plugins/cards/card_creator.py +24 -1
  84. metaflow/plugins/cards/card_datastore.py +21 -49
  85. metaflow/plugins/cards/card_decorator.py +58 -6
  86. metaflow/plugins/cards/card_modules/basic.py +38 -9
  87. metaflow/plugins/cards/card_modules/bundle.css +1 -1
  88. metaflow/plugins/cards/card_modules/chevron/renderer.py +1 -1
  89. metaflow/plugins/cards/card_modules/components.py +592 -3
  90. metaflow/plugins/cards/card_modules/convert_to_native_type.py +34 -5
  91. metaflow/plugins/cards/card_modules/json_viewer.py +232 -0
  92. metaflow/plugins/cards/card_modules/main.css +1 -0
  93. metaflow/plugins/cards/card_modules/main.js +56 -41
  94. metaflow/plugins/cards/card_modules/test_cards.py +22 -6
  95. metaflow/plugins/cards/component_serializer.py +1 -8
  96. metaflow/plugins/cards/metadata.py +22 -0
  97. metaflow/plugins/catch_decorator.py +9 -0
  98. metaflow/plugins/datastores/local_storage.py +12 -6
  99. metaflow/plugins/datastores/spin_storage.py +12 -0
  100. metaflow/plugins/datatools/s3/s3.py +49 -17
  101. metaflow/plugins/datatools/s3/s3op.py +113 -66
  102. metaflow/plugins/env_escape/client_modules.py +102 -72
  103. metaflow/plugins/events_decorator.py +127 -121
  104. metaflow/plugins/exit_hook/__init__.py +0 -0
  105. metaflow/plugins/exit_hook/exit_hook_decorator.py +46 -0
  106. metaflow/plugins/exit_hook/exit_hook_script.py +52 -0
  107. metaflow/plugins/kubernetes/kubernetes.py +12 -1
  108. metaflow/plugins/kubernetes/kubernetes_cli.py +11 -0
  109. metaflow/plugins/kubernetes/kubernetes_decorator.py +25 -6
  110. metaflow/plugins/kubernetes/kubernetes_job.py +12 -4
  111. metaflow/plugins/kubernetes/kubernetes_jobsets.py +31 -30
  112. metaflow/plugins/metadata_providers/local.py +76 -82
  113. metaflow/plugins/metadata_providers/service.py +13 -9
  114. metaflow/plugins/metadata_providers/spin.py +16 -0
  115. metaflow/plugins/package_cli.py +36 -24
  116. metaflow/plugins/parallel_decorator.py +11 -2
  117. metaflow/plugins/parsers.py +16 -0
  118. metaflow/plugins/pypi/bootstrap.py +7 -1
  119. metaflow/plugins/pypi/conda_decorator.py +41 -82
  120. metaflow/plugins/pypi/conda_environment.py +14 -6
  121. metaflow/plugins/pypi/micromamba.py +9 -1
  122. metaflow/plugins/pypi/pip.py +41 -5
  123. metaflow/plugins/pypi/pypi_decorator.py +4 -4
  124. metaflow/plugins/pypi/utils.py +22 -0
  125. metaflow/plugins/secrets/__init__.py +3 -0
  126. metaflow/plugins/secrets/secrets_decorator.py +14 -178
  127. metaflow/plugins/secrets/secrets_func.py +49 -0
  128. metaflow/plugins/secrets/secrets_spec.py +101 -0
  129. metaflow/plugins/secrets/utils.py +74 -0
  130. metaflow/plugins/test_unbounded_foreach_decorator.py +2 -2
  131. metaflow/plugins/timeout_decorator.py +0 -1
  132. metaflow/plugins/uv/bootstrap.py +29 -1
  133. metaflow/plugins/uv/uv_environment.py +5 -3
  134. metaflow/pylint_wrapper.py +5 -1
  135. metaflow/runner/click_api.py +79 -26
  136. metaflow/runner/deployer.py +208 -6
  137. metaflow/runner/deployer_impl.py +32 -12
  138. metaflow/runner/metaflow_runner.py +266 -33
  139. metaflow/runner/subprocess_manager.py +21 -1
  140. metaflow/runner/utils.py +27 -16
  141. metaflow/runtime.py +660 -66
  142. metaflow/task.py +255 -26
  143. metaflow/user_configs/config_options.py +33 -21
  144. metaflow/user_configs/config_parameters.py +220 -58
  145. metaflow/user_decorators/__init__.py +0 -0
  146. metaflow/user_decorators/common.py +144 -0
  147. metaflow/user_decorators/mutable_flow.py +512 -0
  148. metaflow/user_decorators/mutable_step.py +424 -0
  149. metaflow/user_decorators/user_flow_decorator.py +264 -0
  150. metaflow/user_decorators/user_step_decorator.py +749 -0
  151. metaflow/util.py +197 -7
  152. metaflow/vendor.py +23 -7
  153. metaflow/version.py +1 -1
  154. {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/Makefile +13 -2
  155. {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/Tiltfile +107 -7
  156. {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/pick_services.sh +1 -0
  157. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/METADATA +2 -3
  158. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/RECORD +162 -121
  159. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/WHEEL +1 -1
  160. metaflow/_vendor/v3_5/__init__.py +0 -1
  161. metaflow/_vendor/v3_5/importlib_metadata/__init__.py +0 -644
  162. metaflow/_vendor/v3_5/importlib_metadata/_compat.py +0 -152
  163. metaflow/_vendor/v3_5/zipp.py +0 -329
  164. metaflow/info_file.py +0 -25
  165. metaflow/package.py +0 -203
  166. metaflow/user_configs/config_decorators.py +0 -568
  167. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/entry_points.txt +0 -0
  168. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/licenses/LICENSE +0 -0
  169. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/top_level.txt +0 -0
@@ -18,6 +18,14 @@ from metaflow.tagging_util import MAX_USER_TAG_SET_SIZE, validate_tags
18
18
 
19
19
  class LocalMetadataProvider(MetadataProvider):
20
20
  TYPE = "local"
21
+ DATASTORE_DIR = DATASTORE_LOCAL_DIR # ".metaflow"
22
+
23
+ @classmethod
24
+ def _get_storage_class(cls):
25
+ # This method is meant to be overridden
26
+ from metaflow.plugins.datastores.local_storage import LocalStorage
27
+
28
+ return LocalStorage
21
29
 
22
30
  def __init__(self, environment, flow, event_logger, monitor):
23
31
  super(LocalMetadataProvider, self).__init__(
@@ -26,30 +34,28 @@ class LocalMetadataProvider(MetadataProvider):
26
34
 
27
35
  @classmethod
28
36
  def compute_info(cls, val):
29
- from metaflow.plugins.datastores.local_storage import LocalStorage
37
+ storage_class = cls._get_storage_class()
30
38
 
31
- v = os.path.realpath(os.path.join(val, DATASTORE_LOCAL_DIR))
39
+ v = os.path.realpath(os.path.join(val, cls.DATASTORE_DIR))
32
40
  if os.path.isdir(v):
33
- LocalStorage.datastore_root = v
41
+ storage_class.datastore_root = v
34
42
  return val
35
43
  raise ValueError(
36
- "Could not find directory %s in directory %s" % (DATASTORE_LOCAL_DIR, val)
44
+ "Could not find directory %s in directory %s" % (cls.DATASTORE_DIR, val)
37
45
  )
38
46
 
39
47
  @classmethod
40
48
  def default_info(cls):
41
- from metaflow.plugins.datastores.local_storage import LocalStorage
49
+ storage_class = cls._get_storage_class()
42
50
 
43
51
  def print_clean(line, **kwargs):
44
52
  print(line)
45
53
 
46
- v = LocalStorage.get_datastore_root_from_config(
54
+ v = storage_class.get_datastore_root_from_config(
47
55
  print_clean, create_on_absent=False
48
56
  )
49
57
  if v is None:
50
- return (
51
- "<No %s directory found in current working tree>" % DATASTORE_LOCAL_DIR
52
- )
58
+ return "<No %s directory found in current working tree>" % cls.DATASTORE_DIR
53
59
  return os.path.dirname(v)
54
60
 
55
61
  def version(self):
@@ -102,7 +108,7 @@ class LocalMetadataProvider(MetadataProvider):
102
108
  def register_data_artifacts(
103
109
  self, run_id, step_name, task_id, attempt_id, artifacts
104
110
  ):
105
- meta_dir = self._create_and_get_metadir(
111
+ meta_dir = self.__class__._create_and_get_metadir(
106
112
  self._flow_name, run_id, step_name, task_id
107
113
  )
108
114
  artlist = self._artifacts_to_json(
@@ -112,7 +118,7 @@ class LocalMetadataProvider(MetadataProvider):
112
118
  self._save_meta(meta_dir, artdict)
113
119
 
114
120
  def register_metadata(self, run_id, step_name, task_id, metadata):
115
- meta_dir = self._create_and_get_metadir(
121
+ meta_dir = self.__class__._create_and_get_metadir(
116
122
  self._flow_name, run_id, step_name, task_id
117
123
  )
118
124
  metalist = self._metadata_to_json(run_id, step_name, task_id, metadata)
@@ -132,9 +138,7 @@ class LocalMetadataProvider(MetadataProvider):
132
138
 
133
139
  def _optimistically_mutate():
134
140
  # get existing tags
135
- run = LocalMetadataProvider.get_object(
136
- "run", "self", {}, None, flow_id, run_id
137
- )
141
+ run = cls.get_object("run", "self", {}, None, flow_id, run_id)
138
142
  if not run:
139
143
  raise MetaflowTaggingError(
140
144
  msg="Run not found (%s, %s)" % (flow_id, run_id)
@@ -167,15 +171,13 @@ class LocalMetadataProvider(MetadataProvider):
167
171
  validate_tags(next_user_tags_set, existing_tags=existing_user_tag_set)
168
172
 
169
173
  # write new tag set to file system
170
- LocalMetadataProvider._persist_tags_for_run(
174
+ cls._persist_tags_for_run(
171
175
  flow_id, run_id, next_user_tags_set, existing_system_tag_set
172
176
  )
173
177
 
174
178
  # read tags back from file system to see if our optimism is misplaced
175
179
  # I.e. did a concurrent mutate overwrite our change
176
- run = LocalMetadataProvider.get_object(
177
- "run", "self", {}, None, flow_id, run_id
178
- )
180
+ run = cls.get_object("run", "self", {}, None, flow_id, run_id)
179
181
  if not run:
180
182
  raise MetaflowTaggingError(
181
183
  msg="Run not found for read-back check (%s, %s)" % (flow_id, run_id)
@@ -279,8 +281,6 @@ class LocalMetadataProvider(MetadataProvider):
279
281
  if obj_type not in ("root", "flow", "run", "step", "task", "artifact"):
280
282
  raise MetaflowInternalError(msg="Unexpected object type %s" % obj_type)
281
283
 
282
- from metaflow.plugins.datastores.local_storage import LocalStorage
283
-
284
284
  if obj_type == "artifact":
285
285
  # Artifacts are actually part of the tasks in the filesystem
286
286
  # E.g. we get here for (obj_type, sub_type) == (artifact, self)
@@ -307,13 +307,13 @@ class LocalMetadataProvider(MetadataProvider):
307
307
 
308
308
  # Special handling of self, artifact, and metadata
309
309
  if sub_type == "self":
310
- meta_path = LocalMetadataProvider._get_metadir(*args[:obj_order])
310
+ meta_path = cls._get_metadir(*args[:obj_order])
311
311
  if meta_path is None:
312
312
  return None
313
313
  self_file = os.path.join(meta_path, "_self.json")
314
314
  if os.path.isfile(self_file):
315
315
  obj = MetadataProvider._apply_filter(
316
- [LocalMetadataProvider._read_json_file(self_file)], filters
316
+ [cls._read_json_file(self_file)], filters
317
317
  )[0]
318
318
  # For non-descendants of a run, we are done
319
319
 
@@ -324,7 +324,7 @@ class LocalMetadataProvider(MetadataProvider):
324
324
  raise MetaflowInternalError(
325
325
  msg="Unexpected object type %s" % obj_type
326
326
  )
327
- run = LocalMetadataProvider.get_object(
327
+ run = cls.get_object(
328
328
  "run", "self", {}, None, *args[:RUN_ORDER] # *[flow_id, run_id]
329
329
  )
330
330
  if not run:
@@ -341,7 +341,7 @@ class LocalMetadataProvider(MetadataProvider):
341
341
  if obj_type not in ("root", "flow", "run", "step", "task"):
342
342
  raise MetaflowInternalError(msg="Unexpected object type %s" % obj_type)
343
343
 
344
- meta_path = LocalMetadataProvider._get_metadir(*args[:obj_order])
344
+ meta_path = cls._get_metadir(*args[:obj_order])
345
345
  result = []
346
346
  if meta_path is None:
347
347
  return result
@@ -352,9 +352,7 @@ class LocalMetadataProvider(MetadataProvider):
352
352
  attempts_done = sorted(glob.iglob(attempt_done_files))
353
353
  if attempts_done:
354
354
  successful_attempt = int(
355
- LocalMetadataProvider._read_json_file(attempts_done[-1])[
356
- "value"
357
- ]
355
+ cls._read_json_file(attempts_done[-1])["value"]
358
356
  )
359
357
  if successful_attempt is not None:
360
358
  which_artifact = "*"
@@ -365,10 +363,10 @@ class LocalMetadataProvider(MetadataProvider):
365
363
  "%d_artifact_%s.json" % (successful_attempt, which_artifact),
366
364
  )
367
365
  for obj in glob.iglob(artifact_files):
368
- result.append(LocalMetadataProvider._read_json_file(obj))
366
+ result.append(cls._read_json_file(obj))
369
367
 
370
368
  # We are getting artifacts. We should overlay with ancestral run's tags
371
- run = LocalMetadataProvider.get_object(
369
+ run = cls.get_object(
372
370
  "run", "self", {}, None, *args[:RUN_ORDER] # *[flow_id, run_id]
373
371
  )
374
372
  if not run:
@@ -388,12 +386,12 @@ class LocalMetadataProvider(MetadataProvider):
388
386
  if obj_type not in ("root", "flow", "run", "step", "task"):
389
387
  raise MetaflowInternalError(msg="Unexpected object type %s" % obj_type)
390
388
  result = []
391
- meta_path = LocalMetadataProvider._get_metadir(*args[:obj_order])
389
+ meta_path = cls._get_metadir(*args[:obj_order])
392
390
  if meta_path is None:
393
391
  return result
394
392
  files = os.path.join(meta_path, "sysmeta_*")
395
393
  for obj in glob.iglob(files):
396
- result.append(LocalMetadataProvider._read_json_file(obj))
394
+ result.append(cls._read_json_file(obj))
397
395
  return result
398
396
 
399
397
  # For the other types, we locate all the objects we need to find and return them
@@ -401,14 +399,13 @@ class LocalMetadataProvider(MetadataProvider):
401
399
  raise MetaflowInternalError(msg="Unexpected object type %s" % obj_type)
402
400
  if sub_type not in ("flow", "run", "step", "task"):
403
401
  raise MetaflowInternalError(msg="unexpected sub type %s" % sub_type)
404
- obj_path = LocalMetadataProvider._make_path(
405
- *args[:obj_order], create_on_absent=False
406
- )
402
+ obj_path = cls._make_path(*args[:obj_order], create_on_absent=False)
407
403
  result = []
408
404
  if obj_path is None:
409
405
  return result
410
406
  skip_dirs = "*/" * (sub_order - obj_order)
411
- all_meta = os.path.join(obj_path, skip_dirs, LocalStorage.METADATA_DIR)
407
+ storage_class = cls._get_storage_class()
408
+ all_meta = os.path.join(obj_path, skip_dirs, storage_class.METADATA_DIR)
412
409
  SelfInfo = collections.namedtuple("SelfInfo", ["filepath", "run_id"])
413
410
  self_infos = []
414
411
  for meta_path in glob.iglob(all_meta):
@@ -418,9 +415,7 @@ class LocalMetadataProvider(MetadataProvider):
418
415
  run_id = None
419
416
  # flow and run do not need info from ancestral run
420
417
  if sub_type in ("step", "task"):
421
- run_id = LocalMetadataProvider._deduce_run_id_from_meta_dir(
422
- meta_path, sub_type
423
- )
418
+ run_id = cls._deduce_run_id_from_meta_dir(meta_path, sub_type)
424
419
  # obj_type IS run, or more granular than run, let's do sanity check vs args
425
420
  if obj_order >= RUN_ORDER:
426
421
  if run_id != args[RUN_ORDER - 1]:
@@ -430,10 +425,10 @@ class LocalMetadataProvider(MetadataProvider):
430
425
  self_infos.append(SelfInfo(filepath=self_file, run_id=run_id))
431
426
 
432
427
  for self_info in self_infos:
433
- obj = LocalMetadataProvider._read_json_file(self_info.filepath)
428
+ obj = cls._read_json_file(self_info.filepath)
434
429
  if self_info.run_id:
435
430
  flow_id_from_args = args[0]
436
- run = LocalMetadataProvider.get_object(
431
+ run = cls.get_object(
437
432
  "run",
438
433
  "self",
439
434
  {},
@@ -452,8 +447,8 @@ class LocalMetadataProvider(MetadataProvider):
452
447
 
453
448
  return MetadataProvider._apply_filter(result, filters)
454
449
 
455
- @staticmethod
456
- def _deduce_run_id_from_meta_dir(meta_dir_path, sub_type):
450
+ @classmethod
451
+ def _deduce_run_id_from_meta_dir(cls, meta_dir_path, sub_type):
457
452
  curr_order = ObjectOrder.type_to_order(sub_type)
458
453
  levels_to_ascend = curr_order - ObjectOrder.type_to_order("run")
459
454
  if levels_to_ascend < 0:
@@ -468,8 +463,8 @@ class LocalMetadataProvider(MetadataProvider):
468
463
  )
469
464
  return run_id
470
465
 
471
- @staticmethod
472
- def _makedirs(path):
466
+ @classmethod
467
+ def _makedirs(cls, path):
473
468
  # this is for python2 compatibility.
474
469
  # Python3 has os.makedirs(exist_ok=True).
475
470
  try:
@@ -481,17 +476,15 @@ class LocalMetadataProvider(MetadataProvider):
481
476
  else:
482
477
  raise
483
478
 
484
- @staticmethod
485
- def _persist_tags_for_run(flow_id, run_id, tags, system_tags):
486
- subpath = LocalMetadataProvider._create_and_get_metadir(
487
- flow_name=flow_id, run_id=run_id
488
- )
479
+ @classmethod
480
+ def _persist_tags_for_run(cls, flow_id, run_id, tags, system_tags):
481
+ subpath = cls._create_and_get_metadir(flow_name=flow_id, run_id=run_id)
489
482
  selfname = os.path.join(subpath, "_self.json")
490
483
  if not os.path.isfile(selfname):
491
484
  raise MetaflowInternalError(
492
485
  msg="Could not verify Run existence on disk - missing %s" % selfname
493
486
  )
494
- LocalMetadataProvider._save_meta(
487
+ cls._save_meta(
495
488
  subpath,
496
489
  {
497
490
  "_self": MetadataProvider._run_to_json_static(
@@ -508,11 +501,11 @@ class LocalMetadataProvider(MetadataProvider):
508
501
  tags = set()
509
502
  if sys_tags is None:
510
503
  sys_tags = set()
511
- subpath = self._create_and_get_metadir(
504
+ subpath = self.__class__._create_and_get_metadir(
512
505
  self._flow_name, run_id, step_name, task_id
513
506
  )
514
507
  selfname = os.path.join(subpath, "_self.json")
515
- self._makedirs(subpath)
508
+ self.__class__._makedirs(subpath)
516
509
  if os.path.isfile(selfname):
517
510
  # There is a race here, but we are not aiming to make this as solid as
518
511
  # the metadata service. This is used primarily for concurrent resumes,
@@ -549,26 +542,31 @@ class LocalMetadataProvider(MetadataProvider):
549
542
  self._register_system_metadata(run_id, step_name, task_id, attempt)
550
543
  return to_return
551
544
 
552
- @staticmethod
545
+ @classmethod
553
546
  def _make_path(
554
- flow_name=None, run_id=None, step_name=None, task_id=None, create_on_absent=True
547
+ cls,
548
+ flow_name=None,
549
+ run_id=None,
550
+ step_name=None,
551
+ task_id=None,
552
+ create_on_absent=True,
555
553
  ):
556
554
 
557
- from metaflow.plugins.datastores.local_storage import LocalStorage
555
+ storage_class = cls._get_storage_class()
558
556
 
559
- if LocalStorage.datastore_root is None:
557
+ if storage_class.datastore_root is None:
560
558
 
561
559
  def print_clean(line, **kwargs):
562
560
  print(line)
563
561
 
564
- LocalStorage.datastore_root = LocalStorage.get_datastore_root_from_config(
562
+ storage_class.datastore_root = storage_class.get_datastore_root_from_config(
565
563
  print_clean, create_on_absent=create_on_absent
566
564
  )
567
- if LocalStorage.datastore_root is None:
565
+ if storage_class.datastore_root is None:
568
566
  return None
569
567
 
570
568
  if flow_name is None:
571
- return LocalStorage.datastore_root
569
+ return storage_class.datastore_root
572
570
  components = []
573
571
  if flow_name:
574
572
  components.append(flow_name)
@@ -578,37 +576,35 @@ class LocalMetadataProvider(MetadataProvider):
578
576
  components.append(step_name)
579
577
  if task_id:
580
578
  components.append(task_id)
581
- return LocalStorage().full_uri(LocalStorage.path_join(*components))
579
+ return storage_class().full_uri(storage_class.path_join(*components))
582
580
 
583
- @staticmethod
581
+ @classmethod
584
582
  def _create_and_get_metadir(
585
- flow_name=None, run_id=None, step_name=None, task_id=None
583
+ cls, flow_name=None, run_id=None, step_name=None, task_id=None
586
584
  ):
587
- from metaflow.plugins.datastores.local_storage import LocalStorage
585
+ storage_class = cls._get_storage_class()
588
586
 
589
- root_path = LocalMetadataProvider._make_path(
590
- flow_name, run_id, step_name, task_id
591
- )
592
- subpath = os.path.join(root_path, LocalStorage.METADATA_DIR)
593
- LocalMetadataProvider._makedirs(subpath)
587
+ root_path = cls._make_path(flow_name, run_id, step_name, task_id)
588
+ subpath = os.path.join(root_path, storage_class.METADATA_DIR)
589
+ cls._makedirs(subpath)
594
590
  return subpath
595
591
 
596
- @staticmethod
597
- def _get_metadir(flow_name=None, run_id=None, step_name=None, task_id=None):
598
- from metaflow.plugins.datastores.local_storage import LocalStorage
592
+ @classmethod
593
+ def _get_metadir(cls, flow_name=None, run_id=None, step_name=None, task_id=None):
594
+ storage_class = cls._get_storage_class()
599
595
 
600
- root_path = LocalMetadataProvider._make_path(
596
+ root_path = cls._make_path(
601
597
  flow_name, run_id, step_name, task_id, create_on_absent=False
602
598
  )
603
599
  if root_path is None:
604
600
  return None
605
- subpath = os.path.join(root_path, LocalStorage.METADATA_DIR)
601
+ subpath = os.path.join(root_path, storage_class.METADATA_DIR)
606
602
  if os.path.isdir(subpath):
607
603
  return subpath
608
604
  return None
609
605
 
610
- @staticmethod
611
- def _dump_json_to_file(filepath, data, allow_overwrite=False):
606
+ @classmethod
607
+ def _dump_json_to_file(cls, filepath, data, allow_overwrite=False):
612
608
  if os.path.isfile(filepath) and not allow_overwrite:
613
609
  return
614
610
  try:
@@ -622,15 +618,13 @@ class LocalMetadataProvider(MetadataProvider):
622
618
  if f and os.path.isfile(f.name):
623
619
  os.remove(f.name)
624
620
 
625
- @staticmethod
626
- def _read_json_file(filepath):
621
+ @classmethod
622
+ def _read_json_file(cls, filepath):
627
623
  with open(filepath, "r") as f:
628
624
  return json.load(f)
629
625
 
630
- @staticmethod
631
- def _save_meta(root_dir, metadict, allow_overwrite=False):
626
+ @classmethod
627
+ def _save_meta(cls, root_dir, metadict, allow_overwrite=False):
632
628
  for name, datum in metadict.items():
633
629
  filename = os.path.join(root_dir, "%s.json" % name)
634
- LocalMetadataProvider._dump_json_to_file(
635
- filename, datum, allow_overwrite=allow_overwrite
636
- )
630
+ cls._dump_json_to_file(filename, datum, allow_overwrite=allow_overwrite)
@@ -72,14 +72,18 @@ class ServiceMetadataProvider(MetadataProvider):
72
72
  @classmethod
73
73
  def compute_info(cls, val):
74
74
  v = val.rstrip("/")
75
- try:
76
- resp = cls._session.get(
77
- os.path.join(v, "ping"), headers=SERVICE_HEADERS.copy()
78
- )
79
- resp.raise_for_status()
80
- except: # noqa E722
81
- raise ValueError("Metaflow service [%s] unreachable." % v)
82
- return v
75
+ for i in range(SERVICE_RETRY_COUNT):
76
+ try:
77
+ resp = cls._session.get(
78
+ os.path.join(v, "ping"), headers=SERVICE_HEADERS.copy()
79
+ )
80
+ resp.raise_for_status()
81
+ except: # noqa E722
82
+ time.sleep(2 ** (i - 1))
83
+ else:
84
+ return v
85
+
86
+ raise ValueError("Metaflow service [%s] unreachable." % v)
83
87
 
84
88
  @classmethod
85
89
  def default_info(cls):
@@ -587,7 +591,7 @@ class ServiceMetadataProvider(MetadataProvider):
587
591
  else:
588
592
  if resp.status_code < 300:
589
593
  return resp.headers.get("METADATA_SERVICE_VERSION", None)
590
- elif resp.status_code != 503:
594
+ elif resp.status_code not in (503, 500):
591
595
  raise ServiceException(
592
596
  "Metadata request (%s) failed"
593
597
  " (code %s): %s" % (url, resp.status_code, resp.text),
@@ -0,0 +1,16 @@
1
+ from metaflow.plugins.metadata_providers.local import LocalMetadataProvider
2
+ from metaflow.metaflow_config import DATASTORE_SPIN_LOCAL_DIR
3
+
4
+
5
+ class SpinMetadataProvider(LocalMetadataProvider):
6
+ TYPE = "spin"
7
+ DATASTORE_DIR = DATASTORE_SPIN_LOCAL_DIR # ".metaflow_spin"
8
+
9
+ @classmethod
10
+ def _get_storage_class(cls):
11
+ from metaflow.plugins.datastores.spin_storage import SpinStorage
12
+
13
+ return SpinStorage
14
+
15
+ def version(self):
16
+ return "spin"
@@ -9,49 +9,61 @@ def cli():
9
9
 
10
10
 
11
11
  @cli.group(help="Commands related to code packages.")
12
+ @click.option(
13
+ "--timeout", default=60, help="Timeout for package operations in seconds."
14
+ )
12
15
  @click.pass_obj
13
- def package(obj):
16
+ def package(obj, timeout):
14
17
  # Prepare the package before any of the sub-commands are invoked.
18
+ # We explicitly will *not* upload it to the datastore.
15
19
  obj.package = MetaflowPackage(
16
- obj.flow, obj.environment, obj.echo, obj.package_suffixes
20
+ obj.flow,
21
+ obj.environment,
22
+ obj.echo,
23
+ suffixes=obj.package_suffixes,
24
+ flow_datastore=None,
17
25
  )
26
+ obj.package_op_timeout = timeout
18
27
 
19
28
 
20
- @package.command(help="Output information about the current code package.")
29
+ @package.command(help="Output information about the code package.")
21
30
  @click.pass_obj
22
31
  def info(obj):
23
- obj.echo("Status of the current working directory:", fg="magenta", bold=False)
24
- obj.echo_always(
25
- "Hash: *%s*" % sha1(obj.package.blob).hexdigest(),
26
- highlight="green",
27
- highlight_bold=False,
28
- )
29
- obj.echo_always(
30
- "Package size: *%d* KB" % (len(obj.package.blob) / 1024),
31
- highlight="green",
32
- highlight_bold=False,
33
- )
34
- num = sum(1 for _ in obj.package.path_tuples())
35
- obj.echo_always(
36
- "Number of files: *%d*" % num, highlight="green", highlight_bold=False
37
- )
32
+ obj.echo_always(obj.package.show())
38
33
 
39
34
 
40
- @package.command(help="List files included in the code package.")
35
+ @package.command(help="List all files included in the code package.")
36
+ @click.option(
37
+ "--archive/--no-archive",
38
+ default=False,
39
+ help="If True, lists the file paths as present in the code package archive; "
40
+ "otherwise, lists the files on your filesystem included in the code package",
41
+ show_default=True,
42
+ )
41
43
  @click.pass_obj
42
- def list(obj):
44
+ def list(obj, archive=False):
45
+ _ = obj.package.blob_with_timeout(timeout=obj.package_op_timeout)
46
+ # We now have all the information about the blob
43
47
  obj.echo(
44
- "Files included in the code package " "(change with --package-suffixes):",
48
+ "Files included in the code package (change with --package-suffixes):",
45
49
  fg="magenta",
46
50
  bold=False,
47
51
  )
48
- obj.echo_always("\n".join(path for path, _ in obj.package.path_tuples()))
52
+ if archive:
53
+ obj.echo_always("\n".join(path for _, path in obj.package.path_tuples()))
54
+ else:
55
+ obj.echo_always("\n".join(path for path, _ in obj.package.path_tuples()))
49
56
 
50
57
 
51
- @package.command(help="Save the current code package in a tar file")
58
+ @package.command(help="Save the current code package to a file.")
52
59
  @click.argument("path")
53
60
  @click.pass_obj
54
61
  def save(obj, path):
55
62
  with open(path, "wb") as f:
56
63
  f.write(obj.package.blob)
57
- obj.echo("Code package saved in *%s*." % path, fg="magenta", bold=False)
64
+ obj.echo(
65
+ "Code package saved in *%s* with metadata: %s"
66
+ % (path, obj.package.package_metadata),
67
+ fg="magenta",
68
+ bold=False,
69
+ )
@@ -36,8 +36,10 @@ class ParallelDecorator(StepDecorator):
36
36
  defaults = {}
37
37
  IS_PARALLEL = True
38
38
 
39
- def __init__(self, attributes=None, statically_defined=False):
40
- super(ParallelDecorator, self).__init__(attributes, statically_defined)
39
+ def __init__(self, attributes=None, statically_defined=False, inserted_by=None):
40
+ super(ParallelDecorator, self).__init__(
41
+ attributes, statically_defined, inserted_by
42
+ )
41
43
 
42
44
  def runtime_step_cli(
43
45
  self, cli_args, retry_count, max_user_code_retries, ubf_context
@@ -51,6 +53,13 @@ class ParallelDecorator(StepDecorator):
51
53
  def step_init(
52
54
  self, flow, graph, step_name, decorators, environment, flow_datastore, logger
53
55
  ):
56
+ # TODO: This can be supported in the future, but for the time being we disable the transition as it leads to
57
+ # a UBF exception during runtime when the actual parallel-join step is conditional (switching between different join implementations from the @parallel step).
58
+ if graph[step_name].type == "split-switch":
59
+ raise MetaflowException(
60
+ "A @parallel step can not be a conditional switch step. Please add a join step after *%s*"
61
+ % step_name
62
+ )
54
63
  self.environment = environment
55
64
  # Previously, the `parallel` property was a hardcoded, static property within `current`.
56
65
  # Whenever `current.parallel` was called, it returned a named tuple with values coming from
@@ -0,0 +1,16 @@
1
+ from metaflow._vendor import yaml
2
+
3
+
4
+ def yaml_parser(content: str) -> dict:
5
+ """
6
+ Parse YAML content to a dictionary.
7
+
8
+ Parameters
9
+ ----------
10
+ content : str
11
+
12
+ Returns
13
+ -------
14
+ dict
15
+ """
16
+ return yaml.safe_load(content)
@@ -12,6 +12,7 @@ import platform
12
12
  from urllib.error import URLError
13
13
  from urllib.request import urlopen
14
14
  from metaflow.metaflow_config import DATASTORE_LOCAL_DIR, CONDA_USE_FAST_INIT
15
+ from metaflow.packaging_sys import MetaflowCodeContent, ContentType
15
16
  from metaflow.plugins import DATASTORES
16
17
  from metaflow.plugins.pypi.utils import MICROMAMBA_MIRROR_URL, MICROMAMBA_URL
17
18
  from metaflow.util import which
@@ -365,8 +366,13 @@ if __name__ == "__main__":
365
366
 
366
367
  # Move MAGIC_FILE inside local datastore.
367
368
  os.makedirs(manifest_dir, exist_ok=True)
369
+ path_to_manifest = MetaflowCodeContent.get_filename(
370
+ MAGIC_FILE, ContentType.OTHER_CONTENT
371
+ )
372
+ if path_to_manifest is None:
373
+ raise RuntimeError(f"Cannot find {MAGIC_FILE} in the package")
368
374
  shutil.move(
369
- os.path.join(os.getcwd(), MAGIC_FILE),
375
+ path_to_manifest,
370
376
  os.path.join(manifest_dir, MAGIC_FILE),
371
377
  )
372
378
  with open(os.path.join(manifest_dir, MAGIC_FILE)) as f: