ob-metaflow 2.16.8.2rc2__py2.py3-none-any.whl → 2.17.1.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ob-metaflow might be problematic. Click here for more details.

Files changed (65)
  1. metaflow/_vendor/click/core.py +3 -4
  2. metaflow/_vendor/imghdr/__init__.py +7 -1
  3. metaflow/_vendor/yaml/__init__.py +427 -0
  4. metaflow/_vendor/yaml/composer.py +139 -0
  5. metaflow/_vendor/yaml/constructor.py +748 -0
  6. metaflow/_vendor/yaml/cyaml.py +101 -0
  7. metaflow/_vendor/yaml/dumper.py +62 -0
  8. metaflow/_vendor/yaml/emitter.py +1137 -0
  9. metaflow/_vendor/yaml/error.py +75 -0
  10. metaflow/_vendor/yaml/events.py +86 -0
  11. metaflow/_vendor/yaml/loader.py +63 -0
  12. metaflow/_vendor/yaml/nodes.py +49 -0
  13. metaflow/_vendor/yaml/parser.py +589 -0
  14. metaflow/_vendor/yaml/reader.py +185 -0
  15. metaflow/_vendor/yaml/representer.py +389 -0
  16. metaflow/_vendor/yaml/resolver.py +227 -0
  17. metaflow/_vendor/yaml/scanner.py +1435 -0
  18. metaflow/_vendor/yaml/serializer.py +111 -0
  19. metaflow/_vendor/yaml/tokens.py +104 -0
  20. metaflow/cli.py +11 -2
  21. metaflow/cli_components/run_cmds.py +0 -15
  22. metaflow/client/core.py +6 -1
  23. metaflow/cmd/make_wrapper.py +30 -0
  24. metaflow/extension_support/__init__.py +4 -3
  25. metaflow/flowspec.py +1 -113
  26. metaflow/graph.py +10 -134
  27. metaflow/lint.py +3 -70
  28. metaflow/metaflow_environment.py +14 -6
  29. metaflow/metaflow_version.py +15 -0
  30. metaflow/package/__init__.py +18 -9
  31. metaflow/packaging_sys/__init__.py +53 -43
  32. metaflow/packaging_sys/backend.py +21 -6
  33. metaflow/packaging_sys/tar_backend.py +16 -3
  34. metaflow/packaging_sys/v1.py +21 -21
  35. metaflow/plugins/argo/argo_client.py +31 -14
  36. metaflow/plugins/argo/argo_workflows.py +67 -22
  37. metaflow/plugins/argo/argo_workflows_cli.py +348 -85
  38. metaflow/plugins/argo/argo_workflows_deployer_objects.py +69 -0
  39. metaflow/plugins/aws/step_functions/step_functions.py +0 -6
  40. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +30 -0
  41. metaflow/plugins/cards/card_modules/basic.py +3 -14
  42. metaflow/plugins/cards/card_modules/convert_to_native_type.py +7 -1
  43. metaflow/plugins/kubernetes/kubernetes_decorator.py +1 -1
  44. metaflow/plugins/kubernetes/kubernetes_job.py +8 -2
  45. metaflow/plugins/kubernetes/kubernetes_jobsets.py +26 -28
  46. metaflow/plugins/pypi/conda_decorator.py +4 -2
  47. metaflow/runner/click_api.py +14 -7
  48. metaflow/runner/deployer.py +160 -7
  49. metaflow/runner/deployer_impl.py +15 -7
  50. metaflow/runner/subprocess_manager.py +20 -12
  51. metaflow/runtime.py +27 -102
  52. metaflow/task.py +25 -46
  53. metaflow/user_decorators/mutable_flow.py +8 -6
  54. metaflow/util.py +0 -29
  55. metaflow/vendor.py +23 -6
  56. metaflow/version.py +1 -1
  57. {ob_metaflow-2.16.8.2rc2.data → ob_metaflow-2.17.1.0.data}/data/share/metaflow/devtools/Makefile +3 -2
  58. {ob_metaflow-2.16.8.2rc2.data → ob_metaflow-2.17.1.0.data}/data/share/metaflow/devtools/Tiltfile +2 -2
  59. {ob_metaflow-2.16.8.2rc2.dist-info → ob_metaflow-2.17.1.0.dist-info}/METADATA +2 -2
  60. {ob_metaflow-2.16.8.2rc2.dist-info → ob_metaflow-2.17.1.0.dist-info}/RECORD +65 -48
  61. {ob_metaflow-2.16.8.2rc2.data → ob_metaflow-2.17.1.0.data}/data/share/metaflow/devtools/pick_services.sh +0 -0
  62. {ob_metaflow-2.16.8.2rc2.dist-info → ob_metaflow-2.17.1.0.dist-info}/WHEEL +0 -0
  63. {ob_metaflow-2.16.8.2rc2.dist-info → ob_metaflow-2.17.1.0.dist-info}/entry_points.txt +0 -0
  64. {ob_metaflow-2.16.8.2rc2.dist-info → ob_metaflow-2.17.1.0.dist-info}/licenses/LICENSE +0 -0
  65. {ob_metaflow-2.16.8.2rc2.dist-info → ob_metaflow-2.17.1.0.dist-info}/top_level.txt +0 -0
metaflow/runtime.py CHANGED
@@ -37,7 +37,7 @@ from .debug import debug
37
37
  from .decorators import flow_decorators
38
38
  from .flowspec import _FlowState
39
39
  from .mflog import mflog, RUNTIME_LOG_SOURCE
40
- from .util import to_unicode, compress_list, unicode_type, get_split_branch_for_node
40
+ from .util import to_unicode, compress_list, unicode_type
41
41
  from .clone_util import clone_task_helper
42
42
  from .unbounded_foreach import (
43
43
  CONTROL_TASK_TAG,
@@ -820,90 +820,39 @@ class NativeRuntime(object):
820
820
  # matching_split is the split-parent of the finished task
821
821
  matching_split = self._graph[self._graph[next_step].split_parents[-1]]
822
822
  _, foreach_stack = task.finished_id
823
-
824
- direct_parents = set(self._graph[next_step].in_funcs)
825
-
826
- # next step is a foreach join
823
+ index = ""
827
824
  if matching_split.type == "foreach":
828
- top_frame = task.finished_id[1][-1]
829
- num_splits = top_frame.num_splits
830
-
831
- finished_for_each_index = {}
832
- for finished_id, pathspec in self._finished.items():
833
- finished_step, finished_foreach_stack = finished_id
834
-
835
- if finished_step not in direct_parents:
836
- continue
837
-
838
- if (
839
- len(finished_foreach_stack) != len(task.finished_id[1])
840
- or finished_foreach_stack[:-1] != task.finished_id[1][:-1]
841
- ):
842
- continue
843
-
844
- current_index = finished_foreach_stack[-1].index
845
- finished_for_each_index[current_index] = pathspec
825
+ # next step is a foreach join
846
826
 
847
- if (
848
- num_splits is not None
849
- and len(finished_for_each_index) == num_splits
850
- ):
851
- required_tasks = list(finished_for_each_index.values())
852
- index = self._translate_index(task, next_step, "join")
853
- self._queue_push(
854
- next_step,
855
- {"input_paths": required_tasks, "join_type": "foreach"},
856
- index,
857
- )
858
- elif matching_split.type == "split-switch":
859
- required_tasks = [task.path]
860
- join_type = "split-switch"
827
+ def siblings(foreach_stack):
828
+ top = foreach_stack[-1]
829
+ bottom = list(foreach_stack[:-1])
830
+ for index in range(top.num_splits):
831
+ yield tuple(bottom + [top._replace(index=index)])
832
+
833
+ # required tasks are all split-siblings of the finished task
834
+ required_tasks = [
835
+ self._finished.get((task.step, s)) for s in siblings(foreach_stack)
836
+ ]
837
+ join_type = "foreach"
838
+ index = self._translate_index(task, next_step, "join")
839
+ else:
840
+ # next step is a split
841
+ # required tasks are all branches joined by the next step
842
+ required_tasks = [
843
+ self._finished.get((step, foreach_stack))
844
+ for step in self._graph[next_step].in_funcs
845
+ ]
846
+ join_type = "linear"
861
847
  index = self._translate_index(task, next_step, "linear")
848
+
849
+ if all(required_tasks):
850
+ # all tasks to be joined are ready. Schedule the next join step.
862
851
  self._queue_push(
863
852
  next_step,
864
853
  {"input_paths": required_tasks, "join_type": join_type},
865
854
  index,
866
855
  )
867
- else:
868
- split_node_name = matching_split.name
869
- expected_branches = set(matching_split.out_funcs)
870
-
871
- resolved_and_finished = {}
872
- for finished_id, pathspec in self._finished.items():
873
- finished_step, finished_foreach_stack = finished_id
874
-
875
- if finished_step not in direct_parents:
876
- continue
877
-
878
- if finished_foreach_stack != foreach_stack:
879
- continue
880
-
881
- branch = get_split_branch_for_node(
882
- self._graph, finished_step, split_node_name
883
- )
884
- if branch in expected_branches:
885
- resolved_and_finished[branch] = pathspec
886
-
887
- if set(resolved_and_finished.keys()) == expected_branches:
888
- required_tasks = list(resolved_and_finished.values())
889
- index = self._translate_index(task, next_step, "linear")
890
- self._queue_push(
891
- next_step,
892
- {"input_paths": required_tasks, "join_type": "linear"},
893
- index,
894
- )
895
-
896
- def _queue_task_switch(self, task, next_steps):
897
- if len(next_steps) != 1:
898
- msg = (
899
- "Step *{step}* is a switch statement but runtime got {actual} transitions. "
900
- "Expected exactly 1 chosen step."
901
- )
902
- raise Exception(msg.format(step=task.step, actual=len(next_steps)))
903
-
904
- chosen_step = next_steps[0]
905
- index = self._translate_index(task, chosen_step, "linear")
906
- self._queue_push(chosen_step, {"input_paths": [task.path]}, index)
907
856
 
908
857
  def _queue_task_foreach(self, task, next_steps):
909
858
  # CHECK: this condition should be enforced by the linter but
@@ -981,28 +930,7 @@ class NativeRuntime(object):
981
930
  next_steps = []
982
931
  foreach = None
983
932
  expected = self._graph[task.step].out_funcs
984
- if self._graph[task.step].type == "split-switch":
985
- if len(next_steps) != 1:
986
- msg = (
987
- "Switch step *{step}* should transition to exactly "
988
- "one step at runtime, but got: {actual}"
989
- )
990
- raise MetaflowInternalError(
991
- msg.format(step=task.step, actual=", ".join(next_steps))
992
- )
993
- if next_steps[0] not in expected:
994
- msg = (
995
- "Switch step *{step}* transitioned to unexpected "
996
- "step *{actual}*. Expected one of: {expected}"
997
- )
998
- raise MetaflowInternalError(
999
- msg.format(
1000
- step=task.step,
1001
- actual=next_steps[0],
1002
- expected=", ".join(expected),
1003
- )
1004
- )
1005
- elif next_steps != expected:
933
+ if next_steps != expected:
1006
934
  msg = (
1007
935
  "Based on static analysis of the code, step *{step}* "
1008
936
  "was expected to transition to step(s) *{expected}*. "
@@ -1026,9 +954,6 @@ class NativeRuntime(object):
1026
954
  elif foreach:
1027
955
  # Next step is a foreach child
1028
956
  self._queue_task_foreach(task, next_steps)
1029
- elif self._graph[task.step].type == "split-switch":
1030
- # Next step is switch - queue the chosen step
1031
- self._queue_task_switch(task, next_steps)
1032
957
  else:
1033
958
  # Next steps are normal linear steps
1034
959
  for step in next_steps:
metaflow/task.py CHANGED
@@ -670,49 +670,31 @@ class MetaflowTask(object):
670
670
 
671
671
  if join_type:
672
672
  # Join step:
673
- passdown_params = None
674
-
675
- # Ensure that we have the right number of inputs.
676
- if join_type not in ("foreach", "split-switch"):
677
- # Find the corresponding split node from the graph.
678
- split_node = self.flow._graph[node.split_parents[-1]]
679
- # The number of expected inputs is the number of branches from that split.
680
- expected_inputs = len(split_node.out_funcs)
681
-
682
- if len(inputs) != expected_inputs:
683
- raise MetaflowDataMissing(
684
- "Join *%s* expected %d inputs but only %d inputs "
685
- "were found" % (step_name, expected_inputs, len(inputs))
686
- )
687
- if join_type == "split-switch":
688
- # Switch joins only have one input path (the chosen branch from the switch).
689
- # This occurs when a switch leads directly to a step without other converging branches.
690
- if len(inputs) > 1:
691
- raise MetaflowInternalError(
692
- f"Step *{step_name}* is a switch join but gets multiple inputs"
693
- )
694
- # Use input datastore directly - no need to copy parameters since we're not
695
- # creating a new output datastore
696
- self.flow._set_datastore(inputs[0])
697
- passdown_params = False
698
- else:
699
- # Multiple input contexts are passed in as an argument
700
- # to the step function.
701
- input_obj = Inputs(self._clone_flow(inp) for inp in inputs)
702
- self.flow._set_datastore(output)
703
- passdown_params = True
704
- # initialize parameters (if they exist)
705
- # We take Parameter values from the first input,
706
- # which is always safe since parameters are read-only
707
- if passdown_params is not None:
708
- current._update_env(
709
- {
710
- "parameter_names": self._init_parameters(
711
- inputs[0], passdown=passdown_params
712
- ),
713
- "graph_info": self.flow._graph_info,
714
- }
673
+
674
+ # Ensure that we have the right number of inputs. The
675
+ # foreach case is checked above.
676
+ if join_type != "foreach" and len(inputs) != len(node.in_funcs):
677
+ raise MetaflowDataMissing(
678
+ "Join *%s* expected %d "
679
+ "inputs but only %d inputs "
680
+ "were found" % (step_name, len(node.in_funcs), len(inputs))
715
681
  )
682
+
683
+ # Multiple input contexts are passed in as an argument
684
+ # to the step function.
685
+ input_obj = Inputs(self._clone_flow(inp) for inp in inputs)
686
+ self.flow._set_datastore(output)
687
+ # initialize parameters (if they exist)
688
+ # We take Parameter values from the first input,
689
+ # which is always safe since parameters are read-only
690
+ current._update_env(
691
+ {
692
+ "parameter_names": self._init_parameters(
693
+ inputs[0], passdown=True
694
+ ),
695
+ "graph_info": self.flow._graph_info,
696
+ }
697
+ )
716
698
  else:
717
699
  # Linear step:
718
700
  # We are running with a single input context.
@@ -770,10 +752,7 @@ class MetaflowTask(object):
770
752
  )
771
753
 
772
754
  if join_type:
773
- if join_type == "split-switch":
774
- self._exec_step_function(step_func, orig_step_func)
775
- else:
776
- self._exec_step_function(step_func, orig_step_func, input_obj)
755
+ self._exec_step_function(step_func, orig_step_func, input_obj)
777
756
  else:
778
757
  self._exec_step_function(step_func, orig_step_func)
779
758
 
@@ -347,8 +347,10 @@ class MutableFlow:
347
347
  "Mutable flow adding flow decorator '%s'" % deco_type
348
348
  )
349
349
 
350
+ # self._flow_cls._flow_decorators is a dictionary of form :
351
+ # <deco_name> : [deco_instance, deco_instance, ...]
350
352
  existing_deco = [
351
- d for d in self._flow_cls._flow_decorators if d.name == flow_deco.name
353
+ d for d in self._flow_cls._flow_decorators if d == flow_deco.name
352
354
  ]
353
355
 
354
356
  if flow_deco.allow_multiple or not existing_deco:
@@ -365,11 +367,11 @@ class MutableFlow:
365
367
  "Mutable flow overriding flow decorator '%s' "
366
368
  "(removing existing decorator and adding new one)" % flow_deco.name
367
369
  )
368
- self._flow_cls._flow_decorators = [
369
- d
370
+ self._flow_cls._flow_decorators = {
371
+ d: self._flow_cls._flow_decorators[d]
370
372
  for d in self._flow_cls._flow_decorators
371
- if d.name != flow_deco.name
372
- ]
373
+ if d != flow_deco.name
374
+ }
373
375
  _do_add()
374
376
  elif duplicates == MutableFlow.ERROR:
375
377
  # If we error, we raise an exception
@@ -470,7 +472,7 @@ class MutableFlow:
470
472
  % (len(old_deco_list) - len(new_deco_list))
471
473
  )
472
474
  if new_deco_list:
473
- self._flow_cls._flow_decorators[deconame] = new_deco_list
475
+ self._flow_cls._flow_decorators[deco_name] = new_deco_list
474
476
  else:
475
477
  del self._flow_cls._flow_decorators[deco_name]
476
478
  return did_remove
metaflow/util.py CHANGED
@@ -212,35 +212,6 @@ def write_latest_run_id(obj, run_id):
212
212
  f.write(str(run_id))
213
213
 
214
214
 
215
- def get_split_branch_for_node(graph, node_name, split_node_name):
216
- """
217
- Walks backwards from a node to find which branch of a given split
218
- it belongs to. The branches are the direct children of the split node.
219
- """
220
-
221
- # The branches are the direct children of the split node
222
- split_branches = set(graph[split_node_name].out_funcs)
223
-
224
- # Use a queue for breadth-first search backwards
225
- q = [node_name]
226
- visited = {node_name}
227
-
228
- while q:
229
- current_name = q.pop(0)
230
-
231
- # If we have reached one of the original branches, we are done
232
- if current_name in split_branches:
233
- return current_name
234
-
235
- # Add this node's parents to the queue to continue searching
236
- for parent_name in graph[current_name].in_funcs:
237
- if parent_name not in visited:
238
- visited.add(parent_name)
239
- q.append(parent_name)
240
-
241
- return None
242
-
243
-
244
215
  def get_object_package_version(obj):
245
216
  """
246
217
  Return the top level package name and package version that defines the
metaflow/vendor.py CHANGED
@@ -63,14 +63,29 @@ def find_vendored_libs(vendor_dir, whitelist, whitelist_dirs):
63
63
  return vendored_libs, paths
64
64
 
65
65
 
66
- def fetch_licenses(*info_dir, vendor_dir):
67
- for file in chain.from_iterable(map(iter_subtree, info_dir)):
68
- if "LICENSE" in file.name:
69
- library = file.parent.name.split("-")[0]
70
- shutil.copy(file, vendor_dir / ("%s.LICENSE" % library))
71
- else:
66
+ def fetch_licenses(*info_dirs, vendor_dir):
67
+ for dist_info in info_dirs:
68
+ metadata_file = dist_info / "METADATA"
69
+ if not metadata_file.exists():
70
+ continue
71
+
72
+ project_name = None
73
+ for line in metadata_file.read_text("utf-8").splitlines():
74
+ if line.startswith("Name: "):
75
+ project_name = line.split("Name: ", 1)[1].strip()
76
+ break
77
+ if not project_name:
72
78
  continue
73
79
 
80
+ for item in dist_info.iterdir():
81
+ if item.is_file() and re.search(r"(LICENSE|COPYING)", item.name, re.I):
82
+ shutil.copy(item, vendor_dir / f"{project_name}.LICENSE")
83
+ elif item.is_dir() and item.name.lower() == "licenses":
84
+ for license_file in item.iterdir():
85
+ if license_file.is_file():
86
+ dest_name = f"{project_name}.{license_file.name}"
87
+ shutil.copy(license_file, vendor_dir / dest_name)
88
+
74
89
 
75
90
  def vendor(vendor_dir):
76
91
  # remove everything
@@ -108,6 +123,8 @@ def vendor(vendor_dir):
108
123
  "-r",
109
124
  "_vendor/vendor_%s.txt" % subdir,
110
125
  "--no-compile",
126
+ "--no-binary",
127
+ ":all:",
111
128
  ]
112
129
  )
113
130
 
metaflow/version.py CHANGED
@@ -1 +1 @@
1
- metaflow_version = "2.16.8.2rc2"
1
+ metaflow_version = "2.17.1.0"
@@ -257,10 +257,11 @@ shell: setup-tilt
257
257
  echo "🔎 Using $$user_shell for interactive session."; \
258
258
  echo "🐍 If you installed Metaflow in a virtual environment, activate it now."; \
259
259
  if [ -f "$(DEVTOOLS_DIR)/aws_config" ]; then \
260
- env METAFLOW_HOME="$(DEVTOOLS_DIR)" \
260
+ env -u AWS_PROFILE \
261
+ -u AWS_SHARED_CREDENTIALS_FILE \
262
+ METAFLOW_HOME="$(DEVTOOLS_DIR)" \
261
263
  METAFLOW_PROFILE=local \
262
264
  AWS_CONFIG_FILE="$(DEVTOOLS_DIR)/aws_config" \
263
- AWS_SHARED_CREDENTIALS_FILE= \
264
265
  "$$user_shell" -i; \
265
266
  else \
266
267
  env METAFLOW_HOME="$(DEVTOOLS_DIR)" \
@@ -593,7 +593,7 @@ if "ui" in enabled_components:
593
593
  'uiBackend.metaflowDatastoreSysRootS3=s3://metaflow-test',
594
594
  'uiBackend.metaflowS3EndpointURL=http://minio.default.svc.cluster.local:9000',
595
595
  'uiBackend.image.name=public.ecr.aws/outerbounds/metaflow_metadata_service',
596
- 'uiBackend.image.tag=2.4.13-2-g70af4ed',
596
+ 'uiBackend.image.tag=2.5.0',
597
597
  'uiBackend.env[0].name=AWS_ACCESS_KEY_ID',
598
598
  'uiBackend.env[0].value=rootuser',
599
599
  'uiBackend.env[1].name=AWS_SECRET_ACCESS_KEY',
@@ -603,7 +603,7 @@ if "ui" in enabled_components:
603
603
  'uiBackend.resources.requests.memory=256Mi',
604
604
  'uiStatic.metaflowUIBackendURL=http://localhost:8083/api',
605
605
  'uiStatic.image.name=public.ecr.aws/outerbounds/metaflow_ui',
606
- 'uiStatic.image.tag=v1.3.13-5-g5dd049e',
606
+ 'uiStatic.image.tag=v1.3.14',
607
607
  'uiStatic.resources.requests.cpu=25m',
608
608
  'uiStatic.resources.requests.memory=64Mi',
609
609
  'uiStatic.resources.limits.cpu=50m',
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ob-metaflow
3
- Version: 2.16.8.2rc2
3
+ Version: 2.17.1.0
4
4
  Summary: Metaflow: More AI and ML, Less Engineering
5
5
  Author: Netflix, Outerbounds & the Metaflow Community
6
6
  Author-email: help@outerbounds.co
@@ -12,7 +12,7 @@ Requires-Dist: boto3
12
12
  Requires-Dist: pylint
13
13
  Requires-Dist: kubernetes
14
14
  Provides-Extra: stubs
15
- Requires-Dist: metaflow-stubs==2.16.8.2rc2; extra == "stubs"
15
+ Requires-Dist: metaflow-stubs==2.17.1.0; extra == "stubs"
16
16
  Dynamic: author
17
17
  Dynamic: author-email
18
18
  Dynamic: description