ob-metaflow 2.15.13.1__py2.py3-none-any.whl → 2.19.7.1rc0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/__init__.py +10 -3
- metaflow/_vendor/imghdr/__init__.py +186 -0
- metaflow/_vendor/yaml/__init__.py +427 -0
- metaflow/_vendor/yaml/composer.py +139 -0
- metaflow/_vendor/yaml/constructor.py +748 -0
- metaflow/_vendor/yaml/cyaml.py +101 -0
- metaflow/_vendor/yaml/dumper.py +62 -0
- metaflow/_vendor/yaml/emitter.py +1137 -0
- metaflow/_vendor/yaml/error.py +75 -0
- metaflow/_vendor/yaml/events.py +86 -0
- metaflow/_vendor/yaml/loader.py +63 -0
- metaflow/_vendor/yaml/nodes.py +49 -0
- metaflow/_vendor/yaml/parser.py +589 -0
- metaflow/_vendor/yaml/reader.py +185 -0
- metaflow/_vendor/yaml/representer.py +389 -0
- metaflow/_vendor/yaml/resolver.py +227 -0
- metaflow/_vendor/yaml/scanner.py +1435 -0
- metaflow/_vendor/yaml/serializer.py +111 -0
- metaflow/_vendor/yaml/tokens.py +104 -0
- metaflow/cards.py +4 -0
- metaflow/cli.py +125 -21
- metaflow/cli_components/init_cmd.py +1 -0
- metaflow/cli_components/run_cmds.py +204 -40
- metaflow/cli_components/step_cmd.py +160 -4
- metaflow/client/__init__.py +1 -0
- metaflow/client/core.py +198 -130
- metaflow/client/filecache.py +59 -32
- metaflow/cmd/code/__init__.py +2 -1
- metaflow/cmd/develop/stub_generator.py +49 -18
- metaflow/cmd/develop/stubs.py +9 -27
- metaflow/cmd/make_wrapper.py +30 -0
- metaflow/datastore/__init__.py +1 -0
- metaflow/datastore/content_addressed_store.py +40 -9
- metaflow/datastore/datastore_set.py +10 -1
- metaflow/datastore/flow_datastore.py +124 -4
- metaflow/datastore/spin_datastore.py +91 -0
- metaflow/datastore/task_datastore.py +92 -6
- metaflow/debug.py +5 -0
- metaflow/decorators.py +331 -82
- metaflow/extension_support/__init__.py +414 -356
- metaflow/extension_support/_empty_file.py +2 -2
- metaflow/flowspec.py +322 -82
- metaflow/graph.py +178 -15
- metaflow/includefile.py +25 -3
- metaflow/lint.py +94 -3
- metaflow/meta_files.py +13 -0
- metaflow/metadata_provider/metadata.py +13 -2
- metaflow/metaflow_config.py +66 -4
- metaflow/metaflow_environment.py +91 -25
- metaflow/metaflow_profile.py +18 -0
- metaflow/metaflow_version.py +16 -1
- metaflow/package/__init__.py +673 -0
- metaflow/packaging_sys/__init__.py +880 -0
- metaflow/packaging_sys/backend.py +128 -0
- metaflow/packaging_sys/distribution_support.py +153 -0
- metaflow/packaging_sys/tar_backend.py +99 -0
- metaflow/packaging_sys/utils.py +54 -0
- metaflow/packaging_sys/v1.py +527 -0
- metaflow/parameters.py +6 -2
- metaflow/plugins/__init__.py +6 -0
- metaflow/plugins/airflow/airflow.py +11 -1
- metaflow/plugins/airflow/airflow_cli.py +16 -5
- metaflow/plugins/argo/argo_client.py +42 -20
- metaflow/plugins/argo/argo_events.py +6 -6
- metaflow/plugins/argo/argo_workflows.py +1023 -344
- metaflow/plugins/argo/argo_workflows_cli.py +396 -94
- metaflow/plugins/argo/argo_workflows_decorator.py +9 -0
- metaflow/plugins/argo/argo_workflows_deployer_objects.py +75 -49
- metaflow/plugins/argo/capture_error.py +5 -2
- metaflow/plugins/argo/conditional_input_paths.py +35 -0
- metaflow/plugins/argo/exit_hooks.py +209 -0
- metaflow/plugins/argo/param_val.py +19 -0
- metaflow/plugins/aws/aws_client.py +6 -0
- metaflow/plugins/aws/aws_utils.py +33 -1
- metaflow/plugins/aws/batch/batch.py +72 -5
- metaflow/plugins/aws/batch/batch_cli.py +24 -3
- metaflow/plugins/aws/batch/batch_decorator.py +57 -6
- metaflow/plugins/aws/step_functions/step_functions.py +28 -3
- metaflow/plugins/aws/step_functions/step_functions_cli.py +49 -4
- metaflow/plugins/aws/step_functions/step_functions_deployer.py +3 -0
- metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +30 -0
- metaflow/plugins/cards/card_cli.py +20 -1
- metaflow/plugins/cards/card_creator.py +24 -1
- metaflow/plugins/cards/card_datastore.py +21 -49
- metaflow/plugins/cards/card_decorator.py +58 -6
- metaflow/plugins/cards/card_modules/basic.py +38 -9
- metaflow/plugins/cards/card_modules/bundle.css +1 -1
- metaflow/plugins/cards/card_modules/chevron/renderer.py +1 -1
- metaflow/plugins/cards/card_modules/components.py +592 -3
- metaflow/plugins/cards/card_modules/convert_to_native_type.py +34 -5
- metaflow/plugins/cards/card_modules/json_viewer.py +232 -0
- metaflow/plugins/cards/card_modules/main.css +1 -0
- metaflow/plugins/cards/card_modules/main.js +56 -41
- metaflow/plugins/cards/card_modules/test_cards.py +22 -6
- metaflow/plugins/cards/component_serializer.py +1 -8
- metaflow/plugins/cards/metadata.py +22 -0
- metaflow/plugins/catch_decorator.py +9 -0
- metaflow/plugins/datastores/local_storage.py +12 -6
- metaflow/plugins/datastores/spin_storage.py +12 -0
- metaflow/plugins/datatools/s3/s3.py +49 -17
- metaflow/plugins/datatools/s3/s3op.py +113 -66
- metaflow/plugins/env_escape/client_modules.py +102 -72
- metaflow/plugins/events_decorator.py +127 -121
- metaflow/plugins/exit_hook/__init__.py +0 -0
- metaflow/plugins/exit_hook/exit_hook_decorator.py +46 -0
- metaflow/plugins/exit_hook/exit_hook_script.py +52 -0
- metaflow/plugins/kubernetes/kubernetes.py +12 -1
- metaflow/plugins/kubernetes/kubernetes_cli.py +11 -0
- metaflow/plugins/kubernetes/kubernetes_decorator.py +25 -6
- metaflow/plugins/kubernetes/kubernetes_job.py +12 -4
- metaflow/plugins/kubernetes/kubernetes_jobsets.py +31 -30
- metaflow/plugins/metadata_providers/local.py +76 -82
- metaflow/plugins/metadata_providers/service.py +13 -9
- metaflow/plugins/metadata_providers/spin.py +16 -0
- metaflow/plugins/package_cli.py +36 -24
- metaflow/plugins/parallel_decorator.py +11 -2
- metaflow/plugins/parsers.py +16 -0
- metaflow/plugins/pypi/bootstrap.py +7 -1
- metaflow/plugins/pypi/conda_decorator.py +41 -82
- metaflow/plugins/pypi/conda_environment.py +14 -6
- metaflow/plugins/pypi/micromamba.py +9 -1
- metaflow/plugins/pypi/pip.py +41 -5
- metaflow/plugins/pypi/pypi_decorator.py +4 -4
- metaflow/plugins/pypi/utils.py +22 -0
- metaflow/plugins/secrets/__init__.py +3 -0
- metaflow/plugins/secrets/secrets_decorator.py +14 -178
- metaflow/plugins/secrets/secrets_func.py +49 -0
- metaflow/plugins/secrets/secrets_spec.py +101 -0
- metaflow/plugins/secrets/utils.py +74 -0
- metaflow/plugins/test_unbounded_foreach_decorator.py +2 -2
- metaflow/plugins/timeout_decorator.py +0 -1
- metaflow/plugins/uv/bootstrap.py +29 -1
- metaflow/plugins/uv/uv_environment.py +5 -3
- metaflow/pylint_wrapper.py +5 -1
- metaflow/runner/click_api.py +79 -26
- metaflow/runner/deployer.py +208 -6
- metaflow/runner/deployer_impl.py +32 -12
- metaflow/runner/metaflow_runner.py +266 -33
- metaflow/runner/subprocess_manager.py +21 -1
- metaflow/runner/utils.py +27 -16
- metaflow/runtime.py +660 -66
- metaflow/task.py +255 -26
- metaflow/user_configs/config_options.py +33 -21
- metaflow/user_configs/config_parameters.py +220 -58
- metaflow/user_decorators/__init__.py +0 -0
- metaflow/user_decorators/common.py +144 -0
- metaflow/user_decorators/mutable_flow.py +512 -0
- metaflow/user_decorators/mutable_step.py +424 -0
- metaflow/user_decorators/user_flow_decorator.py +264 -0
- metaflow/user_decorators/user_step_decorator.py +749 -0
- metaflow/util.py +197 -7
- metaflow/vendor.py +23 -7
- metaflow/version.py +1 -1
- {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/Makefile +13 -2
- {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/Tiltfile +107 -7
- {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/pick_services.sh +1 -0
- {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/METADATA +2 -3
- {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/RECORD +162 -121
- {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/WHEEL +1 -1
- metaflow/_vendor/v3_5/__init__.py +0 -1
- metaflow/_vendor/v3_5/importlib_metadata/__init__.py +0 -644
- metaflow/_vendor/v3_5/importlib_metadata/_compat.py +0 -152
- metaflow/_vendor/v3_5/zipp.py +0 -329
- metaflow/info_file.py +0 -25
- metaflow/package.py +0 -203
- metaflow/user_configs/config_decorators.py +0 -568
- {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/entry_points.txt +0 -0
- {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/licenses/LICENSE +0 -0
- {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/top_level.txt +0 -0
metaflow/client/core.py
CHANGED
@@ -32,11 +32,12 @@ from metaflow.exception import (
 from metaflow.includefile import IncludedFile
 from metaflow.metaflow_config import DEFAULT_METADATA, MAX_ATTEMPTS
 from metaflow.metaflow_environment import MetaflowEnvironment
+from metaflow.package import MetaflowPackage
+from metaflow.packaging_sys import ContentType
 from metaflow.plugins import ENVIRONMENTS, METADATA_PROVIDERS
 from metaflow.unbounded_foreach import CONTROL_TASK_TAG
 from metaflow.util import cached_property, is_stringish, resolve_identity, to_unicode

-from ..info_file import INFO_FILE
 from .filecache import FileCache

 if TYPE_CHECKING:
@@ -206,6 +207,20 @@ def default_namespace() -> str:
     return get_namespace()


+def inspect_spin(datastore_root: str = "."):
+    """
+    Set metadata provider to spin metadata so that users can inspect spin
+    steps, tasks, and artifacts.
+
+    Parameters
+    ----------
+    datastore_root : str, default "."
+        The root path to the spin datastore.
+    """
+    metadata_str = f"spin@{datastore_root}"
+    metadata(metadata_str)
+
+
 MetaflowArtifacts = NamedTuple

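Taken together with the spin metadata provider added elsewhere in this release, inspect_spin is a one-line convenience over metadata(). A minimal usage sketch, assuming the helper is exported alongside the other client functions (otherwise import it from metaflow.client.core); the flow name is hypothetical:

    from metaflow import Flow
    from metaflow.client.core import inspect_spin

    # Point the client at a spin datastore rooted in the current directory,
    # then browse spin steps, tasks, and artifacts with the usual client objects.
    inspect_spin(datastore_root=".")
    run = Flow("HelloFlow").latest_run  # "HelloFlow" is a hypothetical flow name
    print(run["start"].task)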
@@ -276,6 +291,7 @@ class MetaflowObject(object):
         self._attempt = attempt
         self._current_namespace = _current_namespace or get_namespace()
         self._namespace_check = _namespace_check
+
         # If the current namespace is False, we disable checking for namespace for this
         # and all children objects. Not setting namespace_check to False has the consequence
         # of preventing access to children objects after the namespace changes
@@ -302,7 +318,7 @@
         # distinguish between "attempt will happen" and "no such
         # attempt exists".

-        if pathspec:
+        if pathspec and _object is None:
             ids = pathspec.split("/")

             if self._NAME == "flow" and len(ids) != 1:
@@ -816,20 +832,29 @@ class MetaflowCode(object):
         self._path = info["location"]
         self._ds_type = info["ds_type"]
         self._sha = info["sha"]
+        self._code_metadata = info.get(
+            "metadata",
+            '{"version": 0, "archive_format": "tgz", "mfcontent_version": 0}',
+        )
+
+        self._backend = MetaflowPackage.get_backend(self._code_metadata)

         if filecache is None:
             filecache = FileCache()
         _, blobdata = filecache.get_data(
             self._ds_type, self._flow_name, self._path, self._sha
         )
-        code_obj = BytesIO(blobdata)
-        self._tar = tarfile.open(fileobj=code_obj, mode="r:gz")
-        # The JSON module in the tarball contains the per-flow config.
-        info_str = (
-            self._tar.extractfile(os.path.basename(INFO_FILE)).read().decode("utf-8")
-        )
-        self._info = json.loads(info_str)
-        self._flowspec = self._tar.extractfile(self._info["script"]).read()
+        self._code_obj = BytesIO(blobdata)
+        self._info = MetaflowPackage.cls_get_info(self._code_metadata, self._code_obj)
+        self._code_obj.seek(0)
+        if self._info:
+            self._flowspec = MetaflowPackage.cls_get_content(
+                self._code_metadata, self._code_obj, self._info["script"]
+            )
+            self._code_obj.seek(0)
+        else:
+            raise MetaflowInternalError("Code package metadata is invalid.")
+        self._tarball = None

     @property
     def path(self) -> str:
@@ -877,7 +902,14 @@
         TarFile
             TarFile for everything in this code package
         """
-        return self._tar
+        # We only return one tarball because the different TarFile objects share
+        # a common bytes buffer (self._code_obj).
+        if self._tarball is not None:
+            return self._tarball
+        if self._backend.type == "tgz":
+            self._tarball = self._backend.cls_open(self._code_obj)
+            return self._tarball
+        raise RuntimeError("Archive is not a tarball")

     def extract(self) -> TemporaryDirectory:
         """
@@ -908,27 +940,15 @@
         The directory and its contents are automatically deleted when
         this object is garbage collected.
         """
-        exclusions = [
-            "metaflow/",
-            "metaflow_extensions/",
-            "INFO",
-            "CONFIG_PARAMETERS",
-            "conda.manifest",
-            # This file is created when using the conda/pypi features available in
-            # nflx-metaflow-extensions: https://github.com/Netflix/metaflow-nflx-extensions
-            "condav2-1.cnd",
-        ]
-        members = [
-            m
-            for m in self.tarball.getmembers()
-            if not any(
-                (x.endswith("/") and m.name.startswith(x)) or (m.name == x)
-                for x in exclusions
-            )
-        ]
-
         tmp = TemporaryDirectory()
-        self.tarball.extractall(tmp.name, members)
+        # We save the position we are in _code_obj -- in case tarball is using it at
+        # the same time -- so we can reset it to not perturb tarball.
+        pos = self._code_obj.tell()
+        self._code_obj.seek(0)
+        MetaflowPackage.cls_extract_into(
+            self._code_metadata, self._code_obj, tmp.name, ContentType.USER_CONTENT
+        )
+        self._code_obj.seek(pos)
         return tmp

     @property
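MetaflowCode now resolves the package through its packaging metadata rather than assuming a gzipped tarball: tarball is computed lazily and only for the "tgz" backend, and extract() unpacks only user content instead of filtering a hard-coded exclusion list. A short consumer-side sketch (the run pathspec is hypothetical):

    import os
    from metaflow import Run

    code = Run("HelloFlow/12").code  # "HelloFlow/12" is a hypothetical pathspec
    print(code.path)                 # datastore location of the code package
    tmp = code.extract()             # TemporaryDirectory holding user content only
    print(os.listdir(tmp.name))      # keep tmp referenced: it is deleted when GC'd
    tar = code.tarball               # raises RuntimeError for non-"tgz" backends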
@@ -1184,149 +1204,197 @@ class Task(MetaflowObject):
     _PARENT_CLASS = "step"
     _CHILD_CLASS = "artifact"

-    def __init__(self, *args, **kwargs):
-        super(Task, self).__init__(*args, **kwargs)
-
     def _iter_filter(self, x):
         # exclude private data artifacts
         return x.id[0] != "_"

-    def _iter_matching_tasks(self, steps, metadata_key, metadata_pattern):
+    def _get_matching_pathspecs(self, steps, metadata_key, metadata_pattern):
         """
-        Yield tasks from specified steps matching a foreach path pattern.
+        Yield pathspecs of tasks from specified steps that match a given metadata pattern.

         Parameters
         ----------
         steps : List[str]
-            List of steps to search for tasks
-        pattern : str
-            Regex pattern to match in foreach-indices metadata
+            List of Step objects to search for tasks.
+        metadata_key : str
+            Metadata key to filter tasks on (e.g., 'foreach-execution-path').
+        metadata_pattern : str
+            Regular expression pattern to match against the metadata value.

-        Returns
-        -------
-        Iterator[Task]
-            Tasks matching the foreach path pattern
+        Yields
+        ------
+        str
+            Pathspec of each task whose metadata value for the specified key matches the pattern.
         """
         flow_id, run_id, _, _ = self.path_components
-
         for step in steps:
             task_pathspecs = self._metaflow.metadata.filter_tasks_by_metadata(
-                flow_id, run_id, step.id, metadata_key, metadata_pattern
+                flow_id, run_id, step, metadata_key, metadata_pattern
             )
             for task_pathspec in task_pathspecs:
-                yield Task(pathspec=task_pathspec, _namespace_check=False)
+                yield task_pathspec
+
+    @staticmethod
+    def _get_previous_steps(graph_info, step_name):
+        # Get the parent steps
+        steps = []
+        for node_name, attributes in graph_info["steps"].items():
+            if step_name in attributes["next"]:
+                steps.append(node_name)
+        return steps

     @property
-    def parent_tasks(self) -> Iterator["Task"]:
+    def parent_task_pathspecs(self) -> Iterator[str]:
         """
-        Yields all parent tasks of the current task if one exists.
+        Yields pathspecs of all parent tasks of the current task.

         Yields
         ------
-        Task
-            Parent task of the current task
-
+        str
+            Pathspec of the parent task of the current task
         """
-        flow_id, run_id, _, _ = self.path_components
+        _, _, step_name, _ = self.path_components
+        metadata_dict = self.metadata_dict
+        graph_info = self["_graph_info"].data

-        steps = list(self.parent.parent_steps)
-        if not steps:
-            return []
-
-        current_path = self.metadata_dict.get("foreach-execution-path", "")
+        # Get the parent steps
+        steps = self._get_previous_steps(graph_info, step_name)
+        node_type = graph_info["steps"][step_name]["type"]
+        metadata_key = "foreach-execution-path"
+        current_path = metadata_dict.get(metadata_key)

         if len(steps) > 1:
             # Static join - use exact path matching
             pattern = current_path or ".*"
-            yield from self._iter_matching_tasks(
-                steps, "foreach-execution-path", pattern
-            )
-            return
-
-        # Handle single step case
-        target_task = Step(
-            f"{flow_id}/{run_id}/{steps[0].id}", _namespace_check=False
-        ).task
-        target_path = target_task.metadata_dict.get("foreach-execution-path")
-
-        if not target_path or not current_path:
-            # (Current task, "A:10") and (Parent task, "")
-            # Pattern: ".*"
-            pattern = ".*"
         else:
-            current_depth = len(current_path.split(","))
-            target_depth = len(target_path.split(","))
-
-            if current_depth < target_depth:
-                # Foreach join
-                # (Current task, "A:10,B:13") and (Parent task, "A:10,B:13,C:21")
-                # Pattern: "A:10,B:13,.*"
-                pattern = f"{current_path},.*"
-            else:
-                # Foreach split or linear step
-                # Option 1:
-                # (Current task, "A:10,B:13,C:21") and (Parent task, "A:10,B:13")
-                # Option 2:
-                # (Current task, "A:10,B:13") and (Parent task, "A:10,B:13")
-                # Pattern: "A:10,B:13"
-                pattern = ",".join(current_path.split(",")[:target_depth])
+            if not steps:
+                return  # No parent steps, yield nothing

-        yield from self._iter_matching_tasks(steps, "foreach-execution-path", pattern)
+            if not current_path:
+                # Current task is not part of a foreach
+                # Pattern: ".*"
+                pattern = ".*"
+            else:
+                current_depth = len(current_path.split(","))
+                if node_type == "join":
+                    # Foreach join
+                    # (Current task, "A:10,B:13") and (Parent task, "A:10,B:13,C:21")
+                    # Pattern: "A:10,B:13,.*"
+                    pattern = f"{current_path},.*"
+                else:
+                    # Foreach split or linear step
+                    # Pattern: "A:10,B:13"
+                    parent_step_type = graph_info["steps"][steps[0]]["type"]
+                    target_depth = current_depth
+                    if (
+                        parent_step_type == "split-foreach"
+                        or parent_step_type == "split-parallel"
+                    ) and current_depth == 1:
+                        # (Current task, "A:10") and (Parent task, "")
+                        pattern = ".*"
+                    else:
+                        # (Current task, "A:10,B:13,C:21") and (Parent task, "A:10,B:13")
+                        # (Current task, "A:10,B:13") and (Parent task, "A:10,B:13")
+                        if (
+                            parent_step_type == "split-foreach"
+                            or parent_step_type == "split-parallel"
+                        ):
+                            target_depth = current_depth - 1
+                        pattern = ",".join(current_path.split(",")[:target_depth])
+
+        for pathspec in self._get_matching_pathspecs(steps, metadata_key, pattern):
+            yield pathspec

     @property
-    def child_tasks(self) -> Iterator["Task"]:
+    def child_task_pathspecs(self) -> Iterator[str]:
         """
-        Yields all child tasks of the current task if one exists.
+        Yields pathspecs of all child tasks of the current task.

         Yields
         ------
-        Task
-            Child task of the current task
+        str
+            Pathspec of the child task of the current task
         """
-        flow_id, run_id, _, _ = self.path_components
-        steps = list(self.parent.child_steps)
-        if not steps:
-            return []
+        flow_id, run_id, step_name, _ = self.path_components
+        metadata_dict = self.metadata_dict
+        graph_info = self["_graph_info"].data

-        current_path = self.metadata_dict.get("foreach-execution-path", "")
+        # Get the child steps
+        steps = graph_info["steps"][step_name]["next"]
+
+        node_type = graph_info["steps"][step_name]["type"]
+        metadata_key = "foreach-execution-path"
+        current_path = metadata_dict.get(metadata_key)

         if len(steps) > 1:
             # Static split - use exact path matching
             pattern = current_path or ".*"
-            yield from self._iter_matching_tasks(
-                steps, "foreach-execution-path", pattern
-            )
-            return
-
-        # Handle single step case
-        target_task = Step(
-            f"{flow_id}/{run_id}/{steps[0].id}", _namespace_check=False
-        ).task
-        target_path = target_task.metadata_dict.get("foreach-execution-path")
-
-        if not target_path or not current_path:
-            # (Current task, "A:10") and (Child task, "")
-            # Pattern: ".*"
-            pattern = ".*"
         else:
-            current_depth = len(current_path.split(","))
-            target_depth = len(target_path.split(","))
-
-            if current_depth < target_depth:
-                # Foreach split
-                # (Current task, "A:10,B:13") and (Child task, "A:10,B:13,C:21")
-                # Pattern: "A:10,B:13,.*"
-                pattern = f"{current_path},.*"
+            if not steps:
+                return  # No child steps, yield nothing
+
+            if not current_path:
+                # Current task is not part of a foreach
+                # Pattern: ".*"
+                pattern = ".*"
             else:
-                # Foreach join or linear step
-                # Option 1:
-                # (Current task, "A:10,B:13,C:21") and (Child task, "A:10,B:13")
-                # Option 2:
-                # (Current task, "A:10,B:13") and (Child task, "A:10,B:13")
-                # Pattern: "A:10,B:13"
-                pattern = ",".join(current_path.split(",")[:target_depth])
-
-        yield from self._iter_matching_tasks(steps, "foreach-execution-path", pattern)
+                current_depth = len(current_path.split(","))
+                if node_type == "split-foreach" or node_type == "split-parallel":
+                    # Foreach split
+                    # (Current task, "A:10,B:13") and (Child task, "A:10,B:13,C:21")
+                    # Pattern: "A:10,B:13,.*"
+                    pattern = f"{current_path},.*"
+                else:
+                    # Foreach join or linear step
+                    # Pattern: "A:10,B:13"
+                    child_step_type = graph_info["steps"][steps[0]]["type"]
+
+                    # We need to know if the child step is a foreach join or a static join
+                    child_step_prev_steps = self._get_previous_steps(
+                        graph_info, steps[0]
+                    )
+                    if len(child_step_prev_steps) > 1:
+                        child_step_type = "static-join"
+                    target_depth = current_depth
+                    if child_step_type == "join" and current_depth == 1:
+                        # (Current task, "A:10") and (Child task, "")
+                        pattern = ".*"
+                    else:
+                        # (Current task, "A:10,B:13,C:21") and (Child task, "A:10,B:13")
+                        # (Current task, "A:10,B:13") and (Child task, "A:10,B:13")
+                        if child_step_type == "join":
+                            target_depth = current_depth - 1
+                        pattern = ",".join(current_path.split(",")[:target_depth])
+
+        for pathspec in self._get_matching_pathspecs(steps, metadata_key, pattern):
+            yield pathspec
+
+    @property
+    def parent_tasks(self) -> Iterator["Task"]:
+        """
+        Yields all parent tasks of the current task if one exists.
+
+        Yields
+        ------
+        Task
+            Parent task of the current task
+        """
+        parent_task_pathspecs = self.parent_task_pathspecs
+        for pathspec in parent_task_pathspecs:
+            yield Task(pathspec=pathspec, _namespace_check=False)
+
+    @property
+    def child_tasks(self) -> Iterator["Task"]:
+        """
+        Yields all child tasks of the current task if one exists.
+
+        Yields
+        ------
+        Task
+            Child task of the current task
+        """
+        for pathspec in self.child_task_pathspecs:
+            yield Task(pathspec=pathspec, _namespace_check=False)

     @property
     def metadata(self) -> List[Metadata]:
metaflow/client/filecache.py
CHANGED
@@ -1,5 +1,6 @@
 from __future__ import print_function
 from collections import OrderedDict
+import json
 import os
 import sys
 import time
@@ -10,13 +11,14 @@ from urllib.parse import urlparse

 from metaflow.datastore import FlowDataStore
 from metaflow.datastore.content_addressed_store import BlobCache
+from metaflow.datastore.flow_datastore import MetadataCache
 from metaflow.exception import MetaflowException
 from metaflow.metaflow_config import (
     CLIENT_CACHE_PATH,
     CLIENT_CACHE_MAX_SIZE,
     CLIENT_CACHE_MAX_FLOWDATASTORE_COUNT,
-    CLIENT_CACHE_MAX_TASKDATASTORE_COUNT,
 )
+from metaflow.metaflow_profile import from_start

 from metaflow.plugins import DATASTORES

@@ -63,8 +65,8 @@ class FileCache(object):
         # when querying for sizes of artifacts. Once we have queried for the size
         # of one artifact in a TaskDatastore, caching this means that any
         # queries on that same TaskDatastore will be quick (since we already
-        # have all the metadata)
-
+        # have all the metadata). We keep track of this in a file so it persists
+        # across processes.

     @property
     def cache_dir(self):
@@ -87,7 +89,7 @@
     ):
         ds_cls = self._get_datastore_storage_impl(ds_type)
         ds_root = ds_cls.path_join(*ds_cls.path_split(location)[:-5])
-        cache_id = self._flow_ds_id(ds_type, ds_root, flow_name)
+        cache_id = self.flow_ds_id(ds_type, ds_root, flow_name)

         token = (
             "%s.cached"
@@ -311,13 +313,13 @@ class FileCache(object):
         self._objects = sorted(objects, reverse=False)

     @staticmethod
-    def _flow_ds_id(ds_type, ds_root, flow_name):
+    def flow_ds_id(ds_type, ds_root, flow_name):
         p = urlparse(ds_root)
         sanitized_root = (p.netloc + p.path).replace("/", "_")
         return ".".join([ds_type, sanitized_root, flow_name])

     @staticmethod
-    def _task_ds_id(ds_type, ds_root, flow_name, run_id, step_name, task_id, attempt):
+    def task_ds_id(ds_type, ds_root, flow_name, run_id, step_name, task_id, attempt):
         p = urlparse(ds_root)
         sanitized_root = (p.netloc + p.path).replace("/", "_")
         return ".".join(
@@ -365,7 +367,7 @@ class FileCache(object):
         return storage_impl[0]

     def _get_flow_datastore(self, ds_type, ds_root, flow_name):
-        cache_id = self._flow_ds_id(ds_type, ds_root, flow_name)
+        cache_id = self.flow_ds_id(ds_type, ds_root, flow_name)
         cached_flow_datastore = self._store_caches.get(cache_id)

         if cached_flow_datastore:
@@ -380,9 +382,14 @@
             ds_root=ds_root,
         )
         blob_cache = self._blob_caches.setdefault(
-            cache_id, FileBlobCache(self, cache_id)
+            cache_id,
+            (
+                FileBlobCache(self, cache_id),
+                TaskMetadataCache(self, ds_type, ds_root, flow_name),
+            ),
         )
-        cached_flow_datastore.ca_store.set_blob_cache(blob_cache)
+        cached_flow_datastore.ca_store.set_blob_cache(blob_cache[0])
+        cached_flow_datastore.set_metadata_cache(blob_cache[1])
         self._store_caches[cache_id] = cached_flow_datastore
         if len(self._store_caches) > CLIENT_CACHE_MAX_FLOWDATASTORE_COUNT:
             cache_id_to_remove, _ = self._store_caches.popitem(last=False)
@@ -393,32 +400,52 @@
         self, ds_type, ds_root, flow_name, run_id, step_name, task_id, attempt
     ):
         flow_ds = self._get_flow_datastore(ds_type, ds_root, flow_name)
-        cached_metadata = None
-        if attempt is not None:
-            cache_id = self._task_ds_id(
-                ds_type, ds_root, flow_name, run_id, step_name, task_id, attempt
+
+        return flow_ds.get_task_datastore(run_id, step_name, task_id, attempt=attempt)
+
+
+class TaskMetadataCache(MetadataCache):
+    def __init__(self, filecache, ds_type, ds_root, flow_name):
+        self._filecache = filecache
+        self._ds_type = ds_type
+        self._ds_root = ds_root
+        self._flow_name = flow_name
+
+    def _path(self, run_id, step_name, task_id, attempt):
+        if attempt is None:
+            raise MetaflowException(
+                "Attempt number must be specified to use task metadata cache. Raise an issue "
+                "on Metaflow GitHub if you see this message.",
             )
-            cached_metadata = self._task_metadata_caches.get(cache_id)
-
-        if cached_metadata:
-            return flow_ds.get_task_datastore(
-                run_id,
-                step_name,
-                task_id,
-                attempt=attempt,
-                data_metadata=cached_metadata,
-            )
-        # If we are here, we either have attempt=None or nothing in the cache
-        task_ds = flow_ds.get_task_datastore(
-            run_id, step_name, task_id, attempt=attempt
+        cache_id = self._filecache.task_ds_id(
+            self._ds_type,
+            self._ds_root,
+            self._flow_name,
+            run_id,
+            step_name,
+            task_id,
+            attempt,
         )
-        cache_id = self._task_ds_id(
-            ds_type, ds_root, flow_name, run_id, step_name, task_id, task_ds.attempt
+        token = (
+            "%s.cached"
+            % sha1(
+                os.path.join(
+                    run_id, step_name, task_id, str(attempt), "metadata"
+                ).encode("utf-8")
+            ).hexdigest()
+        )
+        return os.path.join(self._filecache.cache_dir, cache_id, token[:2], token)
+
+    def load_metadata(self, run_id, step_name, task_id, attempt):
+        d = self._filecache.read_file(self._path(run_id, step_name, task_id, attempt))
+        if d:
+            return json.loads(d)
+
+    def store_metadata(self, run_id, step_name, task_id, attempt, metadata_dict):
+        self._filecache.create_file(
+            self._path(run_id, step_name, task_id, attempt),
+            json.dumps(metadata_dict).encode("utf-8"),
        )
-        self._task_metadata_caches[cache_id] = task_ds.ds_metadata
-        if len(self._task_metadata_caches) > CLIENT_CACHE_MAX_TASKDATASTORE_COUNT:
-            self._task_metadata_caches.popitem(last=False)
-        return task_ds


 class FileBlobCache(BlobCache):
metaflow/cmd/code/__init__.py
CHANGED
@@ -6,6 +6,7 @@ from tempfile import TemporaryDirectory
 from typing import Any, Callable, List, Mapping, Optional, cast

 from metaflow import Run
+from metaflow.util import walk_without_cycles
 from metaflow._vendor import click
 from metaflow.cli import echo_always

@@ -51,7 +52,7 @@ def perform_diff(
         target_dir = os.getcwd()

     diffs = []
-    for dirpath, dirnames, filenames in os.walk(source_dir):
+    for dirpath, _, filenames in walk_without_cycles(source_dir):
         for fname in filenames:
             # NOTE: the paths below need to be set up carefully
             # for the `patch` command to work. Better not to touch