ob-metaflow 2.11.13.1__py2.py3-none-any.whl → 2.19.7.1rc0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/R.py +10 -7
- metaflow/__init__.py +40 -25
- metaflow/_vendor/imghdr/__init__.py +186 -0
- metaflow/_vendor/importlib_metadata/__init__.py +1063 -0
- metaflow/_vendor/importlib_metadata/_adapters.py +68 -0
- metaflow/_vendor/importlib_metadata/_collections.py +30 -0
- metaflow/_vendor/importlib_metadata/_compat.py +71 -0
- metaflow/_vendor/importlib_metadata/_functools.py +104 -0
- metaflow/_vendor/importlib_metadata/_itertools.py +73 -0
- metaflow/_vendor/importlib_metadata/_meta.py +48 -0
- metaflow/_vendor/importlib_metadata/_text.py +99 -0
- metaflow/_vendor/importlib_metadata/py.typed +0 -0
- metaflow/_vendor/typeguard/__init__.py +48 -0
- metaflow/_vendor/typeguard/_checkers.py +1070 -0
- metaflow/_vendor/typeguard/_config.py +108 -0
- metaflow/_vendor/typeguard/_decorators.py +233 -0
- metaflow/_vendor/typeguard/_exceptions.py +42 -0
- metaflow/_vendor/typeguard/_functions.py +308 -0
- metaflow/_vendor/typeguard/_importhook.py +213 -0
- metaflow/_vendor/typeguard/_memo.py +48 -0
- metaflow/_vendor/typeguard/_pytest_plugin.py +127 -0
- metaflow/_vendor/typeguard/_suppression.py +86 -0
- metaflow/_vendor/typeguard/_transformer.py +1229 -0
- metaflow/_vendor/typeguard/_union_transformer.py +55 -0
- metaflow/_vendor/typeguard/_utils.py +173 -0
- metaflow/_vendor/typeguard/py.typed +0 -0
- metaflow/_vendor/typing_extensions.py +3641 -0
- metaflow/_vendor/v3_7/importlib_metadata/__init__.py +1063 -0
- metaflow/_vendor/v3_7/importlib_metadata/_adapters.py +68 -0
- metaflow/_vendor/v3_7/importlib_metadata/_collections.py +30 -0
- metaflow/_vendor/v3_7/importlib_metadata/_compat.py +71 -0
- metaflow/_vendor/v3_7/importlib_metadata/_functools.py +104 -0
- metaflow/_vendor/v3_7/importlib_metadata/_itertools.py +73 -0
- metaflow/_vendor/v3_7/importlib_metadata/_meta.py +48 -0
- metaflow/_vendor/v3_7/importlib_metadata/_text.py +99 -0
- metaflow/_vendor/v3_7/importlib_metadata/py.typed +0 -0
- metaflow/_vendor/v3_7/typeguard/__init__.py +48 -0
- metaflow/_vendor/v3_7/typeguard/_checkers.py +906 -0
- metaflow/_vendor/v3_7/typeguard/_config.py +108 -0
- metaflow/_vendor/v3_7/typeguard/_decorators.py +237 -0
- metaflow/_vendor/v3_7/typeguard/_exceptions.py +42 -0
- metaflow/_vendor/v3_7/typeguard/_functions.py +310 -0
- metaflow/_vendor/v3_7/typeguard/_importhook.py +213 -0
- metaflow/_vendor/v3_7/typeguard/_memo.py +48 -0
- metaflow/_vendor/v3_7/typeguard/_pytest_plugin.py +100 -0
- metaflow/_vendor/v3_7/typeguard/_suppression.py +88 -0
- metaflow/_vendor/v3_7/typeguard/_transformer.py +1207 -0
- metaflow/_vendor/v3_7/typeguard/_union_transformer.py +54 -0
- metaflow/_vendor/v3_7/typeguard/_utils.py +169 -0
- metaflow/_vendor/v3_7/typeguard/py.typed +0 -0
- metaflow/_vendor/v3_7/typing_extensions.py +3072 -0
- metaflow/_vendor/yaml/__init__.py +427 -0
- metaflow/_vendor/yaml/composer.py +139 -0
- metaflow/_vendor/yaml/constructor.py +748 -0
- metaflow/_vendor/yaml/cyaml.py +101 -0
- metaflow/_vendor/yaml/dumper.py +62 -0
- metaflow/_vendor/yaml/emitter.py +1137 -0
- metaflow/_vendor/yaml/error.py +75 -0
- metaflow/_vendor/yaml/events.py +86 -0
- metaflow/_vendor/yaml/loader.py +63 -0
- metaflow/_vendor/yaml/nodes.py +49 -0
- metaflow/_vendor/yaml/parser.py +589 -0
- metaflow/_vendor/yaml/reader.py +185 -0
- metaflow/_vendor/yaml/representer.py +389 -0
- metaflow/_vendor/yaml/resolver.py +227 -0
- metaflow/_vendor/yaml/scanner.py +1435 -0
- metaflow/_vendor/yaml/serializer.py +111 -0
- metaflow/_vendor/yaml/tokens.py +104 -0
- metaflow/cards.py +5 -0
- metaflow/cli.py +331 -785
- metaflow/cli_args.py +17 -0
- metaflow/cli_components/__init__.py +0 -0
- metaflow/cli_components/dump_cmd.py +96 -0
- metaflow/cli_components/init_cmd.py +52 -0
- metaflow/cli_components/run_cmds.py +546 -0
- metaflow/cli_components/step_cmd.py +334 -0
- metaflow/cli_components/utils.py +140 -0
- metaflow/client/__init__.py +1 -0
- metaflow/client/core.py +467 -73
- metaflow/client/filecache.py +75 -35
- metaflow/clone_util.py +7 -1
- metaflow/cmd/code/__init__.py +231 -0
- metaflow/cmd/develop/stub_generator.py +756 -288
- metaflow/cmd/develop/stubs.py +12 -28
- metaflow/cmd/main_cli.py +6 -4
- metaflow/cmd/make_wrapper.py +78 -0
- metaflow/datastore/__init__.py +1 -0
- metaflow/datastore/content_addressed_store.py +41 -10
- metaflow/datastore/datastore_set.py +11 -2
- metaflow/datastore/flow_datastore.py +156 -10
- metaflow/datastore/spin_datastore.py +91 -0
- metaflow/datastore/task_datastore.py +154 -39
- metaflow/debug.py +5 -0
- metaflow/decorators.py +404 -78
- metaflow/exception.py +8 -2
- metaflow/extension_support/__init__.py +527 -376
- metaflow/extension_support/_empty_file.py +2 -2
- metaflow/extension_support/plugins.py +49 -31
- metaflow/flowspec.py +482 -33
- metaflow/graph.py +210 -42
- metaflow/includefile.py +84 -40
- metaflow/lint.py +141 -22
- metaflow/meta_files.py +13 -0
- metaflow/{metadata → metadata_provider}/heartbeat.py +24 -8
- metaflow/{metadata → metadata_provider}/metadata.py +86 -1
- metaflow/metaflow_config.py +175 -28
- metaflow/metaflow_config_funcs.py +51 -3
- metaflow/metaflow_current.py +4 -10
- metaflow/metaflow_environment.py +139 -53
- metaflow/metaflow_git.py +115 -0
- metaflow/metaflow_profile.py +18 -0
- metaflow/metaflow_version.py +150 -66
- metaflow/mflog/__init__.py +4 -3
- metaflow/mflog/save_logs.py +2 -2
- metaflow/multicore_utils.py +31 -14
- metaflow/package/__init__.py +673 -0
- metaflow/packaging_sys/__init__.py +880 -0
- metaflow/packaging_sys/backend.py +128 -0
- metaflow/packaging_sys/distribution_support.py +153 -0
- metaflow/packaging_sys/tar_backend.py +99 -0
- metaflow/packaging_sys/utils.py +54 -0
- metaflow/packaging_sys/v1.py +527 -0
- metaflow/parameters.py +149 -28
- metaflow/plugins/__init__.py +74 -5
- metaflow/plugins/airflow/airflow.py +40 -25
- metaflow/plugins/airflow/airflow_cli.py +22 -5
- metaflow/plugins/airflow/airflow_decorator.py +1 -1
- metaflow/plugins/airflow/airflow_utils.py +5 -3
- metaflow/plugins/airflow/sensors/base_sensor.py +4 -4
- metaflow/plugins/airflow/sensors/external_task_sensor.py +2 -2
- metaflow/plugins/airflow/sensors/s3_sensor.py +2 -2
- metaflow/plugins/argo/argo_client.py +78 -33
- metaflow/plugins/argo/argo_events.py +6 -6
- metaflow/plugins/argo/argo_workflows.py +2410 -527
- metaflow/plugins/argo/argo_workflows_cli.py +571 -121
- metaflow/plugins/argo/argo_workflows_decorator.py +43 -12
- metaflow/plugins/argo/argo_workflows_deployer.py +106 -0
- metaflow/plugins/argo/argo_workflows_deployer_objects.py +453 -0
- metaflow/plugins/argo/capture_error.py +73 -0
- metaflow/plugins/argo/conditional_input_paths.py +35 -0
- metaflow/plugins/argo/exit_hooks.py +209 -0
- metaflow/plugins/argo/jobset_input_paths.py +15 -0
- metaflow/plugins/argo/param_val.py +19 -0
- metaflow/plugins/aws/aws_client.py +10 -3
- metaflow/plugins/aws/aws_utils.py +55 -2
- metaflow/plugins/aws/batch/batch.py +72 -5
- metaflow/plugins/aws/batch/batch_cli.py +33 -10
- metaflow/plugins/aws/batch/batch_client.py +4 -3
- metaflow/plugins/aws/batch/batch_decorator.py +102 -35
- metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
- metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
- metaflow/plugins/aws/step_functions/production_token.py +1 -1
- metaflow/plugins/aws/step_functions/step_functions.py +65 -8
- metaflow/plugins/aws/step_functions/step_functions_cli.py +101 -7
- metaflow/plugins/aws/step_functions/step_functions_decorator.py +1 -2
- metaflow/plugins/aws/step_functions/step_functions_deployer.py +97 -0
- metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +264 -0
- metaflow/plugins/azure/azure_exceptions.py +1 -1
- metaflow/plugins/azure/azure_secret_manager_secrets_provider.py +240 -0
- metaflow/plugins/azure/azure_tail.py +1 -1
- metaflow/plugins/azure/includefile_support.py +2 -0
- metaflow/plugins/cards/card_cli.py +66 -30
- metaflow/plugins/cards/card_creator.py +25 -1
- metaflow/plugins/cards/card_datastore.py +21 -49
- metaflow/plugins/cards/card_decorator.py +132 -8
- metaflow/plugins/cards/card_modules/basic.py +112 -17
- metaflow/plugins/cards/card_modules/bundle.css +1 -1
- metaflow/plugins/cards/card_modules/card.py +16 -1
- metaflow/plugins/cards/card_modules/chevron/renderer.py +1 -1
- metaflow/plugins/cards/card_modules/components.py +665 -28
- metaflow/plugins/cards/card_modules/convert_to_native_type.py +36 -7
- metaflow/plugins/cards/card_modules/json_viewer.py +232 -0
- metaflow/plugins/cards/card_modules/main.css +1 -0
- metaflow/plugins/cards/card_modules/main.js +68 -49
- metaflow/plugins/cards/card_modules/renderer_tools.py +1 -0
- metaflow/plugins/cards/card_modules/test_cards.py +26 -12
- metaflow/plugins/cards/card_server.py +39 -14
- metaflow/plugins/cards/component_serializer.py +2 -9
- metaflow/plugins/cards/metadata.py +22 -0
- metaflow/plugins/catch_decorator.py +9 -0
- metaflow/plugins/datastores/azure_storage.py +10 -1
- metaflow/plugins/datastores/gs_storage.py +6 -2
- metaflow/plugins/datastores/local_storage.py +12 -6
- metaflow/plugins/datastores/spin_storage.py +12 -0
- metaflow/plugins/datatools/local.py +2 -0
- metaflow/plugins/datatools/s3/s3.py +126 -75
- metaflow/plugins/datatools/s3/s3op.py +254 -121
- metaflow/plugins/env_escape/__init__.py +3 -3
- metaflow/plugins/env_escape/client_modules.py +102 -72
- metaflow/plugins/env_escape/server.py +7 -0
- metaflow/plugins/env_escape/stub.py +24 -5
- metaflow/plugins/events_decorator.py +343 -185
- metaflow/plugins/exit_hook/__init__.py +0 -0
- metaflow/plugins/exit_hook/exit_hook_decorator.py +46 -0
- metaflow/plugins/exit_hook/exit_hook_script.py +52 -0
- metaflow/plugins/gcp/__init__.py +1 -1
- metaflow/plugins/gcp/gcp_secret_manager_secrets_provider.py +11 -6
- metaflow/plugins/gcp/gs_tail.py +10 -6
- metaflow/plugins/gcp/includefile_support.py +3 -0
- metaflow/plugins/kubernetes/kube_utils.py +108 -0
- metaflow/plugins/kubernetes/kubernetes.py +411 -130
- metaflow/plugins/kubernetes/kubernetes_cli.py +168 -36
- metaflow/plugins/kubernetes/kubernetes_client.py +104 -2
- metaflow/plugins/kubernetes/kubernetes_decorator.py +246 -88
- metaflow/plugins/kubernetes/kubernetes_job.py +253 -581
- metaflow/plugins/kubernetes/kubernetes_jobsets.py +1071 -0
- metaflow/plugins/kubernetes/spot_metadata_cli.py +69 -0
- metaflow/plugins/kubernetes/spot_monitor_sidecar.py +109 -0
- metaflow/plugins/logs_cli.py +359 -0
- metaflow/plugins/{metadata → metadata_providers}/local.py +144 -84
- metaflow/plugins/{metadata → metadata_providers}/service.py +103 -26
- metaflow/plugins/metadata_providers/spin.py +16 -0
- metaflow/plugins/package_cli.py +36 -24
- metaflow/plugins/parallel_decorator.py +128 -11
- metaflow/plugins/parsers.py +16 -0
- metaflow/plugins/project_decorator.py +51 -5
- metaflow/plugins/pypi/bootstrap.py +357 -105
- metaflow/plugins/pypi/conda_decorator.py +82 -81
- metaflow/plugins/pypi/conda_environment.py +187 -52
- metaflow/plugins/pypi/micromamba.py +157 -47
- metaflow/plugins/pypi/parsers.py +268 -0
- metaflow/plugins/pypi/pip.py +88 -13
- metaflow/plugins/pypi/pypi_decorator.py +37 -1
- metaflow/plugins/pypi/utils.py +48 -2
- metaflow/plugins/resources_decorator.py +2 -2
- metaflow/plugins/secrets/__init__.py +3 -0
- metaflow/plugins/secrets/secrets_decorator.py +26 -181
- metaflow/plugins/secrets/secrets_func.py +49 -0
- metaflow/plugins/secrets/secrets_spec.py +101 -0
- metaflow/plugins/secrets/utils.py +74 -0
- metaflow/plugins/tag_cli.py +4 -7
- metaflow/plugins/test_unbounded_foreach_decorator.py +41 -6
- metaflow/plugins/timeout_decorator.py +3 -3
- metaflow/plugins/uv/__init__.py +0 -0
- metaflow/plugins/uv/bootstrap.py +128 -0
- metaflow/plugins/uv/uv_environment.py +72 -0
- metaflow/procpoll.py +1 -1
- metaflow/pylint_wrapper.py +5 -1
- metaflow/runner/__init__.py +0 -0
- metaflow/runner/click_api.py +717 -0
- metaflow/runner/deployer.py +470 -0
- metaflow/runner/deployer_impl.py +201 -0
- metaflow/runner/metaflow_runner.py +714 -0
- metaflow/runner/nbdeploy.py +132 -0
- metaflow/runner/nbrun.py +225 -0
- metaflow/runner/subprocess_manager.py +650 -0
- metaflow/runner/utils.py +335 -0
- metaflow/runtime.py +1078 -260
- metaflow/sidecar/sidecar_worker.py +1 -1
- metaflow/system/__init__.py +5 -0
- metaflow/system/system_logger.py +85 -0
- metaflow/system/system_monitor.py +108 -0
- metaflow/system/system_utils.py +19 -0
- metaflow/task.py +521 -225
- metaflow/tracing/__init__.py +7 -7
- metaflow/tracing/span_exporter.py +31 -38
- metaflow/tracing/tracing_modules.py +38 -43
- metaflow/tuple_util.py +27 -0
- metaflow/user_configs/__init__.py +0 -0
- metaflow/user_configs/config_options.py +563 -0
- metaflow/user_configs/config_parameters.py +598 -0
- metaflow/user_decorators/__init__.py +0 -0
- metaflow/user_decorators/common.py +144 -0
- metaflow/user_decorators/mutable_flow.py +512 -0
- metaflow/user_decorators/mutable_step.py +424 -0
- metaflow/user_decorators/user_flow_decorator.py +264 -0
- metaflow/user_decorators/user_step_decorator.py +749 -0
- metaflow/util.py +243 -27
- metaflow/vendor.py +23 -7
- metaflow/version.py +1 -1
- ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Makefile +355 -0
- ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Tiltfile +726 -0
- ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/pick_services.sh +105 -0
- ob_metaflow-2.19.7.1rc0.dist-info/METADATA +87 -0
- ob_metaflow-2.19.7.1rc0.dist-info/RECORD +445 -0
- {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/WHEEL +1 -1
- {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/entry_points.txt +1 -0
- metaflow/_vendor/v3_5/__init__.py +0 -1
- metaflow/_vendor/v3_5/importlib_metadata/__init__.py +0 -644
- metaflow/_vendor/v3_5/importlib_metadata/_compat.py +0 -152
- metaflow/package.py +0 -188
- ob_metaflow-2.11.13.1.dist-info/METADATA +0 -85
- ob_metaflow-2.11.13.1.dist-info/RECORD +0 -308
- /metaflow/_vendor/{v3_5/zipp.py → zipp.py} +0 -0
- /metaflow/{metadata → metadata_provider}/__init__.py +0 -0
- /metaflow/{metadata → metadata_provider}/util.py +0 -0
- /metaflow/plugins/{metadata → metadata_providers}/__init__.py +0 -0
- {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info/licenses}/LICENSE +0 -0
- {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/top_level.txt +0 -0
|
@@ -6,11 +6,12 @@ import time
|
|
|
6
6
|
|
|
7
7
|
from functools import wraps
|
|
8
8
|
from io import BufferedIOBase, FileIO, RawIOBase
|
|
9
|
+
from typing import List, Optional
|
|
9
10
|
from types import MethodType, FunctionType
|
|
10
11
|
|
|
11
12
|
from .. import metaflow_config
|
|
12
13
|
from ..exception import MetaflowInternalError
|
|
13
|
-
from ..
|
|
14
|
+
from ..metadata_provider import DataArtifact, MetaDatum
|
|
14
15
|
from ..parameters import Parameter
|
|
15
16
|
from ..util import Path, is_stringish, to_fileobj
|
|
16
17
|
|
|
@@ -98,8 +99,8 @@ class TaskDataStore(object):
|
|
|
98
99
|
data_metadata=None,
|
|
99
100
|
mode="r",
|
|
100
101
|
allow_not_done=False,
|
|
102
|
+
persist=True,
|
|
101
103
|
):
|
|
102
|
-
|
|
103
104
|
self._storage_impl = flow_datastore._storage_impl
|
|
104
105
|
self.TYPE = self._storage_impl.TYPE
|
|
105
106
|
self._ca_store = flow_datastore.ca_store
|
|
@@ -114,11 +115,12 @@ class TaskDataStore(object):
|
|
|
114
115
|
self._attempt = attempt
|
|
115
116
|
self._metadata = flow_datastore.metadata
|
|
116
117
|
self._parent = flow_datastore
|
|
118
|
+
self._persist = persist
|
|
117
119
|
|
|
118
120
|
# The GZIP encodings are for backward compatibility
|
|
119
121
|
self._encodings = {"pickle-v2", "gzip+pickle-v2"}
|
|
120
122
|
ver = sys.version_info[0] * 10 + sys.version_info[1]
|
|
121
|
-
if ver >=
|
|
123
|
+
if ver >= 36:
|
|
122
124
|
self._encodings.add("pickle-v4")
|
|
123
125
|
self._encodings.add("gzip+pickle-v4")
|
|
124
126
|
|
|
@@ -149,6 +151,8 @@ class TaskDataStore(object):
|
|
|
149
151
|
)
|
|
150
152
|
if self.has_metadata(check_meta, add_attempt=False):
|
|
151
153
|
max_attempt = i
|
|
154
|
+
elif max_attempt is not None:
|
|
155
|
+
break
|
|
152
156
|
if self._attempt is None:
|
|
153
157
|
self._attempt = max_attempt
|
|
154
158
|
elif max_attempt is None or self._attempt > max_attempt:
|
|
@@ -173,6 +177,26 @@ class TaskDataStore(object):
|
|
|
173
177
|
if data_obj is not None:
|
|
174
178
|
self._objects = data_obj.get("objects", {})
|
|
175
179
|
self._info = data_obj.get("info", {})
|
|
180
|
+
elif self._mode == "d":
|
|
181
|
+
self._objects = {}
|
|
182
|
+
self._info = {}
|
|
183
|
+
|
|
184
|
+
if self._attempt is None:
|
|
185
|
+
for i in range(metaflow_config.MAX_ATTEMPTS):
|
|
186
|
+
check_meta = self._metadata_name_for_attempt(
|
|
187
|
+
self.METADATA_ATTEMPT_SUFFIX, i
|
|
188
|
+
)
|
|
189
|
+
if self.has_metadata(check_meta, add_attempt=False):
|
|
190
|
+
self._attempt = i
|
|
191
|
+
|
|
192
|
+
# Do not allow destructive operations on the datastore if attempt is still in flight
|
|
193
|
+
# and we explicitly did not allow operating on running tasks.
|
|
194
|
+
if not allow_not_done and not self.has_metadata(self.METADATA_DONE_SUFFIX):
|
|
195
|
+
raise DataException(
|
|
196
|
+
"No completed attempts of the task was found for task '%s'"
|
|
197
|
+
% self._path
|
|
198
|
+
)
|
|
199
|
+
|
|
176
200
|
else:
|
|
177
201
|
raise DataException("Unknown datastore mode: '%s'" % self._mode)
|
|
178
202
|
|
|
@@ -203,6 +227,9 @@ class TaskDataStore(object):
|
|
|
203
227
|
@property
|
|
204
228
|
def pathspec_index(self):
|
|
205
229
|
idxstr = ",".join(map(str, (f.index for f in self["_foreach_stack"])))
|
|
230
|
+
if "_iteration_stack" in self:
|
|
231
|
+
itrstr = ",".join(map(str, (f for f in self["_iteration_stack"])))
|
|
232
|
+
return "%s/%s[%s][%s]" % (self._run_id, self._step_name, idxstr, itrstr)
|
|
206
233
|
return "%s/%s[%s]" % (self._run_id, self._step_name, idxstr)
|
|
207
234
|
|
|
208
235
|
@property
|
|
@@ -233,7 +260,73 @@ class TaskDataStore(object):
|
|
|
233
260
|
|
|
234
261
|
@only_if_not_done
|
|
235
262
|
@require_mode("w")
|
|
236
|
-
def
|
|
263
|
+
def transfer_artifacts(
|
|
264
|
+
self, other_datastore: "TaskDataStore", names: Optional[List[str]] = None
|
|
265
|
+
):
|
|
266
|
+
"""
|
|
267
|
+
Copies the blobs from other_datastore to this datastore if the datastore roots
|
|
268
|
+
are different.
|
|
269
|
+
|
|
270
|
+
This is used specifically for spin so we can bring in artifacts from the original
|
|
271
|
+
datastore.
|
|
272
|
+
|
|
273
|
+
Parameters
|
|
274
|
+
----------
|
|
275
|
+
other_datastore : TaskDataStore
|
|
276
|
+
Other datastore from which to copy artifacts
|
|
277
|
+
names : List[str], optional, default None
|
|
278
|
+
If provided, only transfer the artifacts with these names. If None,
|
|
279
|
+
transfer all artifacts from the other datastore.
|
|
280
|
+
"""
|
|
281
|
+
if (
|
|
282
|
+
other_datastore.TYPE == self.TYPE
|
|
283
|
+
and other_datastore._storage_impl.datastore_root
|
|
284
|
+
== self._storage_impl.datastore_root
|
|
285
|
+
):
|
|
286
|
+
# Nothing to transfer -- artifacts are already saved properly
|
|
287
|
+
return
|
|
288
|
+
|
|
289
|
+
# Determine which artifacts need to be transferred
|
|
290
|
+
if names is None:
|
|
291
|
+
# Transfer all artifacts from other datastore
|
|
292
|
+
artifacts_to_transfer = list(other_datastore._objects.keys())
|
|
293
|
+
else:
|
|
294
|
+
# Transfer only specified artifacts
|
|
295
|
+
artifacts_to_transfer = [
|
|
296
|
+
name for name in names if name in other_datastore._objects
|
|
297
|
+
]
|
|
298
|
+
|
|
299
|
+
if not artifacts_to_transfer:
|
|
300
|
+
return
|
|
301
|
+
|
|
302
|
+
# Get SHA keys for artifacts to transfer
|
|
303
|
+
shas_to_transfer = [
|
|
304
|
+
other_datastore._objects[name] for name in artifacts_to_transfer
|
|
305
|
+
]
|
|
306
|
+
|
|
307
|
+
# Check which blobs are missing locally
|
|
308
|
+
missing_shas = []
|
|
309
|
+
for sha in shas_to_transfer:
|
|
310
|
+
local_path = self._ca_store._storage_impl.path_join(
|
|
311
|
+
self._ca_store._prefix, sha[:2], sha
|
|
312
|
+
)
|
|
313
|
+
if not self._ca_store._storage_impl.is_file([local_path])[0]:
|
|
314
|
+
missing_shas.append(sha)
|
|
315
|
+
|
|
316
|
+
if not missing_shas:
|
|
317
|
+
return # All blobs already exist locally
|
|
318
|
+
|
|
319
|
+
# Load blobs from other datastore in transfer mode
|
|
320
|
+
transfer_blobs = other_datastore._ca_store.load_blobs(
|
|
321
|
+
missing_shas, is_transfer=True
|
|
322
|
+
)
|
|
323
|
+
|
|
324
|
+
# Save blobs to local datastore in transfer mode
|
|
325
|
+
self._ca_store.save_blobs(transfer_blobs, is_transfer=True)
|
|
326
|
+
|
|
327
|
+
@only_if_not_done
|
|
328
|
+
@require_mode("w")
|
|
329
|
+
def save_artifacts(self, artifacts_iter, len_hint=0):
|
|
237
330
|
"""
|
|
238
331
|
Saves Metaflow Artifacts (Python objects) to the datastore and stores
|
|
239
332
|
any relevant metadata needed to retrieve them.
|
|
@@ -249,11 +342,6 @@ class TaskDataStore(object):
|
|
|
249
342
|
artifacts : Iterator[(string, object)]
|
|
250
343
|
Iterator over the human-readable name of the object to save
|
|
251
344
|
and the object itself
|
|
252
|
-
force_v4 : boolean or Dict[string -> boolean]
|
|
253
|
-
Indicates whether the artifact should be pickled using the v4
|
|
254
|
-
version of pickle. If a single boolean, applies to all artifacts.
|
|
255
|
-
If a dictionary, applies to the object named only. Defaults to False
|
|
256
|
-
if not present or not specified
|
|
257
345
|
len_hint: integer
|
|
258
346
|
Estimated number of items in artifacts_iter
|
|
259
347
|
"""
|
|
@@ -261,40 +349,24 @@ class TaskDataStore(object):
|
|
|
261
349
|
|
|
262
350
|
def pickle_iter():
|
|
263
351
|
for name, obj in artifacts_iter:
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
if isinstance(force_v4, bool)
|
|
267
|
-
else force_v4.get(name, False)
|
|
268
|
-
)
|
|
269
|
-
if do_v4:
|
|
270
|
-
encode_type = "gzip+pickle-v4"
|
|
271
|
-
if encode_type not in self._encodings:
|
|
272
|
-
raise DataException(
|
|
273
|
-
"Artifact *%s* requires a serialization encoding that "
|
|
274
|
-
"requires Python 3.4 or newer." % name
|
|
275
|
-
)
|
|
352
|
+
encode_type = "gzip+pickle-v4"
|
|
353
|
+
if encode_type in self._encodings:
|
|
276
354
|
try:
|
|
277
355
|
blob = pickle.dumps(obj, protocol=4)
|
|
278
356
|
except TypeError as e:
|
|
279
|
-
raise UnpicklableArtifactException(name)
|
|
357
|
+
raise UnpicklableArtifactException(name) from e
|
|
280
358
|
else:
|
|
281
359
|
try:
|
|
282
360
|
blob = pickle.dumps(obj, protocol=2)
|
|
283
361
|
encode_type = "gzip+pickle-v2"
|
|
284
|
-
except (SystemError, OverflowError):
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
"serialize large objects." % name
|
|
291
|
-
)
|
|
292
|
-
try:
|
|
293
|
-
blob = pickle.dumps(obj, protocol=4)
|
|
294
|
-
except TypeError as e:
|
|
295
|
-
raise UnpicklableArtifactException(name)
|
|
362
|
+
except (SystemError, OverflowError) as e:
|
|
363
|
+
raise DataException(
|
|
364
|
+
"Artifact *%s* is very large (over 2GB). "
|
|
365
|
+
"You need to use Python 3.6 or newer if you want to "
|
|
366
|
+
"serialize large objects." % name
|
|
367
|
+
) from e
|
|
296
368
|
except TypeError as e:
|
|
297
|
-
raise UnpicklableArtifactException(name)
|
|
369
|
+
raise UnpicklableArtifactException(name) from e
|
|
298
370
|
|
|
299
371
|
self._info[name] = {
|
|
300
372
|
"size": len(blob),
|
|
@@ -353,7 +425,7 @@ class TaskDataStore(object):
|
|
|
353
425
|
encode_type = "gzip+pickle-v2"
|
|
354
426
|
if encode_type not in self._encodings:
|
|
355
427
|
raise DataException(
|
|
356
|
-
"Python 3.
|
|
428
|
+
"Python 3.6 or later is required to load artifact '%s'" % name
|
|
357
429
|
)
|
|
358
430
|
else:
|
|
359
431
|
to_load[self._objects[name]].append(name)
|
|
@@ -361,7 +433,7 @@ class TaskDataStore(object):
|
|
|
361
433
|
# We assume that if we have one "old" style artifact, all of them are
|
|
362
434
|
# like that which is an easy assumption to make since artifacts are all
|
|
363
435
|
# stored by the same implementation of the datastore for a given task.
|
|
364
|
-
for
|
|
436
|
+
for key, blob in self._ca_store.load_blobs(to_load.keys()):
|
|
365
437
|
names = to_load[key]
|
|
366
438
|
for name in names:
|
|
367
439
|
# We unpickle every time to have fully distinct objects (the user
|
|
@@ -682,14 +754,16 @@ class TaskDataStore(object):
|
|
|
682
754
|
flow : FlowSpec
|
|
683
755
|
Flow to persist
|
|
684
756
|
"""
|
|
757
|
+
if not self._persist:
|
|
758
|
+
return
|
|
685
759
|
|
|
686
760
|
if flow._datastore:
|
|
687
761
|
self._objects.update(flow._datastore._objects)
|
|
688
762
|
self._info.update(flow._datastore._info)
|
|
689
763
|
|
|
690
|
-
#
|
|
691
|
-
# artifacts_iter, so we can provide a len_hint below
|
|
764
|
+
# Scan flow object FIRST
|
|
692
765
|
valid_artifacts = []
|
|
766
|
+
current_artifact_names = set()
|
|
693
767
|
for var in dir(flow):
|
|
694
768
|
if var.startswith("__") or var in flow._EPHEMERAL:
|
|
695
769
|
continue
|
|
@@ -706,6 +780,16 @@ class TaskDataStore(object):
|
|
|
706
780
|
or isinstance(val, Parameter)
|
|
707
781
|
):
|
|
708
782
|
valid_artifacts.append((var, val))
|
|
783
|
+
current_artifact_names.add(var)
|
|
784
|
+
|
|
785
|
+
# Transfer ONLY artifacts that aren't being overridden
|
|
786
|
+
if hasattr(flow._datastore, "orig_datastore"):
|
|
787
|
+
parent_artifacts = set(flow._datastore._objects.keys())
|
|
788
|
+
unchanged_artifacts = parent_artifacts - current_artifact_names
|
|
789
|
+
if unchanged_artifacts:
|
|
790
|
+
self.transfer_artifacts(
|
|
791
|
+
flow._datastore.orig_datastore, names=list(unchanged_artifacts)
|
|
792
|
+
)
|
|
709
793
|
|
|
710
794
|
def artifacts_iter():
|
|
711
795
|
# we consume the valid_artifacts list destructively to
|
|
@@ -721,6 +805,7 @@ class TaskDataStore(object):
|
|
|
721
805
|
delattr(flow, var)
|
|
722
806
|
yield var, val
|
|
723
807
|
|
|
808
|
+
# Save current artifacts
|
|
724
809
|
self.save_artifacts(artifacts_iter(), len_hint=len(valid_artifacts))
|
|
725
810
|
|
|
726
811
|
@only_if_not_done
|
|
@@ -750,6 +835,36 @@ class TaskDataStore(object):
|
|
|
750
835
|
to_store_dict[n] = data
|
|
751
836
|
self._save_file(to_store_dict)
|
|
752
837
|
|
|
838
|
+
@require_mode("d")
|
|
839
|
+
def scrub_logs(self, logsources, stream, attempt_override=None):
|
|
840
|
+
path_logsources = {
|
|
841
|
+
self._metadata_name_for_attempt(
|
|
842
|
+
self._get_log_location(s, stream),
|
|
843
|
+
attempt_override=attempt_override,
|
|
844
|
+
): s
|
|
845
|
+
for s in logsources
|
|
846
|
+
}
|
|
847
|
+
|
|
848
|
+
# Legacy log paths
|
|
849
|
+
legacy_log = self._metadata_name_for_attempt(
|
|
850
|
+
"%s.log" % stream, attempt_override
|
|
851
|
+
)
|
|
852
|
+
path_logsources[legacy_log] = stream
|
|
853
|
+
|
|
854
|
+
existing_paths = [
|
|
855
|
+
path
|
|
856
|
+
for path in path_logsources.keys()
|
|
857
|
+
if self.has_metadata(path, add_attempt=False)
|
|
858
|
+
]
|
|
859
|
+
|
|
860
|
+
# Replace log contents with [REDACTED source stream]
|
|
861
|
+
to_store_dict = {
|
|
862
|
+
path: bytes("[REDACTED %s %s]" % (path_logsources[path], stream), "utf-8")
|
|
863
|
+
for path in existing_paths
|
|
864
|
+
}
|
|
865
|
+
|
|
866
|
+
self._save_file(to_store_dict, add_attempt=False, allow_overwrite=True)
|
|
867
|
+
|
|
753
868
|
@require_mode("r")
|
|
754
869
|
def load_log_legacy(self, stream, attempt_override=None):
|
|
755
870
|
"""
|
metaflow/debug.py
CHANGED
|
@@ -42,6 +42,11 @@ class Debug(object):
|
|
|
42
42
|
filename = inspect.stack()[1][1]
|
|
43
43
|
print("debug[%s %s:%s]: %s" % (typ, filename, lineno, s), file=sys.stderr)
|
|
44
44
|
|
|
45
|
+
def __getattr__(self, name):
|
|
46
|
+
# Small piece of code to get pyright and other linters to recognize that there
|
|
47
|
+
# are dynamic attributes.
|
|
48
|
+
return getattr(self, name)
|
|
49
|
+
|
|
45
50
|
def noop(self, args):
|
|
46
51
|
pass
|
|
47
52
|
|