ob-metaflow 2.11.13.1__py2.py3-none-any.whl → 2.19.7.1rc0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/R.py +10 -7
- metaflow/__init__.py +40 -25
- metaflow/_vendor/imghdr/__init__.py +186 -0
- metaflow/_vendor/importlib_metadata/__init__.py +1063 -0
- metaflow/_vendor/importlib_metadata/_adapters.py +68 -0
- metaflow/_vendor/importlib_metadata/_collections.py +30 -0
- metaflow/_vendor/importlib_metadata/_compat.py +71 -0
- metaflow/_vendor/importlib_metadata/_functools.py +104 -0
- metaflow/_vendor/importlib_metadata/_itertools.py +73 -0
- metaflow/_vendor/importlib_metadata/_meta.py +48 -0
- metaflow/_vendor/importlib_metadata/_text.py +99 -0
- metaflow/_vendor/importlib_metadata/py.typed +0 -0
- metaflow/_vendor/typeguard/__init__.py +48 -0
- metaflow/_vendor/typeguard/_checkers.py +1070 -0
- metaflow/_vendor/typeguard/_config.py +108 -0
- metaflow/_vendor/typeguard/_decorators.py +233 -0
- metaflow/_vendor/typeguard/_exceptions.py +42 -0
- metaflow/_vendor/typeguard/_functions.py +308 -0
- metaflow/_vendor/typeguard/_importhook.py +213 -0
- metaflow/_vendor/typeguard/_memo.py +48 -0
- metaflow/_vendor/typeguard/_pytest_plugin.py +127 -0
- metaflow/_vendor/typeguard/_suppression.py +86 -0
- metaflow/_vendor/typeguard/_transformer.py +1229 -0
- metaflow/_vendor/typeguard/_union_transformer.py +55 -0
- metaflow/_vendor/typeguard/_utils.py +173 -0
- metaflow/_vendor/typeguard/py.typed +0 -0
- metaflow/_vendor/typing_extensions.py +3641 -0
- metaflow/_vendor/v3_7/importlib_metadata/__init__.py +1063 -0
- metaflow/_vendor/v3_7/importlib_metadata/_adapters.py +68 -0
- metaflow/_vendor/v3_7/importlib_metadata/_collections.py +30 -0
- metaflow/_vendor/v3_7/importlib_metadata/_compat.py +71 -0
- metaflow/_vendor/v3_7/importlib_metadata/_functools.py +104 -0
- metaflow/_vendor/v3_7/importlib_metadata/_itertools.py +73 -0
- metaflow/_vendor/v3_7/importlib_metadata/_meta.py +48 -0
- metaflow/_vendor/v3_7/importlib_metadata/_text.py +99 -0
- metaflow/_vendor/v3_7/importlib_metadata/py.typed +0 -0
- metaflow/_vendor/v3_7/typeguard/__init__.py +48 -0
- metaflow/_vendor/v3_7/typeguard/_checkers.py +906 -0
- metaflow/_vendor/v3_7/typeguard/_config.py +108 -0
- metaflow/_vendor/v3_7/typeguard/_decorators.py +237 -0
- metaflow/_vendor/v3_7/typeguard/_exceptions.py +42 -0
- metaflow/_vendor/v3_7/typeguard/_functions.py +310 -0
- metaflow/_vendor/v3_7/typeguard/_importhook.py +213 -0
- metaflow/_vendor/v3_7/typeguard/_memo.py +48 -0
- metaflow/_vendor/v3_7/typeguard/_pytest_plugin.py +100 -0
- metaflow/_vendor/v3_7/typeguard/_suppression.py +88 -0
- metaflow/_vendor/v3_7/typeguard/_transformer.py +1207 -0
- metaflow/_vendor/v3_7/typeguard/_union_transformer.py +54 -0
- metaflow/_vendor/v3_7/typeguard/_utils.py +169 -0
- metaflow/_vendor/v3_7/typeguard/py.typed +0 -0
- metaflow/_vendor/v3_7/typing_extensions.py +3072 -0
- metaflow/_vendor/yaml/__init__.py +427 -0
- metaflow/_vendor/yaml/composer.py +139 -0
- metaflow/_vendor/yaml/constructor.py +748 -0
- metaflow/_vendor/yaml/cyaml.py +101 -0
- metaflow/_vendor/yaml/dumper.py +62 -0
- metaflow/_vendor/yaml/emitter.py +1137 -0
- metaflow/_vendor/yaml/error.py +75 -0
- metaflow/_vendor/yaml/events.py +86 -0
- metaflow/_vendor/yaml/loader.py +63 -0
- metaflow/_vendor/yaml/nodes.py +49 -0
- metaflow/_vendor/yaml/parser.py +589 -0
- metaflow/_vendor/yaml/reader.py +185 -0
- metaflow/_vendor/yaml/representer.py +389 -0
- metaflow/_vendor/yaml/resolver.py +227 -0
- metaflow/_vendor/yaml/scanner.py +1435 -0
- metaflow/_vendor/yaml/serializer.py +111 -0
- metaflow/_vendor/yaml/tokens.py +104 -0
- metaflow/cards.py +5 -0
- metaflow/cli.py +331 -785
- metaflow/cli_args.py +17 -0
- metaflow/cli_components/__init__.py +0 -0
- metaflow/cli_components/dump_cmd.py +96 -0
- metaflow/cli_components/init_cmd.py +52 -0
- metaflow/cli_components/run_cmds.py +546 -0
- metaflow/cli_components/step_cmd.py +334 -0
- metaflow/cli_components/utils.py +140 -0
- metaflow/client/__init__.py +1 -0
- metaflow/client/core.py +467 -73
- metaflow/client/filecache.py +75 -35
- metaflow/clone_util.py +7 -1
- metaflow/cmd/code/__init__.py +231 -0
- metaflow/cmd/develop/stub_generator.py +756 -288
- metaflow/cmd/develop/stubs.py +12 -28
- metaflow/cmd/main_cli.py +6 -4
- metaflow/cmd/make_wrapper.py +78 -0
- metaflow/datastore/__init__.py +1 -0
- metaflow/datastore/content_addressed_store.py +41 -10
- metaflow/datastore/datastore_set.py +11 -2
- metaflow/datastore/flow_datastore.py +156 -10
- metaflow/datastore/spin_datastore.py +91 -0
- metaflow/datastore/task_datastore.py +154 -39
- metaflow/debug.py +5 -0
- metaflow/decorators.py +404 -78
- metaflow/exception.py +8 -2
- metaflow/extension_support/__init__.py +527 -376
- metaflow/extension_support/_empty_file.py +2 -2
- metaflow/extension_support/plugins.py +49 -31
- metaflow/flowspec.py +482 -33
- metaflow/graph.py +210 -42
- metaflow/includefile.py +84 -40
- metaflow/lint.py +141 -22
- metaflow/meta_files.py +13 -0
- metaflow/{metadata → metadata_provider}/heartbeat.py +24 -8
- metaflow/{metadata → metadata_provider}/metadata.py +86 -1
- metaflow/metaflow_config.py +175 -28
- metaflow/metaflow_config_funcs.py +51 -3
- metaflow/metaflow_current.py +4 -10
- metaflow/metaflow_environment.py +139 -53
- metaflow/metaflow_git.py +115 -0
- metaflow/metaflow_profile.py +18 -0
- metaflow/metaflow_version.py +150 -66
- metaflow/mflog/__init__.py +4 -3
- metaflow/mflog/save_logs.py +2 -2
- metaflow/multicore_utils.py +31 -14
- metaflow/package/__init__.py +673 -0
- metaflow/packaging_sys/__init__.py +880 -0
- metaflow/packaging_sys/backend.py +128 -0
- metaflow/packaging_sys/distribution_support.py +153 -0
- metaflow/packaging_sys/tar_backend.py +99 -0
- metaflow/packaging_sys/utils.py +54 -0
- metaflow/packaging_sys/v1.py +527 -0
- metaflow/parameters.py +149 -28
- metaflow/plugins/__init__.py +74 -5
- metaflow/plugins/airflow/airflow.py +40 -25
- metaflow/plugins/airflow/airflow_cli.py +22 -5
- metaflow/plugins/airflow/airflow_decorator.py +1 -1
- metaflow/plugins/airflow/airflow_utils.py +5 -3
- metaflow/plugins/airflow/sensors/base_sensor.py +4 -4
- metaflow/plugins/airflow/sensors/external_task_sensor.py +2 -2
- metaflow/plugins/airflow/sensors/s3_sensor.py +2 -2
- metaflow/plugins/argo/argo_client.py +78 -33
- metaflow/plugins/argo/argo_events.py +6 -6
- metaflow/plugins/argo/argo_workflows.py +2410 -527
- metaflow/plugins/argo/argo_workflows_cli.py +571 -121
- metaflow/plugins/argo/argo_workflows_decorator.py +43 -12
- metaflow/plugins/argo/argo_workflows_deployer.py +106 -0
- metaflow/plugins/argo/argo_workflows_deployer_objects.py +453 -0
- metaflow/plugins/argo/capture_error.py +73 -0
- metaflow/plugins/argo/conditional_input_paths.py +35 -0
- metaflow/plugins/argo/exit_hooks.py +209 -0
- metaflow/plugins/argo/jobset_input_paths.py +15 -0
- metaflow/plugins/argo/param_val.py +19 -0
- metaflow/plugins/aws/aws_client.py +10 -3
- metaflow/plugins/aws/aws_utils.py +55 -2
- metaflow/plugins/aws/batch/batch.py +72 -5
- metaflow/plugins/aws/batch/batch_cli.py +33 -10
- metaflow/plugins/aws/batch/batch_client.py +4 -3
- metaflow/plugins/aws/batch/batch_decorator.py +102 -35
- metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
- metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
- metaflow/plugins/aws/step_functions/production_token.py +1 -1
- metaflow/plugins/aws/step_functions/step_functions.py +65 -8
- metaflow/plugins/aws/step_functions/step_functions_cli.py +101 -7
- metaflow/plugins/aws/step_functions/step_functions_decorator.py +1 -2
- metaflow/plugins/aws/step_functions/step_functions_deployer.py +97 -0
- metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +264 -0
- metaflow/plugins/azure/azure_exceptions.py +1 -1
- metaflow/plugins/azure/azure_secret_manager_secrets_provider.py +240 -0
- metaflow/plugins/azure/azure_tail.py +1 -1
- metaflow/plugins/azure/includefile_support.py +2 -0
- metaflow/plugins/cards/card_cli.py +66 -30
- metaflow/plugins/cards/card_creator.py +25 -1
- metaflow/plugins/cards/card_datastore.py +21 -49
- metaflow/plugins/cards/card_decorator.py +132 -8
- metaflow/plugins/cards/card_modules/basic.py +112 -17
- metaflow/plugins/cards/card_modules/bundle.css +1 -1
- metaflow/plugins/cards/card_modules/card.py +16 -1
- metaflow/plugins/cards/card_modules/chevron/renderer.py +1 -1
- metaflow/plugins/cards/card_modules/components.py +665 -28
- metaflow/plugins/cards/card_modules/convert_to_native_type.py +36 -7
- metaflow/plugins/cards/card_modules/json_viewer.py +232 -0
- metaflow/plugins/cards/card_modules/main.css +1 -0
- metaflow/plugins/cards/card_modules/main.js +68 -49
- metaflow/plugins/cards/card_modules/renderer_tools.py +1 -0
- metaflow/plugins/cards/card_modules/test_cards.py +26 -12
- metaflow/plugins/cards/card_server.py +39 -14
- metaflow/plugins/cards/component_serializer.py +2 -9
- metaflow/plugins/cards/metadata.py +22 -0
- metaflow/plugins/catch_decorator.py +9 -0
- metaflow/plugins/datastores/azure_storage.py +10 -1
- metaflow/plugins/datastores/gs_storage.py +6 -2
- metaflow/plugins/datastores/local_storage.py +12 -6
- metaflow/plugins/datastores/spin_storage.py +12 -0
- metaflow/plugins/datatools/local.py +2 -0
- metaflow/plugins/datatools/s3/s3.py +126 -75
- metaflow/plugins/datatools/s3/s3op.py +254 -121
- metaflow/plugins/env_escape/__init__.py +3 -3
- metaflow/plugins/env_escape/client_modules.py +102 -72
- metaflow/plugins/env_escape/server.py +7 -0
- metaflow/plugins/env_escape/stub.py +24 -5
- metaflow/plugins/events_decorator.py +343 -185
- metaflow/plugins/exit_hook/__init__.py +0 -0
- metaflow/plugins/exit_hook/exit_hook_decorator.py +46 -0
- metaflow/plugins/exit_hook/exit_hook_script.py +52 -0
- metaflow/plugins/gcp/__init__.py +1 -1
- metaflow/plugins/gcp/gcp_secret_manager_secrets_provider.py +11 -6
- metaflow/plugins/gcp/gs_tail.py +10 -6
- metaflow/plugins/gcp/includefile_support.py +3 -0
- metaflow/plugins/kubernetes/kube_utils.py +108 -0
- metaflow/plugins/kubernetes/kubernetes.py +411 -130
- metaflow/plugins/kubernetes/kubernetes_cli.py +168 -36
- metaflow/plugins/kubernetes/kubernetes_client.py +104 -2
- metaflow/plugins/kubernetes/kubernetes_decorator.py +246 -88
- metaflow/plugins/kubernetes/kubernetes_job.py +253 -581
- metaflow/plugins/kubernetes/kubernetes_jobsets.py +1071 -0
- metaflow/plugins/kubernetes/spot_metadata_cli.py +69 -0
- metaflow/plugins/kubernetes/spot_monitor_sidecar.py +109 -0
- metaflow/plugins/logs_cli.py +359 -0
- metaflow/plugins/{metadata → metadata_providers}/local.py +144 -84
- metaflow/plugins/{metadata → metadata_providers}/service.py +103 -26
- metaflow/plugins/metadata_providers/spin.py +16 -0
- metaflow/plugins/package_cli.py +36 -24
- metaflow/plugins/parallel_decorator.py +128 -11
- metaflow/plugins/parsers.py +16 -0
- metaflow/plugins/project_decorator.py +51 -5
- metaflow/plugins/pypi/bootstrap.py +357 -105
- metaflow/plugins/pypi/conda_decorator.py +82 -81
- metaflow/plugins/pypi/conda_environment.py +187 -52
- metaflow/plugins/pypi/micromamba.py +157 -47
- metaflow/plugins/pypi/parsers.py +268 -0
- metaflow/plugins/pypi/pip.py +88 -13
- metaflow/plugins/pypi/pypi_decorator.py +37 -1
- metaflow/plugins/pypi/utils.py +48 -2
- metaflow/plugins/resources_decorator.py +2 -2
- metaflow/plugins/secrets/__init__.py +3 -0
- metaflow/plugins/secrets/secrets_decorator.py +26 -181
- metaflow/plugins/secrets/secrets_func.py +49 -0
- metaflow/plugins/secrets/secrets_spec.py +101 -0
- metaflow/plugins/secrets/utils.py +74 -0
- metaflow/plugins/tag_cli.py +4 -7
- metaflow/plugins/test_unbounded_foreach_decorator.py +41 -6
- metaflow/plugins/timeout_decorator.py +3 -3
- metaflow/plugins/uv/__init__.py +0 -0
- metaflow/plugins/uv/bootstrap.py +128 -0
- metaflow/plugins/uv/uv_environment.py +72 -0
- metaflow/procpoll.py +1 -1
- metaflow/pylint_wrapper.py +5 -1
- metaflow/runner/__init__.py +0 -0
- metaflow/runner/click_api.py +717 -0
- metaflow/runner/deployer.py +470 -0
- metaflow/runner/deployer_impl.py +201 -0
- metaflow/runner/metaflow_runner.py +714 -0
- metaflow/runner/nbdeploy.py +132 -0
- metaflow/runner/nbrun.py +225 -0
- metaflow/runner/subprocess_manager.py +650 -0
- metaflow/runner/utils.py +335 -0
- metaflow/runtime.py +1078 -260
- metaflow/sidecar/sidecar_worker.py +1 -1
- metaflow/system/__init__.py +5 -0
- metaflow/system/system_logger.py +85 -0
- metaflow/system/system_monitor.py +108 -0
- metaflow/system/system_utils.py +19 -0
- metaflow/task.py +521 -225
- metaflow/tracing/__init__.py +7 -7
- metaflow/tracing/span_exporter.py +31 -38
- metaflow/tracing/tracing_modules.py +38 -43
- metaflow/tuple_util.py +27 -0
- metaflow/user_configs/__init__.py +0 -0
- metaflow/user_configs/config_options.py +563 -0
- metaflow/user_configs/config_parameters.py +598 -0
- metaflow/user_decorators/__init__.py +0 -0
- metaflow/user_decorators/common.py +144 -0
- metaflow/user_decorators/mutable_flow.py +512 -0
- metaflow/user_decorators/mutable_step.py +424 -0
- metaflow/user_decorators/user_flow_decorator.py +264 -0
- metaflow/user_decorators/user_step_decorator.py +749 -0
- metaflow/util.py +243 -27
- metaflow/vendor.py +23 -7
- metaflow/version.py +1 -1
- ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Makefile +355 -0
- ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Tiltfile +726 -0
- ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/pick_services.sh +105 -0
- ob_metaflow-2.19.7.1rc0.dist-info/METADATA +87 -0
- ob_metaflow-2.19.7.1rc0.dist-info/RECORD +445 -0
- {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/WHEEL +1 -1
- {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/entry_points.txt +1 -0
- metaflow/_vendor/v3_5/__init__.py +0 -1
- metaflow/_vendor/v3_5/importlib_metadata/__init__.py +0 -644
- metaflow/_vendor/v3_5/importlib_metadata/_compat.py +0 -152
- metaflow/package.py +0 -188
- ob_metaflow-2.11.13.1.dist-info/METADATA +0 -85
- ob_metaflow-2.11.13.1.dist-info/RECORD +0 -308
- /metaflow/_vendor/{v3_5/zipp.py → zipp.py} +0 -0
- /metaflow/{metadata → metadata_provider}/__init__.py +0 -0
- /metaflow/{metadata → metadata_provider}/util.py +0 -0
- /metaflow/plugins/{metadata → metadata_providers}/__init__.py +0 -0
- {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info/licenses}/LICENSE +0 -0
- {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/top_level.txt +0 -0
metaflow/cmd/develop/stubs.py
CHANGED
|
@@ -12,23 +12,13 @@ from . import develop
|
|
|
12
12
|
from .stub_generator import StubGenerator
|
|
13
13
|
|
|
14
14
|
_py_ver = sys.version_info[:2]
|
|
15
|
-
_metadata_package = None
|
|
16
15
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
if _py_ver >= (3, 4):
|
|
24
|
-
if _py_ver >= (3, 8):
|
|
25
|
-
from importlib import metadata
|
|
26
|
-
elif _py_ver >= (3, 6):
|
|
27
|
-
from metaflow._vendor.v3_6 import importlib_metadata as metadata
|
|
28
|
-
else:
|
|
29
|
-
from metaflow._vendor.v3_5 import importlib_metadata as metadata
|
|
30
|
-
_metadata_package = metadata
|
|
31
|
-
return _metadata_package
|
|
16
|
+
if _py_ver >= (3, 8):
|
|
17
|
+
from importlib import metadata
|
|
18
|
+
elif _py_ver >= (3, 7):
|
|
19
|
+
from metaflow._vendor.v3_7 import importlib_metadata as metadata
|
|
20
|
+
else:
|
|
21
|
+
from metaflow._vendor.v3_6 import importlib_metadata as metadata
|
|
32
22
|
|
|
33
23
|
|
|
34
24
|
@develop.group(short_help="Stubs management")
|
|
@@ -43,12 +33,6 @@ def stubs(ctx: Any):
|
|
|
43
33
|
This CLI provides utilities to check and generate stubs for your current Metaflow
|
|
44
34
|
installation.
|
|
45
35
|
"""
|
|
46
|
-
if _check_stubs_supported() is None:
|
|
47
|
-
raise click.UsageError(
|
|
48
|
-
"Building and installing stubs are not supported on Python %d.%d "
|
|
49
|
-
"(3.4 minimum required)" % _py_ver,
|
|
50
|
-
ctx=ctx,
|
|
51
|
-
)
|
|
52
36
|
|
|
53
37
|
|
|
54
38
|
@stubs.command(short_help="Check validity of stubs")
|
|
@@ -168,7 +152,7 @@ def install(ctx: Any, force: bool):
|
|
|
168
152
|
"Metaflow stubs are already installed and valid -- use --force to reinstall"
|
|
169
153
|
)
|
|
170
154
|
return
|
|
171
|
-
mf_version, _ = get_mf_version()
|
|
155
|
+
mf_version, _ = get_mf_version(True)
|
|
172
156
|
with tempfile.TemporaryDirectory() as tmp_dir:
|
|
173
157
|
with open(os.path.join(tmp_dir, "setup.py"), "w") as f:
|
|
174
158
|
f.write(
|
|
@@ -185,7 +169,7 @@ setup(
|
|
|
185
169
|
packages=find_namespace_packages(),
|
|
186
170
|
package_data={{"metaflow-stubs": ["generated_for.txt", "py.typed", "**/*.pyi"]}},
|
|
187
171
|
install_requires=["metaflow=={mf_version}"],
|
|
188
|
-
python_requires=">=3.
|
|
172
|
+
python_requires=">=3.6.1",
|
|
189
173
|
)
|
|
190
174
|
"""
|
|
191
175
|
)
|
|
@@ -259,10 +243,10 @@ def split_version(vers: str) -> Tuple[str, Optional[str]]:
|
|
|
259
243
|
return vers_split[0], vers_split[1]
|
|
260
244
|
|
|
261
245
|
|
|
262
|
-
def get_mf_version() -> Tuple[str, Optional[str]]:
|
|
246
|
+
def get_mf_version(public: bool = False) -> Tuple[str, Optional[str]]:
|
|
263
247
|
from metaflow.metaflow_version import get_version
|
|
264
248
|
|
|
265
|
-
return split_version(get_version())
|
|
249
|
+
return split_version(get_version(public))
|
|
266
250
|
|
|
267
251
|
|
|
268
252
|
def get_stubs_version(stubs_root_path: Optional[str]) -> Tuple[str, Optional[str]]:
|
|
@@ -328,14 +312,14 @@ def get_packages_for_stubs() -> Tuple[List[Tuple[str, str]], List[str]]:
|
|
|
328
312
|
# some reason it shows up multiple times.
|
|
329
313
|
interesting_dists = [
|
|
330
314
|
d
|
|
331
|
-
for d in
|
|
315
|
+
for d in metadata.distributions()
|
|
332
316
|
if any(
|
|
333
317
|
[
|
|
334
318
|
p == "metaflow-stubs"
|
|
335
319
|
for p in (d.read_text("top_level.txt") or "").split()
|
|
336
320
|
]
|
|
337
321
|
)
|
|
338
|
-
and isinstance(d,
|
|
322
|
+
and isinstance(d, metadata.PathDistribution)
|
|
339
323
|
]
|
|
340
324
|
|
|
341
325
|
for dist in interesting_dists:
|
metaflow/cmd/main_cli.py
CHANGED
|
@@ -12,7 +12,7 @@ import metaflow.tracing as tracing
|
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
@click.group()
|
|
15
|
-
@tracing.
|
|
15
|
+
@tracing.cli("cli/main")
|
|
16
16
|
def main():
|
|
17
17
|
pass
|
|
18
18
|
|
|
@@ -67,6 +67,7 @@ CMDS_DESC = [
|
|
|
67
67
|
("configure", ".configure_cmd.cli"),
|
|
68
68
|
("tutorials", ".tutorials_cmd.cli"),
|
|
69
69
|
("develop", ".develop.cli"),
|
|
70
|
+
("code", ".code.cli"),
|
|
70
71
|
]
|
|
71
72
|
|
|
72
73
|
process_cmds(globals())
|
|
@@ -84,15 +85,16 @@ def start(ctx):
|
|
|
84
85
|
|
|
85
86
|
import metaflow
|
|
86
87
|
|
|
88
|
+
version = get_version()
|
|
87
89
|
echo("Metaflow ", fg="magenta", bold=True, nl=False)
|
|
88
90
|
|
|
89
91
|
if ctx.invoked_subcommand is None:
|
|
90
|
-
echo("(%s): " %
|
|
92
|
+
echo("(%s): " % version, fg="magenta", bold=False, nl=False)
|
|
91
93
|
else:
|
|
92
|
-
echo("(%s)\n" %
|
|
94
|
+
echo("(%s)\n" % version, fg="magenta", bold=False)
|
|
93
95
|
|
|
94
96
|
if ctx.invoked_subcommand is None:
|
|
95
|
-
echo("More
|
|
97
|
+
echo("More AI, less engineering\n", fg="magenta")
|
|
96
98
|
|
|
97
99
|
lnk_sz = max(len(lnk) for lnk in CONTACT_INFO.values()) + 1
|
|
98
100
|
for what, lnk in CONTACT_INFO.items():
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
import subprocess
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
import sysconfig
|
|
5
|
+
import site
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def find_makefile():
|
|
9
|
+
possible_dirs = []
|
|
10
|
+
|
|
11
|
+
# 1) The standard sysconfig-based location
|
|
12
|
+
data_dir = sysconfig.get_paths()["data"]
|
|
13
|
+
possible_dirs.append(Path(data_dir) / "share" / "metaflow" / "devtools")
|
|
14
|
+
|
|
15
|
+
# 2) The user base (e.g. ~/.local on many systems)
|
|
16
|
+
user_base = site.getuserbase() # e.g. /home/runner/.local
|
|
17
|
+
possible_dirs.append(Path(user_base) / "share" / "metaflow" / "devtools")
|
|
18
|
+
|
|
19
|
+
# 3) site-packages can vary, we can guess share/.. near each site-packages
|
|
20
|
+
# (Works if pip actually placed devtools near site-packages.)
|
|
21
|
+
for p in site.getsitepackages():
|
|
22
|
+
possible_dirs.append(Path(p).parent / "share" / "metaflow" / "devtools")
|
|
23
|
+
user_site = site.getusersitepackages()
|
|
24
|
+
possible_dirs.append(Path(user_site).parent / "share" / "metaflow" / "devtools")
|
|
25
|
+
|
|
26
|
+
for candidate_dir in possible_dirs:
|
|
27
|
+
makefile_candidate = candidate_dir / "Makefile"
|
|
28
|
+
if makefile_candidate.is_file():
|
|
29
|
+
return makefile_candidate
|
|
30
|
+
|
|
31
|
+
# 4) When developing, Metaflow might be installed with --editable, which means the devtools will not be located within site-packages.
|
|
32
|
+
# We read the actual location from package metadata in this case, but only do this heavier operation if the above lookups fail.
|
|
33
|
+
try:
|
|
34
|
+
import json
|
|
35
|
+
from importlib.metadata import Distribution
|
|
36
|
+
|
|
37
|
+
direct_url = Distribution.from_name("metaflow").read_text("direct_url.json")
|
|
38
|
+
if direct_url:
|
|
39
|
+
content = json.loads(direct_url)
|
|
40
|
+
url = content.get("url", "")
|
|
41
|
+
if not url.startswith("file://"):
|
|
42
|
+
return None
|
|
43
|
+
|
|
44
|
+
makefile_candidate = (
|
|
45
|
+
Path(url.replace("file://", "")) / "devtools" / "Makefile"
|
|
46
|
+
)
|
|
47
|
+
if makefile_candidate.is_file():
|
|
48
|
+
return makefile_candidate
|
|
49
|
+
else:
|
|
50
|
+
# No dist metadata found. This is tied to the version of pip being used
|
|
51
|
+
# Do not bother with .egg-link installs due to the handling of the file contents being a headache due to lack of a unified spec.
|
|
52
|
+
print(
|
|
53
|
+
"Could not locate an installation of Metaflow. No package metadata found."
|
|
54
|
+
)
|
|
55
|
+
print(
|
|
56
|
+
"If Metaflow is installed as editable, try upgrading the version of pip and reinstalling in order to generate proper package metadata.\n"
|
|
57
|
+
)
|
|
58
|
+
except Exception:
|
|
59
|
+
return None
|
|
60
|
+
|
|
61
|
+
return None
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def main():
|
|
65
|
+
makefile_path = find_makefile()
|
|
66
|
+
if not makefile_path:
|
|
67
|
+
print("ERROR: Could not find executable in any known location.")
|
|
68
|
+
sys.exit(1)
|
|
69
|
+
cmd = ["make", "-f", str(makefile_path)] + sys.argv[1:]
|
|
70
|
+
|
|
71
|
+
try:
|
|
72
|
+
completed = subprocess.run(cmd, check=True)
|
|
73
|
+
sys.exit(completed.returncode)
|
|
74
|
+
except subprocess.CalledProcessError as ex:
|
|
75
|
+
sys.exit(ex.returncode)
|
|
76
|
+
except KeyboardInterrupt:
|
|
77
|
+
print("Process interrupted by user. Exiting cleanly.")
|
|
78
|
+
sys.exit(1)
|
metaflow/datastore/__init__.py
CHANGED
|
@@ -38,7 +38,7 @@ class ContentAddressedStore(object):
|
|
|
38
38
|
def set_blob_cache(self, blob_cache):
|
|
39
39
|
self._blob_cache = blob_cache
|
|
40
40
|
|
|
41
|
-
def save_blobs(self, blob_iter, raw=False, len_hint=0):
|
|
41
|
+
def save_blobs(self, blob_iter, raw=False, len_hint=0, is_transfer=False):
|
|
42
42
|
"""
|
|
43
43
|
Saves blobs of data to the datastore
|
|
44
44
|
|
|
@@ -60,11 +60,16 @@ class ContentAddressedStore(object):
|
|
|
60
60
|
|
|
61
61
|
Parameters
|
|
62
62
|
----------
|
|
63
|
-
blob_iter : Iterator
|
|
64
|
-
|
|
63
|
+
blob_iter : Iterator
|
|
64
|
+
Iterator over bytes objects to save
|
|
65
|
+
raw : bool, default False
|
|
65
66
|
Whether to save the bytes directly or process them, by default False
|
|
66
|
-
len_hint :
|
|
67
|
+
len_hint : int, default 0
|
|
68
|
+
Hint of the number of blobs that will be produced by the
|
|
67
69
|
iterator, by default 0
|
|
70
|
+
is_transfer : bool, default False
|
|
71
|
+
If True, this indicates we are saving blobs directly from the output of another
|
|
72
|
+
content addressed store's
|
|
68
73
|
|
|
69
74
|
Returns
|
|
70
75
|
-------
|
|
@@ -76,6 +81,20 @@ class ContentAddressedStore(object):
|
|
|
76
81
|
|
|
77
82
|
def packing_iter():
|
|
78
83
|
for blob in blob_iter:
|
|
84
|
+
if is_transfer:
|
|
85
|
+
key, blob_data, meta = blob
|
|
86
|
+
path = self._storage_impl.path_join(self._prefix, key[:2], key)
|
|
87
|
+
# Transfer data is always raw/decompressed, so mark it as such
|
|
88
|
+
meta_corrected = {"cas_raw": True, "cas_version": 1}
|
|
89
|
+
|
|
90
|
+
results.append(
|
|
91
|
+
self.save_blobs_result(
|
|
92
|
+
uri=self._storage_impl.full_uri(path),
|
|
93
|
+
key=key,
|
|
94
|
+
)
|
|
95
|
+
)
|
|
96
|
+
yield path, (BytesIO(blob_data), meta_corrected)
|
|
97
|
+
continue
|
|
79
98
|
sha = sha1(blob).hexdigest()
|
|
80
99
|
path = self._storage_impl.path_join(self._prefix, sha[:2], sha)
|
|
81
100
|
results.append(
|
|
@@ -100,7 +119,7 @@ class ContentAddressedStore(object):
|
|
|
100
119
|
self._storage_impl.save_bytes(packing_iter(), overwrite=True, len_hint=len_hint)
|
|
101
120
|
return results
|
|
102
121
|
|
|
103
|
-
def load_blobs(self, keys, force_raw=False):
|
|
122
|
+
def load_blobs(self, keys, force_raw=False, is_transfer=False):
|
|
104
123
|
"""
|
|
105
124
|
Mirror function of save_blobs
|
|
106
125
|
|
|
@@ -111,15 +130,20 @@ class ContentAddressedStore(object):
|
|
|
111
130
|
----------
|
|
112
131
|
keys : List of string
|
|
113
132
|
Key describing the object to load
|
|
114
|
-
force_raw : bool,
|
|
133
|
+
force_raw : bool, default False
|
|
115
134
|
Support for backward compatibility with previous datastores. If
|
|
116
135
|
True, this will force the key to be loaded as is (raw). By default,
|
|
117
136
|
False
|
|
137
|
+
is_transfer : bool, default False
|
|
138
|
+
If True, this indicates we are loading blobs to transfer them directly
|
|
139
|
+
to another datastore. We will, in this case, also transfer the metadata
|
|
140
|
+
and do minimal processing. This is for internal use only.
|
|
118
141
|
|
|
119
142
|
Returns
|
|
120
143
|
-------
|
|
121
144
|
Returns an iterator of (string, bytes) tuples; the iterator may return keys
|
|
122
|
-
in a different order than were passed in.
|
|
145
|
+
in a different order than were passed in. If is_transfer is True, the tuple
|
|
146
|
+
has three elements with the third one being the metadata.
|
|
123
147
|
"""
|
|
124
148
|
load_paths = []
|
|
125
149
|
for key in keys:
|
|
@@ -127,13 +151,17 @@ class ContentAddressedStore(object):
|
|
|
127
151
|
if self._blob_cache:
|
|
128
152
|
blob = self._blob_cache.load_key(key)
|
|
129
153
|
if blob is not None:
|
|
130
|
-
|
|
154
|
+
if is_transfer:
|
|
155
|
+
# Cached blobs are decompressed/processed bytes regardless of original format
|
|
156
|
+
yield key, blob, {"cas_raw": False, "cas_version": 1}
|
|
157
|
+
else:
|
|
158
|
+
yield key, blob
|
|
131
159
|
else:
|
|
132
160
|
path = self._storage_impl.path_join(self._prefix, key[:2], key)
|
|
133
161
|
load_paths.append((key, path))
|
|
134
162
|
|
|
135
163
|
with self._storage_impl.load_bytes([p for _, p in load_paths]) as loaded:
|
|
136
|
-
for
|
|
164
|
+
for path_key, file_path, meta in loaded:
|
|
137
165
|
key = self._storage_impl.path_split(path_key)[-1]
|
|
138
166
|
# At this point, we either return the object as is (if raw) or
|
|
139
167
|
# decode it according to the encoding version
|
|
@@ -169,7 +197,10 @@ class ContentAddressedStore(object):
|
|
|
169
197
|
if self._blob_cache:
|
|
170
198
|
self._blob_cache.store_key(key, blob)
|
|
171
199
|
|
|
172
|
-
|
|
200
|
+
if is_transfer:
|
|
201
|
+
yield key, blob, meta # Preserve exact original metadata from storage
|
|
202
|
+
else:
|
|
203
|
+
yield key, blob
|
|
173
204
|
|
|
174
205
|
def _unpack_backward_compatible(self, blob):
|
|
175
206
|
# This is the backward compatible unpack
|
|
@@ -21,9 +21,18 @@ class TaskDataStoreSet(object):
|
|
|
21
21
|
pathspecs=None,
|
|
22
22
|
prefetch_data_artifacts=None,
|
|
23
23
|
allow_not_done=False,
|
|
24
|
+
join_type=None,
|
|
25
|
+
orig_flow_datastore=None,
|
|
26
|
+
spin_artifacts=None,
|
|
24
27
|
):
|
|
25
|
-
self.task_datastores = flow_datastore.
|
|
26
|
-
run_id,
|
|
28
|
+
self.task_datastores = flow_datastore.get_task_datastores(
|
|
29
|
+
run_id,
|
|
30
|
+
steps=steps,
|
|
31
|
+
pathspecs=pathspecs,
|
|
32
|
+
allow_not_done=allow_not_done,
|
|
33
|
+
join_type=join_type,
|
|
34
|
+
orig_flow_datastore=orig_flow_datastore,
|
|
35
|
+
spin_artifacts=spin_artifacts,
|
|
27
36
|
)
|
|
28
37
|
|
|
29
38
|
if prefetch_data_artifacts:
|
|
@@ -1,10 +1,13 @@
|
|
|
1
1
|
import itertools
|
|
2
2
|
import json
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
3
4
|
|
|
4
5
|
from .. import metaflow_config
|
|
5
6
|
|
|
6
7
|
from .content_addressed_store import ContentAddressedStore
|
|
7
8
|
from .task_datastore import TaskDataStore
|
|
9
|
+
from .spin_datastore import SpinTaskDatastore
|
|
10
|
+
from ..metaflow_profile import from_start
|
|
8
11
|
|
|
9
12
|
|
|
10
13
|
class FlowDataStore(object):
|
|
@@ -13,7 +16,7 @@ class FlowDataStore(object):
|
|
|
13
16
|
def __init__(
|
|
14
17
|
self,
|
|
15
18
|
flow_name,
|
|
16
|
-
environment,
|
|
19
|
+
environment=None,
|
|
17
20
|
metadata=None,
|
|
18
21
|
event_logger=None,
|
|
19
22
|
monitor=None,
|
|
@@ -31,7 +34,7 @@ class FlowDataStore(object):
|
|
|
31
34
|
----------
|
|
32
35
|
flow_name : str
|
|
33
36
|
The name of the flow
|
|
34
|
-
environment : MetaflowEnvironment
|
|
37
|
+
environment : MetaflowEnvironment, optional
|
|
35
38
|
Environment this datastore is operating in
|
|
36
39
|
metadata : MetadataProvider, optional
|
|
37
40
|
The metadata provider to use and update if needed, by default None
|
|
@@ -63,12 +66,28 @@ class FlowDataStore(object):
|
|
|
63
66
|
self._storage_impl.path_join(self.flow_name, "data"), self._storage_impl
|
|
64
67
|
)
|
|
65
68
|
|
|
69
|
+
# Private
|
|
70
|
+
self._metadata_cache = None
|
|
71
|
+
|
|
66
72
|
@property
|
|
67
73
|
def datastore_root(self):
|
|
68
74
|
return self._storage_impl.datastore_root
|
|
69
75
|
|
|
70
|
-
def
|
|
71
|
-
self
|
|
76
|
+
def set_metadata_cache(self, cache):
|
|
77
|
+
self._metadata_cache = cache
|
|
78
|
+
|
|
79
|
+
def get_task_datastores(
|
|
80
|
+
self,
|
|
81
|
+
run_id=None,
|
|
82
|
+
steps=None,
|
|
83
|
+
pathspecs=None,
|
|
84
|
+
allow_not_done=False,
|
|
85
|
+
attempt=None,
|
|
86
|
+
include_prior=False,
|
|
87
|
+
mode="r",
|
|
88
|
+
join_type=None,
|
|
89
|
+
orig_flow_datastore=None,
|
|
90
|
+
spin_artifacts=None,
|
|
72
91
|
):
|
|
73
92
|
"""
|
|
74
93
|
Return a list of TaskDataStore for a subset of the tasks.
|
|
@@ -88,11 +107,27 @@ class FlowDataStore(object):
|
|
|
88
107
|
Steps to get the tasks from. If run_id is specified, this
|
|
89
108
|
must also be specified, by default None
|
|
90
109
|
pathspecs : List[str], optional
|
|
91
|
-
Full task specs (run_id/step_name/task_id). Can be used instead of
|
|
110
|
+
Full task specs (run_id/step_name/task_id[/attempt]). Can be used instead of
|
|
92
111
|
specifying run_id and steps, by default None
|
|
93
112
|
allow_not_done : bool, optional
|
|
94
113
|
If True, returns the latest attempt of a task even if that attempt
|
|
95
114
|
wasn't marked as done, by default False
|
|
115
|
+
attempt : int, optional
|
|
116
|
+
Attempt number of the tasks to return. If not provided, returns latest attempt.
|
|
117
|
+
include_prior : boolean, default False
|
|
118
|
+
If True, returns all attempts up to and including attempt.
|
|
119
|
+
mode : str, default "r"
|
|
120
|
+
Mode to initialize the returned TaskDataStores in.
|
|
121
|
+
join_type : str, optional, default None
|
|
122
|
+
If specified, the join type for the task. This is used to determine
|
|
123
|
+
the user specified artifacts for the task in case of a spin task.
|
|
124
|
+
orig_flow_datastore : MetadataProvider, optional, default None
|
|
125
|
+
The metadata provider in case of a spin task. If provided, the
|
|
126
|
+
returned TaskDataStore will be a SpinTaskDatastore instead of a
|
|
127
|
+
TaskDataStore.
|
|
128
|
+
spin_artifacts : Dict[str, Any], optional, default None
|
|
129
|
+
Artifacts provided by user that can override the artifacts fetched via the
|
|
130
|
+
spin pathspec.
|
|
96
131
|
|
|
97
132
|
Returns
|
|
98
133
|
-------
|
|
@@ -126,8 +161,20 @@ class FlowDataStore(object):
|
|
|
126
161
|
if task.is_file is False
|
|
127
162
|
]
|
|
128
163
|
urls = []
|
|
164
|
+
# parse content urls for specific attempt only, or for all attempts in max range
|
|
165
|
+
attempt_range = range(metaflow_config.MAX_ATTEMPTS)
|
|
166
|
+
# we have no reason to check for attempts greater than MAX_ATTEMPTS, as they do not exist.
|
|
167
|
+
if attempt is not None and attempt <= metaflow_config.MAX_ATTEMPTS - 1:
|
|
168
|
+
attempt_range = range(attempt + 1) if include_prior else [attempt]
|
|
129
169
|
for task_url in task_urls:
|
|
130
|
-
|
|
170
|
+
# task_url can have a trailing slash, so strip this to avoid empty strings in the split
|
|
171
|
+
task_splits = task_url.rstrip("/").split("/")
|
|
172
|
+
# Usually it is flow, run, step, task (so 4 components) -- if we have a
|
|
173
|
+
# fifth one, there is a specific attempt number listed as well.
|
|
174
|
+
task_attempt_range = attempt_range
|
|
175
|
+
if len(task_splits) == 5:
|
|
176
|
+
task_attempt_range = [int(task_splits[4])]
|
|
177
|
+
for attempt in task_attempt_range:
|
|
131
178
|
for suffix in [
|
|
132
179
|
TaskDataStore.METADATA_DATA_SUFFIX,
|
|
133
180
|
TaskDataStore.METADATA_ATTEMPT_SUFFIX,
|
|
@@ -168,11 +215,30 @@ class FlowDataStore(object):
|
|
|
168
215
|
for (run, step, task), attempt in latest_started_attempts.items()
|
|
169
216
|
)
|
|
170
217
|
if allow_not_done:
|
|
171
|
-
latest_to_fetch =
|
|
218
|
+
latest_to_fetch = (
|
|
219
|
+
done_attempts.union(latest_started_attempts)
|
|
220
|
+
if include_prior
|
|
221
|
+
else latest_started_attempts
|
|
222
|
+
)
|
|
172
223
|
else:
|
|
173
|
-
latest_to_fetch =
|
|
224
|
+
latest_to_fetch = (
|
|
225
|
+
done_attempts
|
|
226
|
+
if include_prior
|
|
227
|
+
else (latest_started_attempts & done_attempts)
|
|
228
|
+
)
|
|
174
229
|
latest_to_fetch = [
|
|
175
|
-
(
|
|
230
|
+
(
|
|
231
|
+
v[0],
|
|
232
|
+
v[1],
|
|
233
|
+
v[2],
|
|
234
|
+
v[3],
|
|
235
|
+
data_objs.get(v),
|
|
236
|
+
mode,
|
|
237
|
+
allow_not_done,
|
|
238
|
+
join_type,
|
|
239
|
+
orig_flow_datastore,
|
|
240
|
+
spin_artifacts,
|
|
241
|
+
)
|
|
176
242
|
for v in latest_to_fetch
|
|
177
243
|
]
|
|
178
244
|
return list(itertools.starmap(self.get_task_datastore, latest_to_fetch))
|
|
@@ -186,8 +252,63 @@ class FlowDataStore(object):
|
|
|
186
252
|
data_metadata=None,
|
|
187
253
|
mode="r",
|
|
188
254
|
allow_not_done=False,
|
|
255
|
+
join_type=None,
|
|
256
|
+
orig_flow_datastore=None,
|
|
257
|
+
spin_artifacts=None,
|
|
258
|
+
persist=True,
|
|
189
259
|
):
|
|
190
|
-
|
|
260
|
+
if orig_flow_datastore is not None:
|
|
261
|
+
# In spin step subprocess, use SpinTaskDatastore for accessing artifacts
|
|
262
|
+
if join_type is not None:
|
|
263
|
+
# If join_type is specified, we need to use the artifacts corresponding
|
|
264
|
+
# to that particular join index, specified by the parent task pathspec.
|
|
265
|
+
spin_artifacts = spin_artifacts.get(
|
|
266
|
+
f"{run_id}/{step_name}/{task_id}", {}
|
|
267
|
+
)
|
|
268
|
+
from_start(
|
|
269
|
+
"FlowDataStore: get_task_datastore for spin task for type %s %s metadata"
|
|
270
|
+
% (self.TYPE, "without" if data_metadata is None else "with")
|
|
271
|
+
)
|
|
272
|
+
# Get the task datastore for the spun task.
|
|
273
|
+
orig_datastore = orig_flow_datastore.get_task_datastore(
|
|
274
|
+
run_id,
|
|
275
|
+
step_name,
|
|
276
|
+
task_id,
|
|
277
|
+
attempt=attempt,
|
|
278
|
+
data_metadata=data_metadata,
|
|
279
|
+
mode=mode,
|
|
280
|
+
allow_not_done=allow_not_done,
|
|
281
|
+
persist=persist,
|
|
282
|
+
)
|
|
283
|
+
|
|
284
|
+
return SpinTaskDatastore(
|
|
285
|
+
self.flow_name,
|
|
286
|
+
run_id,
|
|
287
|
+
step_name,
|
|
288
|
+
task_id,
|
|
289
|
+
orig_datastore,
|
|
290
|
+
spin_artifacts,
|
|
291
|
+
)
|
|
292
|
+
|
|
293
|
+
cache_hit = False
|
|
294
|
+
if (
|
|
295
|
+
self._metadata_cache is not None
|
|
296
|
+
and data_metadata is None
|
|
297
|
+
and attempt is not None
|
|
298
|
+
and allow_not_done is False
|
|
299
|
+
):
|
|
300
|
+
# If we have a metadata cache, we can try to load the metadata
|
|
301
|
+
# from the cache if it is not provided.
|
|
302
|
+
data_metadata = self._metadata_cache.load_metadata(
|
|
303
|
+
run_id, step_name, task_id, attempt
|
|
304
|
+
)
|
|
305
|
+
cache_hit = data_metadata is not None
|
|
306
|
+
|
|
307
|
+
from_start(
|
|
308
|
+
"FlowDataStore: get_task_datastore for regular task for type %s %s metadata"
|
|
309
|
+
% (self.TYPE, "without" if data_metadata is None else "with")
|
|
310
|
+
)
|
|
311
|
+
task_datastore = TaskDataStore(
|
|
191
312
|
self,
|
|
192
313
|
run_id,
|
|
193
314
|
step_name,
|
|
@@ -196,8 +317,23 @@ class FlowDataStore(object):
|
|
|
196
317
|
data_metadata=data_metadata,
|
|
197
318
|
mode=mode,
|
|
198
319
|
allow_not_done=allow_not_done,
|
|
320
|
+
persist=persist,
|
|
199
321
|
)
|
|
200
322
|
|
|
323
|
+
# Only persist in cache if it is non-changing (so done only) and we have
|
|
324
|
+
# a non-None attempt
|
|
325
|
+
if (
|
|
326
|
+
not cache_hit
|
|
327
|
+
and self._metadata_cache is not None
|
|
328
|
+
and allow_not_done is False
|
|
329
|
+
and attempt is not None
|
|
330
|
+
):
|
|
331
|
+
self._metadata_cache.store_metadata(
|
|
332
|
+
run_id, step_name, task_id, attempt, task_datastore.ds_metadata
|
|
333
|
+
)
|
|
334
|
+
|
|
335
|
+
return task_datastore
|
|
336
|
+
|
|
201
337
|
def save_data(self, data_iter, len_hint=0):
|
|
202
338
|
"""Saves data to the underlying content-addressed store
|
|
203
339
|
|
|
@@ -239,3 +375,13 @@ class FlowDataStore(object):
|
|
|
239
375
|
"""
|
|
240
376
|
for key, blob in self.ca_store.load_blobs(keys, force_raw=force_raw):
|
|
241
377
|
yield key, blob
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
class MetadataCache(ABC):
    """
    Abstract interface for a cache of per-attempt task metadata.

    Entries are addressed by (run_id, step_name, task_id, attempt).
    Concrete subclasses must implement both methods.
    """

    @abstractmethod
    def load_metadata(self, run_id, step_name, task_id, attempt):
        """
        Look up the cached metadata for one attempt of a task.

        Parameters
        ----------
        run_id : str
            Run ID of the task
        step_name : str
            Step name of the task
        task_id : str
            Task ID of the task
        attempt : int
            Attempt number of the task

        Returns
        -------
        The cached metadata. NOTE(review): FlowDataStore.get_task_datastore
        treats a `None` result as a cache miss -- implementations presumably
        return `None` when nothing is cached; confirm against implementers.
        """
        raise NotImplementedError

    @abstractmethod
    def store_metadata(self, run_id, step_name, task_id, attempt, metadata_dict):
        """
        Record the metadata for one attempt of a task in the cache.

        Parameters
        ----------
        run_id : str
            Run ID of the task
        step_name : str
            Step name of the task
        task_id : str
            Task ID of the task
        attempt : int
            Attempt number of the task
        metadata_dict : Dict
            Metadata to cache for this attempt
        """
        raise NotImplementedError
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
from typing import Dict, Any
|
|
2
|
+
from .task_datastore import TaskDataStore, require_mode
|
|
3
|
+
from ..metaflow_profile import from_start
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class SpinTaskDatastore(object):
    """
    Datastore facade used by a spin step to look up artifacts and attributes.

    Lookups consult the user-provided `spin_artifacts` first and fall back to
    the datastore of the task from a previous execution of the step.
    """

    def __init__(
        self,
        flow_name: str,
        run_id: str,
        step_name: str,
        task_id: str,
        orig_datastore: TaskDataStore,
        spin_artifacts: Dict[str, Any],
    ):
        """
        SpinTaskDatastore is a datastore for a task that is used to retrieve
        artifacts and attributes for a spin step. It uses the task pathspec
        from a previous execution of the step to access the artifacts and attributes.

        Parameters
        ----------
        flow_name : str
            Name of the flow
        run_id : str
            Run ID of the flow
        step_name : str
            Name of the step
        task_id : str
            Task ID of the step
        orig_datastore : TaskDataStore
            The datastore for the underlying task that is being spun.
        spin_artifacts : Dict[str, Any]
            User provided artifacts that are to be used in the spin task. This is a dictionary
            where keys are artifact names and values are the actual data or metadata.
            `None` is accepted and treated as an empty dictionary.
        """
        self.flow_name = flow_name
        self.run_id = run_id
        self.step_name = step_name
        self.task_id = task_id
        self.orig_datastore = orig_datastore
        # FlowDataStore.get_task_datastore forwards its `spin_artifacts=None`
        # default unchanged when no join type is given; normalize it so that
        # lookups fall through to `orig_datastore` instead of raising TypeError.
        self.spin_artifacts = {} if spin_artifacts is None else spin_artifacts
        self._task = None

        # Update _objects and _info in order to persist artifacts
        # See `persist` method in `TaskDatastore` for more details
        self._objects = self.orig_datastore._objects.copy()
        self._info = self.orig_datastore._info.copy()

        # We strip out some of the control ones
        for key in ("_transition",):
            if key in self._objects:
                del self._objects[key]
                # Tolerate the key being absent from _info rather than
                # failing construction with a KeyError.
                self._info.pop(key, None)

        from_start("SpinTaskDatastore: Initialized artifacts")

    @require_mode(None)
    def __getitem__(self, name):
        """
        Return the value stored under `name`.

        User-provided spin artifacts take precedence; otherwise the value is
        read from the datastore of the original task.

        Raises
        ------
        KeyError
            If `name` is found neither in the spin artifacts nor in the
            original datastore.
        """
        try:
            # Check if it's an artifact in the spin_artifacts
            return self.spin_artifacts[name]
        except KeyError:
            try:
                # Check if it's an attribute of the task
                # _foreach_stack, _foreach_index, ...
                return self.orig_datastore[name]
            except (KeyError, AttributeError) as e:
                raise KeyError(
                    f"Attribute '{name}' not found in the previous execution "
                    f"of the tasks for `{self.step_name}`."
                ) from e

    @require_mode(None)
    def is_none(self, name):
        """
        Return True if the value stored under `name` is None.

        Raises KeyError (from the lookup) if `name` cannot be found.
        """
        return self[name] is None

    @require_mode(None)
    def __contains__(self, name):
        """
        Return True if `name` resolves through the same lookup as indexing
        (spin artifacts first, then the original datastore).
        """
        try:
            self[name]
        except KeyError:
            return False
        return True

    @require_mode(None)
    def items(self):
        """
        Return an items view over the `_objects` mapping copied from the
        original datastore (minus the stripped control keys).

        An empty items view is returned when there are no objects, so the
        return type is the same in every case.

        NOTE(review): entries of `spin_artifacts` are not included here --
        confirm this is intended.
        """
        return (self._objects or {}).items()
|