ob-metaflow 2.11.13.1__py2.py3-none-any.whl → 2.19.7.1rc0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/R.py +10 -7
- metaflow/__init__.py +40 -25
- metaflow/_vendor/imghdr/__init__.py +186 -0
- metaflow/_vendor/importlib_metadata/__init__.py +1063 -0
- metaflow/_vendor/importlib_metadata/_adapters.py +68 -0
- metaflow/_vendor/importlib_metadata/_collections.py +30 -0
- metaflow/_vendor/importlib_metadata/_compat.py +71 -0
- metaflow/_vendor/importlib_metadata/_functools.py +104 -0
- metaflow/_vendor/importlib_metadata/_itertools.py +73 -0
- metaflow/_vendor/importlib_metadata/_meta.py +48 -0
- metaflow/_vendor/importlib_metadata/_text.py +99 -0
- metaflow/_vendor/importlib_metadata/py.typed +0 -0
- metaflow/_vendor/typeguard/__init__.py +48 -0
- metaflow/_vendor/typeguard/_checkers.py +1070 -0
- metaflow/_vendor/typeguard/_config.py +108 -0
- metaflow/_vendor/typeguard/_decorators.py +233 -0
- metaflow/_vendor/typeguard/_exceptions.py +42 -0
- metaflow/_vendor/typeguard/_functions.py +308 -0
- metaflow/_vendor/typeguard/_importhook.py +213 -0
- metaflow/_vendor/typeguard/_memo.py +48 -0
- metaflow/_vendor/typeguard/_pytest_plugin.py +127 -0
- metaflow/_vendor/typeguard/_suppression.py +86 -0
- metaflow/_vendor/typeguard/_transformer.py +1229 -0
- metaflow/_vendor/typeguard/_union_transformer.py +55 -0
- metaflow/_vendor/typeguard/_utils.py +173 -0
- metaflow/_vendor/typeguard/py.typed +0 -0
- metaflow/_vendor/typing_extensions.py +3641 -0
- metaflow/_vendor/v3_7/importlib_metadata/__init__.py +1063 -0
- metaflow/_vendor/v3_7/importlib_metadata/_adapters.py +68 -0
- metaflow/_vendor/v3_7/importlib_metadata/_collections.py +30 -0
- metaflow/_vendor/v3_7/importlib_metadata/_compat.py +71 -0
- metaflow/_vendor/v3_7/importlib_metadata/_functools.py +104 -0
- metaflow/_vendor/v3_7/importlib_metadata/_itertools.py +73 -0
- metaflow/_vendor/v3_7/importlib_metadata/_meta.py +48 -0
- metaflow/_vendor/v3_7/importlib_metadata/_text.py +99 -0
- metaflow/_vendor/v3_7/importlib_metadata/py.typed +0 -0
- metaflow/_vendor/v3_7/typeguard/__init__.py +48 -0
- metaflow/_vendor/v3_7/typeguard/_checkers.py +906 -0
- metaflow/_vendor/v3_7/typeguard/_config.py +108 -0
- metaflow/_vendor/v3_7/typeguard/_decorators.py +237 -0
- metaflow/_vendor/v3_7/typeguard/_exceptions.py +42 -0
- metaflow/_vendor/v3_7/typeguard/_functions.py +310 -0
- metaflow/_vendor/v3_7/typeguard/_importhook.py +213 -0
- metaflow/_vendor/v3_7/typeguard/_memo.py +48 -0
- metaflow/_vendor/v3_7/typeguard/_pytest_plugin.py +100 -0
- metaflow/_vendor/v3_7/typeguard/_suppression.py +88 -0
- metaflow/_vendor/v3_7/typeguard/_transformer.py +1207 -0
- metaflow/_vendor/v3_7/typeguard/_union_transformer.py +54 -0
- metaflow/_vendor/v3_7/typeguard/_utils.py +169 -0
- metaflow/_vendor/v3_7/typeguard/py.typed +0 -0
- metaflow/_vendor/v3_7/typing_extensions.py +3072 -0
- metaflow/_vendor/yaml/__init__.py +427 -0
- metaflow/_vendor/yaml/composer.py +139 -0
- metaflow/_vendor/yaml/constructor.py +748 -0
- metaflow/_vendor/yaml/cyaml.py +101 -0
- metaflow/_vendor/yaml/dumper.py +62 -0
- metaflow/_vendor/yaml/emitter.py +1137 -0
- metaflow/_vendor/yaml/error.py +75 -0
- metaflow/_vendor/yaml/events.py +86 -0
- metaflow/_vendor/yaml/loader.py +63 -0
- metaflow/_vendor/yaml/nodes.py +49 -0
- metaflow/_vendor/yaml/parser.py +589 -0
- metaflow/_vendor/yaml/reader.py +185 -0
- metaflow/_vendor/yaml/representer.py +389 -0
- metaflow/_vendor/yaml/resolver.py +227 -0
- metaflow/_vendor/yaml/scanner.py +1435 -0
- metaflow/_vendor/yaml/serializer.py +111 -0
- metaflow/_vendor/yaml/tokens.py +104 -0
- metaflow/cards.py +5 -0
- metaflow/cli.py +331 -785
- metaflow/cli_args.py +17 -0
- metaflow/cli_components/__init__.py +0 -0
- metaflow/cli_components/dump_cmd.py +96 -0
- metaflow/cli_components/init_cmd.py +52 -0
- metaflow/cli_components/run_cmds.py +546 -0
- metaflow/cli_components/step_cmd.py +334 -0
- metaflow/cli_components/utils.py +140 -0
- metaflow/client/__init__.py +1 -0
- metaflow/client/core.py +467 -73
- metaflow/client/filecache.py +75 -35
- metaflow/clone_util.py +7 -1
- metaflow/cmd/code/__init__.py +231 -0
- metaflow/cmd/develop/stub_generator.py +756 -288
- metaflow/cmd/develop/stubs.py +12 -28
- metaflow/cmd/main_cli.py +6 -4
- metaflow/cmd/make_wrapper.py +78 -0
- metaflow/datastore/__init__.py +1 -0
- metaflow/datastore/content_addressed_store.py +41 -10
- metaflow/datastore/datastore_set.py +11 -2
- metaflow/datastore/flow_datastore.py +156 -10
- metaflow/datastore/spin_datastore.py +91 -0
- metaflow/datastore/task_datastore.py +154 -39
- metaflow/debug.py +5 -0
- metaflow/decorators.py +404 -78
- metaflow/exception.py +8 -2
- metaflow/extension_support/__init__.py +527 -376
- metaflow/extension_support/_empty_file.py +2 -2
- metaflow/extension_support/plugins.py +49 -31
- metaflow/flowspec.py +482 -33
- metaflow/graph.py +210 -42
- metaflow/includefile.py +84 -40
- metaflow/lint.py +141 -22
- metaflow/meta_files.py +13 -0
- metaflow/{metadata → metadata_provider}/heartbeat.py +24 -8
- metaflow/{metadata → metadata_provider}/metadata.py +86 -1
- metaflow/metaflow_config.py +175 -28
- metaflow/metaflow_config_funcs.py +51 -3
- metaflow/metaflow_current.py +4 -10
- metaflow/metaflow_environment.py +139 -53
- metaflow/metaflow_git.py +115 -0
- metaflow/metaflow_profile.py +18 -0
- metaflow/metaflow_version.py +150 -66
- metaflow/mflog/__init__.py +4 -3
- metaflow/mflog/save_logs.py +2 -2
- metaflow/multicore_utils.py +31 -14
- metaflow/package/__init__.py +673 -0
- metaflow/packaging_sys/__init__.py +880 -0
- metaflow/packaging_sys/backend.py +128 -0
- metaflow/packaging_sys/distribution_support.py +153 -0
- metaflow/packaging_sys/tar_backend.py +99 -0
- metaflow/packaging_sys/utils.py +54 -0
- metaflow/packaging_sys/v1.py +527 -0
- metaflow/parameters.py +149 -28
- metaflow/plugins/__init__.py +74 -5
- metaflow/plugins/airflow/airflow.py +40 -25
- metaflow/plugins/airflow/airflow_cli.py +22 -5
- metaflow/plugins/airflow/airflow_decorator.py +1 -1
- metaflow/plugins/airflow/airflow_utils.py +5 -3
- metaflow/plugins/airflow/sensors/base_sensor.py +4 -4
- metaflow/plugins/airflow/sensors/external_task_sensor.py +2 -2
- metaflow/plugins/airflow/sensors/s3_sensor.py +2 -2
- metaflow/plugins/argo/argo_client.py +78 -33
- metaflow/plugins/argo/argo_events.py +6 -6
- metaflow/plugins/argo/argo_workflows.py +2410 -527
- metaflow/plugins/argo/argo_workflows_cli.py +571 -121
- metaflow/plugins/argo/argo_workflows_decorator.py +43 -12
- metaflow/plugins/argo/argo_workflows_deployer.py +106 -0
- metaflow/plugins/argo/argo_workflows_deployer_objects.py +453 -0
- metaflow/plugins/argo/capture_error.py +73 -0
- metaflow/plugins/argo/conditional_input_paths.py +35 -0
- metaflow/plugins/argo/exit_hooks.py +209 -0
- metaflow/plugins/argo/jobset_input_paths.py +15 -0
- metaflow/plugins/argo/param_val.py +19 -0
- metaflow/plugins/aws/aws_client.py +10 -3
- metaflow/plugins/aws/aws_utils.py +55 -2
- metaflow/plugins/aws/batch/batch.py +72 -5
- metaflow/plugins/aws/batch/batch_cli.py +33 -10
- metaflow/plugins/aws/batch/batch_client.py +4 -3
- metaflow/plugins/aws/batch/batch_decorator.py +102 -35
- metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
- metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
- metaflow/plugins/aws/step_functions/production_token.py +1 -1
- metaflow/plugins/aws/step_functions/step_functions.py +65 -8
- metaflow/plugins/aws/step_functions/step_functions_cli.py +101 -7
- metaflow/plugins/aws/step_functions/step_functions_decorator.py +1 -2
- metaflow/plugins/aws/step_functions/step_functions_deployer.py +97 -0
- metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +264 -0
- metaflow/plugins/azure/azure_exceptions.py +1 -1
- metaflow/plugins/azure/azure_secret_manager_secrets_provider.py +240 -0
- metaflow/plugins/azure/azure_tail.py +1 -1
- metaflow/plugins/azure/includefile_support.py +2 -0
- metaflow/plugins/cards/card_cli.py +66 -30
- metaflow/plugins/cards/card_creator.py +25 -1
- metaflow/plugins/cards/card_datastore.py +21 -49
- metaflow/plugins/cards/card_decorator.py +132 -8
- metaflow/plugins/cards/card_modules/basic.py +112 -17
- metaflow/plugins/cards/card_modules/bundle.css +1 -1
- metaflow/plugins/cards/card_modules/card.py +16 -1
- metaflow/plugins/cards/card_modules/chevron/renderer.py +1 -1
- metaflow/plugins/cards/card_modules/components.py +665 -28
- metaflow/plugins/cards/card_modules/convert_to_native_type.py +36 -7
- metaflow/plugins/cards/card_modules/json_viewer.py +232 -0
- metaflow/plugins/cards/card_modules/main.css +1 -0
- metaflow/plugins/cards/card_modules/main.js +68 -49
- metaflow/plugins/cards/card_modules/renderer_tools.py +1 -0
- metaflow/plugins/cards/card_modules/test_cards.py +26 -12
- metaflow/plugins/cards/card_server.py +39 -14
- metaflow/plugins/cards/component_serializer.py +2 -9
- metaflow/plugins/cards/metadata.py +22 -0
- metaflow/plugins/catch_decorator.py +9 -0
- metaflow/plugins/datastores/azure_storage.py +10 -1
- metaflow/plugins/datastores/gs_storage.py +6 -2
- metaflow/plugins/datastores/local_storage.py +12 -6
- metaflow/plugins/datastores/spin_storage.py +12 -0
- metaflow/plugins/datatools/local.py +2 -0
- metaflow/plugins/datatools/s3/s3.py +126 -75
- metaflow/plugins/datatools/s3/s3op.py +254 -121
- metaflow/plugins/env_escape/__init__.py +3 -3
- metaflow/plugins/env_escape/client_modules.py +102 -72
- metaflow/plugins/env_escape/server.py +7 -0
- metaflow/plugins/env_escape/stub.py +24 -5
- metaflow/plugins/events_decorator.py +343 -185
- metaflow/plugins/exit_hook/__init__.py +0 -0
- metaflow/plugins/exit_hook/exit_hook_decorator.py +46 -0
- metaflow/plugins/exit_hook/exit_hook_script.py +52 -0
- metaflow/plugins/gcp/__init__.py +1 -1
- metaflow/plugins/gcp/gcp_secret_manager_secrets_provider.py +11 -6
- metaflow/plugins/gcp/gs_tail.py +10 -6
- metaflow/plugins/gcp/includefile_support.py +3 -0
- metaflow/plugins/kubernetes/kube_utils.py +108 -0
- metaflow/plugins/kubernetes/kubernetes.py +411 -130
- metaflow/plugins/kubernetes/kubernetes_cli.py +168 -36
- metaflow/plugins/kubernetes/kubernetes_client.py +104 -2
- metaflow/plugins/kubernetes/kubernetes_decorator.py +246 -88
- metaflow/plugins/kubernetes/kubernetes_job.py +253 -581
- metaflow/plugins/kubernetes/kubernetes_jobsets.py +1071 -0
- metaflow/plugins/kubernetes/spot_metadata_cli.py +69 -0
- metaflow/plugins/kubernetes/spot_monitor_sidecar.py +109 -0
- metaflow/plugins/logs_cli.py +359 -0
- metaflow/plugins/{metadata → metadata_providers}/local.py +144 -84
- metaflow/plugins/{metadata → metadata_providers}/service.py +103 -26
- metaflow/plugins/metadata_providers/spin.py +16 -0
- metaflow/plugins/package_cli.py +36 -24
- metaflow/plugins/parallel_decorator.py +128 -11
- metaflow/plugins/parsers.py +16 -0
- metaflow/plugins/project_decorator.py +51 -5
- metaflow/plugins/pypi/bootstrap.py +357 -105
- metaflow/plugins/pypi/conda_decorator.py +82 -81
- metaflow/plugins/pypi/conda_environment.py +187 -52
- metaflow/plugins/pypi/micromamba.py +157 -47
- metaflow/plugins/pypi/parsers.py +268 -0
- metaflow/plugins/pypi/pip.py +88 -13
- metaflow/plugins/pypi/pypi_decorator.py +37 -1
- metaflow/plugins/pypi/utils.py +48 -2
- metaflow/plugins/resources_decorator.py +2 -2
- metaflow/plugins/secrets/__init__.py +3 -0
- metaflow/plugins/secrets/secrets_decorator.py +26 -181
- metaflow/plugins/secrets/secrets_func.py +49 -0
- metaflow/plugins/secrets/secrets_spec.py +101 -0
- metaflow/plugins/secrets/utils.py +74 -0
- metaflow/plugins/tag_cli.py +4 -7
- metaflow/plugins/test_unbounded_foreach_decorator.py +41 -6
- metaflow/plugins/timeout_decorator.py +3 -3
- metaflow/plugins/uv/__init__.py +0 -0
- metaflow/plugins/uv/bootstrap.py +128 -0
- metaflow/plugins/uv/uv_environment.py +72 -0
- metaflow/procpoll.py +1 -1
- metaflow/pylint_wrapper.py +5 -1
- metaflow/runner/__init__.py +0 -0
- metaflow/runner/click_api.py +717 -0
- metaflow/runner/deployer.py +470 -0
- metaflow/runner/deployer_impl.py +201 -0
- metaflow/runner/metaflow_runner.py +714 -0
- metaflow/runner/nbdeploy.py +132 -0
- metaflow/runner/nbrun.py +225 -0
- metaflow/runner/subprocess_manager.py +650 -0
- metaflow/runner/utils.py +335 -0
- metaflow/runtime.py +1078 -260
- metaflow/sidecar/sidecar_worker.py +1 -1
- metaflow/system/__init__.py +5 -0
- metaflow/system/system_logger.py +85 -0
- metaflow/system/system_monitor.py +108 -0
- metaflow/system/system_utils.py +19 -0
- metaflow/task.py +521 -225
- metaflow/tracing/__init__.py +7 -7
- metaflow/tracing/span_exporter.py +31 -38
- metaflow/tracing/tracing_modules.py +38 -43
- metaflow/tuple_util.py +27 -0
- metaflow/user_configs/__init__.py +0 -0
- metaflow/user_configs/config_options.py +563 -0
- metaflow/user_configs/config_parameters.py +598 -0
- metaflow/user_decorators/__init__.py +0 -0
- metaflow/user_decorators/common.py +144 -0
- metaflow/user_decorators/mutable_flow.py +512 -0
- metaflow/user_decorators/mutable_step.py +424 -0
- metaflow/user_decorators/user_flow_decorator.py +264 -0
- metaflow/user_decorators/user_step_decorator.py +749 -0
- metaflow/util.py +243 -27
- metaflow/vendor.py +23 -7
- metaflow/version.py +1 -1
- ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Makefile +355 -0
- ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Tiltfile +726 -0
- ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/pick_services.sh +105 -0
- ob_metaflow-2.19.7.1rc0.dist-info/METADATA +87 -0
- ob_metaflow-2.19.7.1rc0.dist-info/RECORD +445 -0
- {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/WHEEL +1 -1
- {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/entry_points.txt +1 -0
- metaflow/_vendor/v3_5/__init__.py +0 -1
- metaflow/_vendor/v3_5/importlib_metadata/__init__.py +0 -644
- metaflow/_vendor/v3_5/importlib_metadata/_compat.py +0 -152
- metaflow/package.py +0 -188
- ob_metaflow-2.11.13.1.dist-info/METADATA +0 -85
- ob_metaflow-2.11.13.1.dist-info/RECORD +0 -308
- /metaflow/_vendor/{v3_5/zipp.py → zipp.py} +0 -0
- /metaflow/{metadata → metadata_provider}/__init__.py +0 -0
- /metaflow/{metadata → metadata_provider}/util.py +0 -0
- /metaflow/plugins/{metadata → metadata_providers}/__init__.py +0 -0
- {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info/licenses}/LICENSE +0 -0
- {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/top_level.txt +0 -0
metaflow/client/filecache.py
CHANGED
|
@@ -1,20 +1,24 @@
|
|
|
1
1
|
from __future__ import print_function
|
|
2
2
|
from collections import OrderedDict
|
|
3
|
+
import json
|
|
3
4
|
import os
|
|
4
5
|
import sys
|
|
5
6
|
import time
|
|
6
7
|
from tempfile import NamedTemporaryFile
|
|
7
8
|
from hashlib import sha1
|
|
8
9
|
|
|
10
|
+
from urllib.parse import urlparse
|
|
11
|
+
|
|
9
12
|
from metaflow.datastore import FlowDataStore
|
|
10
13
|
from metaflow.datastore.content_addressed_store import BlobCache
|
|
14
|
+
from metaflow.datastore.flow_datastore import MetadataCache
|
|
11
15
|
from metaflow.exception import MetaflowException
|
|
12
16
|
from metaflow.metaflow_config import (
|
|
13
17
|
CLIENT_CACHE_PATH,
|
|
14
18
|
CLIENT_CACHE_MAX_SIZE,
|
|
15
19
|
CLIENT_CACHE_MAX_FLOWDATASTORE_COUNT,
|
|
16
|
-
CLIENT_CACHE_MAX_TASKDATASTORE_COUNT,
|
|
17
20
|
)
|
|
21
|
+
from metaflow.metaflow_profile import from_start
|
|
18
22
|
|
|
19
23
|
from metaflow.plugins import DATASTORES
|
|
20
24
|
|
|
@@ -61,8 +65,8 @@ class FileCache(object):
|
|
|
61
65
|
# when querying for sizes of artifacts. Once we have queried for the size
|
|
62
66
|
# of one artifact in a TaskDatastore, caching this means that any
|
|
63
67
|
# queries on that same TaskDatastore will be quick (since we already
|
|
64
|
-
# have all the metadata)
|
|
65
|
-
|
|
68
|
+
# have all the metadata). We keep track of this in a file so it persists
|
|
69
|
+
# across processes.
|
|
66
70
|
|
|
67
71
|
@property
|
|
68
72
|
def cache_dir(self):
|
|
@@ -83,10 +87,9 @@ class FileCache(object):
|
|
|
83
87
|
def get_log_legacy(
|
|
84
88
|
self, ds_type, location, logtype, attempt, flow_name, run_id, step_name, task_id
|
|
85
89
|
):
|
|
86
|
-
|
|
87
90
|
ds_cls = self._get_datastore_storage_impl(ds_type)
|
|
88
91
|
ds_root = ds_cls.path_join(*ds_cls.path_split(location)[:-5])
|
|
89
|
-
cache_id = self.
|
|
92
|
+
cache_id = self.flow_ds_id(ds_type, ds_root, flow_name)
|
|
90
93
|
|
|
91
94
|
token = (
|
|
92
95
|
"%s.cached"
|
|
@@ -310,13 +313,25 @@ class FileCache(object):
|
|
|
310
313
|
self._objects = sorted(objects, reverse=False)
|
|
311
314
|
|
|
312
315
|
@staticmethod
|
|
313
|
-
def
|
|
314
|
-
|
|
316
|
+
def flow_ds_id(ds_type, ds_root, flow_name):
|
|
317
|
+
p = urlparse(ds_root)
|
|
318
|
+
sanitized_root = (p.netloc + p.path).replace("/", "_")
|
|
319
|
+
return ".".join([ds_type, sanitized_root, flow_name])
|
|
315
320
|
|
|
316
321
|
@staticmethod
|
|
317
|
-
def
|
|
322
|
+
def task_ds_id(ds_type, ds_root, flow_name, run_id, step_name, task_id, attempt):
|
|
323
|
+
p = urlparse(ds_root)
|
|
324
|
+
sanitized_root = (p.netloc + p.path).replace("/", "_")
|
|
318
325
|
return ".".join(
|
|
319
|
-
[
|
|
326
|
+
[
|
|
327
|
+
ds_type,
|
|
328
|
+
sanitized_root,
|
|
329
|
+
flow_name,
|
|
330
|
+
run_id,
|
|
331
|
+
step_name,
|
|
332
|
+
task_id,
|
|
333
|
+
str(attempt),
|
|
334
|
+
]
|
|
320
335
|
)
|
|
321
336
|
|
|
322
337
|
def _garbage_collect(self):
|
|
@@ -352,7 +367,7 @@ class FileCache(object):
|
|
|
352
367
|
return storage_impl[0]
|
|
353
368
|
|
|
354
369
|
def _get_flow_datastore(self, ds_type, ds_root, flow_name):
|
|
355
|
-
cache_id = self.
|
|
370
|
+
cache_id = self.flow_ds_id(ds_type, ds_root, flow_name)
|
|
356
371
|
cached_flow_datastore = self._store_caches.get(cache_id)
|
|
357
372
|
|
|
358
373
|
if cached_flow_datastore:
|
|
@@ -367,9 +382,14 @@ class FileCache(object):
|
|
|
367
382
|
ds_root=ds_root,
|
|
368
383
|
)
|
|
369
384
|
blob_cache = self._blob_caches.setdefault(
|
|
370
|
-
cache_id,
|
|
385
|
+
cache_id,
|
|
386
|
+
(
|
|
387
|
+
FileBlobCache(self, cache_id),
|
|
388
|
+
TaskMetadataCache(self, ds_type, ds_root, flow_name),
|
|
389
|
+
),
|
|
371
390
|
)
|
|
372
|
-
cached_flow_datastore.ca_store.set_blob_cache(blob_cache)
|
|
391
|
+
cached_flow_datastore.ca_store.set_blob_cache(blob_cache[0])
|
|
392
|
+
cached_flow_datastore.set_metadata_cache(blob_cache[1])
|
|
373
393
|
self._store_caches[cache_id] = cached_flow_datastore
|
|
374
394
|
if len(self._store_caches) > CLIENT_CACHE_MAX_FLOWDATASTORE_COUNT:
|
|
375
395
|
cache_id_to_remove, _ = self._store_caches.popitem(last=False)
|
|
@@ -380,32 +400,52 @@ class FileCache(object):
|
|
|
380
400
|
self, ds_type, ds_root, flow_name, run_id, step_name, task_id, attempt
|
|
381
401
|
):
|
|
382
402
|
flow_ds = self._get_flow_datastore(ds_type, ds_root, flow_name)
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
403
|
+
|
|
404
|
+
return flow_ds.get_task_datastore(run_id, step_name, task_id, attempt=attempt)
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
class TaskMetadataCache(MetadataCache):
|
|
408
|
+
def __init__(self, filecache, ds_type, ds_root, flow_name):
|
|
409
|
+
self._filecache = filecache
|
|
410
|
+
self._ds_type = ds_type
|
|
411
|
+
self._ds_root = ds_root
|
|
412
|
+
self._flow_name = flow_name
|
|
413
|
+
|
|
414
|
+
def _path(self, run_id, step_name, task_id, attempt):
|
|
415
|
+
if attempt is None:
|
|
416
|
+
raise MetaflowException(
|
|
417
|
+
"Attempt number must be specified to use task metadata cache. Raise an issue "
|
|
418
|
+
"on Metaflow GitHub if you see this message.",
|
|
387
419
|
)
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
data_metadata=cached_metadata,
|
|
397
|
-
)
|
|
398
|
-
# If we are here, we either have attempt=None or nothing in the cache
|
|
399
|
-
task_ds = flow_ds.get_task_datastore(
|
|
400
|
-
run_id, step_name, task_id, attempt=attempt
|
|
420
|
+
cache_id = self._filecache.task_ds_id(
|
|
421
|
+
self._ds_type,
|
|
422
|
+
self._ds_root,
|
|
423
|
+
self._flow_name,
|
|
424
|
+
run_id,
|
|
425
|
+
step_name,
|
|
426
|
+
task_id,
|
|
427
|
+
attempt,
|
|
401
428
|
)
|
|
402
|
-
|
|
403
|
-
|
|
429
|
+
token = (
|
|
430
|
+
"%s.cached"
|
|
431
|
+
% sha1(
|
|
432
|
+
os.path.join(
|
|
433
|
+
run_id, step_name, task_id, str(attempt), "metadata"
|
|
434
|
+
).encode("utf-8")
|
|
435
|
+
).hexdigest()
|
|
436
|
+
)
|
|
437
|
+
return os.path.join(self._filecache.cache_dir, cache_id, token[:2], token)
|
|
438
|
+
|
|
439
|
+
def load_metadata(self, run_id, step_name, task_id, attempt):
|
|
440
|
+
d = self._filecache.read_file(self._path(run_id, step_name, task_id, attempt))
|
|
441
|
+
if d:
|
|
442
|
+
return json.loads(d)
|
|
443
|
+
|
|
444
|
+
def store_metadata(self, run_id, step_name, task_id, attempt, metadata_dict):
|
|
445
|
+
self._filecache.create_file(
|
|
446
|
+
self._path(run_id, step_name, task_id, attempt),
|
|
447
|
+
json.dumps(metadata_dict).encode("utf-8"),
|
|
404
448
|
)
|
|
405
|
-
self._task_metadata_caches[cache_id] = task_ds.ds_metadata
|
|
406
|
-
if len(self._task_metadata_caches) > CLIENT_CACHE_MAX_TASKDATASTORE_COUNT:
|
|
407
|
-
self._task_metadata_caches.popitem(last=False)
|
|
408
|
-
return task_ds
|
|
409
449
|
|
|
410
450
|
|
|
411
451
|
class FileBlobCache(BlobCache):
|
metaflow/clone_util.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import time
|
|
2
|
-
from .
|
|
2
|
+
from .metadata_provider import MetaDatum
|
|
3
3
|
|
|
4
4
|
|
|
5
5
|
def clone_task_helper(
|
|
@@ -66,6 +66,12 @@ def clone_task_helper(
|
|
|
66
66
|
type="attempt",
|
|
67
67
|
tags=metadata_tags,
|
|
68
68
|
),
|
|
69
|
+
MetaDatum(
|
|
70
|
+
field="attempt_ok",
|
|
71
|
+
value="True", # During clone, the task is always considered successful.
|
|
72
|
+
type="internal_attempt_status",
|
|
73
|
+
tags=metadata_tags,
|
|
74
|
+
),
|
|
69
75
|
],
|
|
70
76
|
)
|
|
71
77
|
output.done()
|
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import shutil
|
|
3
|
+
import sys
|
|
4
|
+
from subprocess import PIPE, CompletedProcess, run
|
|
5
|
+
from tempfile import TemporaryDirectory
|
|
6
|
+
from typing import Any, Callable, List, Mapping, Optional, cast
|
|
7
|
+
|
|
8
|
+
from metaflow import Run
|
|
9
|
+
from metaflow.util import walk_without_cycles
|
|
10
|
+
from metaflow._vendor import click
|
|
11
|
+
from metaflow.cli import echo_always
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@click.group()
|
|
15
|
+
def cli():
|
|
16
|
+
pass
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@cli.group(help="Access, compare, and manage code associated with Metaflow runs.")
|
|
20
|
+
def code():
|
|
21
|
+
pass
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def echo(line: str) -> None:
|
|
25
|
+
echo_always(line, err=True, fg="magenta")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def extract_code_package(runspec: str) -> TemporaryDirectory:
|
|
29
|
+
try:
|
|
30
|
+
mf_run = Run(runspec, _namespace_check=False)
|
|
31
|
+
echo(f"✅ Run *{runspec}* found, downloading code..")
|
|
32
|
+
except Exception as e:
|
|
33
|
+
echo(f"❌ Run **{runspec}** not found")
|
|
34
|
+
raise e
|
|
35
|
+
|
|
36
|
+
if mf_run.code is None:
|
|
37
|
+
echo(
|
|
38
|
+
f"❌ Run **{runspec}** doesn't have a code package. Maybe it's a local run?"
|
|
39
|
+
)
|
|
40
|
+
raise RuntimeError("no code package found")
|
|
41
|
+
|
|
42
|
+
return mf_run.code.extract()
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def perform_diff(
|
|
46
|
+
source_dir: str,
|
|
47
|
+
target_dir: Optional[str] = None,
|
|
48
|
+
output: bool = False,
|
|
49
|
+
**kwargs: Mapping[str, Any],
|
|
50
|
+
) -> Optional[List[str]]:
|
|
51
|
+
if target_dir is None:
|
|
52
|
+
target_dir = os.getcwd()
|
|
53
|
+
|
|
54
|
+
diffs = []
|
|
55
|
+
for dirpath, _, filenames in walk_without_cycles(source_dir):
|
|
56
|
+
for fname in filenames:
|
|
57
|
+
# NOTE: the paths below need to be set up carefully
|
|
58
|
+
# for the `patch` command to work. Better not to touch
|
|
59
|
+
# the directories below. If you must, test that patches
|
|
60
|
+
# work after your changes.
|
|
61
|
+
#
|
|
62
|
+
# target_file is the git repo in the current working directory
|
|
63
|
+
rel = os.path.relpath(dirpath, source_dir)
|
|
64
|
+
target_file = os.path.join(rel, fname)
|
|
65
|
+
# source_file is the run file loaded in a tmp directory
|
|
66
|
+
source_file = os.path.join(dirpath, fname)
|
|
67
|
+
|
|
68
|
+
if sys.stdout.isatty() and not output:
|
|
69
|
+
color = ["--color"]
|
|
70
|
+
else:
|
|
71
|
+
color = ["--no-color"]
|
|
72
|
+
|
|
73
|
+
if os.path.exists(os.path.join(target_dir, target_file)):
|
|
74
|
+
cmd = (
|
|
75
|
+
["git", "diff", "--no-index", "--exit-code"]
|
|
76
|
+
+ color
|
|
77
|
+
+ [
|
|
78
|
+
target_file,
|
|
79
|
+
source_file,
|
|
80
|
+
]
|
|
81
|
+
)
|
|
82
|
+
result: CompletedProcess = run(
|
|
83
|
+
cmd, text=True, stdout=PIPE, cwd=target_dir
|
|
84
|
+
)
|
|
85
|
+
if result.returncode == 0:
|
|
86
|
+
if not output:
|
|
87
|
+
echo(f"✅ {target_file} is identical, skipping")
|
|
88
|
+
continue
|
|
89
|
+
|
|
90
|
+
if output:
|
|
91
|
+
diffs.append(result.stdout)
|
|
92
|
+
else:
|
|
93
|
+
run(["less", "-R"], input=result.stdout, text=True)
|
|
94
|
+
else:
|
|
95
|
+
if not output:
|
|
96
|
+
echo(f"❗ {target_file} not in the target directory, skipping")
|
|
97
|
+
return diffs if output else None
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def run_op(
|
|
101
|
+
runspec: str, op: Callable[..., Optional[List[str]]], **op_args: Mapping[str, Any]
|
|
102
|
+
) -> Optional[List[str]]:
|
|
103
|
+
tmp = None
|
|
104
|
+
try:
|
|
105
|
+
tmp = extract_code_package(runspec)
|
|
106
|
+
return op(tmp.name, **op_args)
|
|
107
|
+
finally:
|
|
108
|
+
if tmp and os.path.exists(tmp.name):
|
|
109
|
+
shutil.rmtree(tmp.name)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def run_op_diff_runs(
|
|
113
|
+
source_run_pathspec: str, target_run_pathspec: str, **op_args: Mapping[str, Any]
|
|
114
|
+
) -> Optional[List[str]]:
|
|
115
|
+
source_tmp = None
|
|
116
|
+
target_tmp = None
|
|
117
|
+
try:
|
|
118
|
+
source_tmp = extract_code_package(source_run_pathspec)
|
|
119
|
+
target_tmp = extract_code_package(target_run_pathspec)
|
|
120
|
+
return perform_diff(source_tmp.name, target_tmp.name, **op_args)
|
|
121
|
+
finally:
|
|
122
|
+
for d in [source_tmp, target_tmp]:
|
|
123
|
+
if d and os.path.exists(d.name):
|
|
124
|
+
shutil.rmtree(d.name)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def op_diff(tmpdir: str, **kwargs: Mapping[str, Any]) -> Optional[List[str]]:
|
|
128
|
+
kwargs_dict = dict(kwargs)
|
|
129
|
+
target_dir = cast(Optional[str], kwargs_dict.pop("target_dir", None))
|
|
130
|
+
output: bool = bool(kwargs_dict.pop("output", False))
|
|
131
|
+
op_args: Mapping[str, Any] = {**kwargs_dict}
|
|
132
|
+
return perform_diff(tmpdir, target_dir=target_dir, output=output, **op_args)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def op_pull(tmpdir: str, dst: str, **op_args: Mapping[str, Any]) -> None:
|
|
136
|
+
if os.path.exists(dst):
|
|
137
|
+
echo(f"❌ Directory *{dst}* already exists")
|
|
138
|
+
else:
|
|
139
|
+
shutil.move(tmpdir, dst)
|
|
140
|
+
echo(f"Code downloaded to *{dst}*")
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def op_patch(tmpdir: str, dst: str, **kwargs: Mapping[str, Any]) -> None:
|
|
144
|
+
diffs = perform_diff(tmpdir, output=True) or []
|
|
145
|
+
with open(dst, "w", encoding="utf-8") as f:
|
|
146
|
+
for out in diffs:
|
|
147
|
+
out = out.replace(tmpdir, "/.")
|
|
148
|
+
out = out.replace("+++ b/./", "+++ b/")
|
|
149
|
+
out = out.replace("--- b/./", "--- b/")
|
|
150
|
+
out = out.replace("--- a/./", "--- a/")
|
|
151
|
+
out = out.replace("+++ a/./", "+++ a/")
|
|
152
|
+
f.write(out)
|
|
153
|
+
echo(f"Patch saved in *{dst}*")
|
|
154
|
+
path = run(
|
|
155
|
+
["git", "rev-parse", "--show-prefix"], text=True, stdout=PIPE
|
|
156
|
+
).stdout.strip()
|
|
157
|
+
if path:
|
|
158
|
+
diropt = f" --directory={path.rstrip('/')}"
|
|
159
|
+
else:
|
|
160
|
+
diropt = ""
|
|
161
|
+
echo("Apply the patch by running:")
|
|
162
|
+
echo_always(
|
|
163
|
+
f"git apply --verbose{diropt} {dst}", highlight=True, bold=True, err=True
|
|
164
|
+
)
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
@code.command()
|
|
168
|
+
@click.argument("run_pathspec")
|
|
169
|
+
def diff(run_pathspec: str, **kwargs: Mapping[str, Any]) -> None:
|
|
170
|
+
"""
|
|
171
|
+
Do a 'git diff' of the current directory and a Metaflow run.
|
|
172
|
+
"""
|
|
173
|
+
_ = run_op(run_pathspec, op_diff, **kwargs)
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
@code.command()
|
|
177
|
+
@click.argument("source_run_pathspec")
|
|
178
|
+
@click.argument("target_run_pathspec")
|
|
179
|
+
def diff_runs(
|
|
180
|
+
source_run_pathspec: str, target_run_pathspec: str, **kwargs: Mapping[str, Any]
|
|
181
|
+
) -> None:
|
|
182
|
+
"""
|
|
183
|
+
Do a 'git diff' between two Metaflow runs.
|
|
184
|
+
"""
|
|
185
|
+
_ = run_op_diff_runs(source_run_pathspec, target_run_pathspec, **kwargs)
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
@code.command()
|
|
189
|
+
@click.argument("run_pathspec")
|
|
190
|
+
@click.option(
|
|
191
|
+
"--dir", help="Destination directory (default: {run_pathspec}_code)", default=None
|
|
192
|
+
)
|
|
193
|
+
def pull(
|
|
194
|
+
run_pathspec: str, dir: Optional[str] = None, **kwargs: Mapping[str, Any]
|
|
195
|
+
) -> None:
|
|
196
|
+
"""
|
|
197
|
+
Pull the code of a Metaflow run.
|
|
198
|
+
"""
|
|
199
|
+
if dir is None:
|
|
200
|
+
dir = run_pathspec.lower().replace("/", "_") + "_code"
|
|
201
|
+
op_args: Mapping[str, Any] = {**kwargs, "dst": dir}
|
|
202
|
+
run_op(run_pathspec, op_pull, **op_args)
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
@code.command()
|
|
206
|
+
@click.argument("run_pathspec")
|
|
207
|
+
@click.option(
|
|
208
|
+
"--file_path",
|
|
209
|
+
help="Patch file name. If not provided, defaults to a sanitized version of RUN_PATHSPEC "
|
|
210
|
+
"with slashes replaced by underscores, plus '.patch'.",
|
|
211
|
+
show_default=False,
|
|
212
|
+
)
|
|
213
|
+
@click.option(
|
|
214
|
+
"--overwrite", is_flag=True, help="Overwrite the patch file if it exists."
|
|
215
|
+
)
|
|
216
|
+
def patch(
|
|
217
|
+
run_pathspec: str,
|
|
218
|
+
file_path: Optional[str] = None,
|
|
219
|
+
overwrite: bool = False,
|
|
220
|
+
**kwargs: Mapping[str, Any],
|
|
221
|
+
) -> None:
|
|
222
|
+
"""
|
|
223
|
+
Create a patch by comparing current dir with a Metaflow run.
|
|
224
|
+
"""
|
|
225
|
+
if file_path is None:
|
|
226
|
+
file_path = run_pathspec.lower().replace("/", "_") + ".patch"
|
|
227
|
+
if os.path.exists(file_path) and not overwrite:
|
|
228
|
+
echo(f"File *{file_path}* already exists. To overwrite, specify --overwrite.")
|
|
229
|
+
return
|
|
230
|
+
op_args: Mapping[str, Any] = {**kwargs, "dst": file_path}
|
|
231
|
+
run_op(run_pathspec, op_patch, **op_args)
|