dkist-processing-common 10.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- changelog/.gitempty +0 -0
- dkist_processing_common/__init__.py +9 -0
- dkist_processing_common/_util/__init__.py +1 -0
- dkist_processing_common/_util/constants.py +98 -0
- dkist_processing_common/_util/graphql.py +93 -0
- dkist_processing_common/_util/scratch.py +337 -0
- dkist_processing_common/_util/tags.py +232 -0
- dkist_processing_common/codecs/__init__.py +5 -0
- dkist_processing_common/codecs/asdf.py +23 -0
- dkist_processing_common/codecs/bytes.py +17 -0
- dkist_processing_common/codecs/fits.py +63 -0
- dkist_processing_common/codecs/iobase.py +26 -0
- dkist_processing_common/codecs/json.py +22 -0
- dkist_processing_common/codecs/path.py +7 -0
- dkist_processing_common/codecs/quality.py +88 -0
- dkist_processing_common/codecs/str.py +17 -0
- dkist_processing_common/config.py +108 -0
- dkist_processing_common/fonts/Lato-Regular.ttf +0 -0
- dkist_processing_common/fonts/__init__.py +1 -0
- dkist_processing_common/manual.py +190 -0
- dkist_processing_common/models/__init__.py +1 -0
- dkist_processing_common/models/constants.py +171 -0
- dkist_processing_common/models/fits_access.py +131 -0
- dkist_processing_common/models/flower_pot.py +194 -0
- dkist_processing_common/models/graphql.py +187 -0
- dkist_processing_common/models/message.py +65 -0
- dkist_processing_common/models/message_queue_binding.py +25 -0
- dkist_processing_common/models/metric_code.py +25 -0
- dkist_processing_common/models/parameters.py +197 -0
- dkist_processing_common/models/quality.py +76 -0
- dkist_processing_common/models/tags.py +450 -0
- dkist_processing_common/models/task_name.py +20 -0
- dkist_processing_common/models/wavelength.py +28 -0
- dkist_processing_common/parsers/__init__.py +1 -0
- dkist_processing_common/parsers/cs_step.py +183 -0
- dkist_processing_common/parsers/dsps_repeat.py +46 -0
- dkist_processing_common/parsers/experiment_id_bud.py +20 -0
- dkist_processing_common/parsers/id_bud.py +56 -0
- dkist_processing_common/parsers/l0_fits_access.py +30 -0
- dkist_processing_common/parsers/l1_fits_access.py +63 -0
- dkist_processing_common/parsers/near_bud.py +119 -0
- dkist_processing_common/parsers/proposal_id_bud.py +20 -0
- dkist_processing_common/parsers/quality.py +33 -0
- dkist_processing_common/parsers/single_value_single_key_flower.py +50 -0
- dkist_processing_common/parsers/task.py +119 -0
- dkist_processing_common/parsers/time.py +257 -0
- dkist_processing_common/parsers/unique_bud.py +103 -0
- dkist_processing_common/parsers/wavelength.py +15 -0
- dkist_processing_common/tasks/__init__.py +10 -0
- dkist_processing_common/tasks/assemble_movie.py +310 -0
- dkist_processing_common/tasks/base.py +379 -0
- dkist_processing_common/tasks/l1_output_data.py +263 -0
- dkist_processing_common/tasks/mixin/__init__.py +1 -0
- dkist_processing_common/tasks/mixin/globus.py +178 -0
- dkist_processing_common/tasks/mixin/input_dataset.py +166 -0
- dkist_processing_common/tasks/mixin/interservice_bus.py +22 -0
- dkist_processing_common/tasks/mixin/metadata_store.py +340 -0
- dkist_processing_common/tasks/mixin/object_store.py +78 -0
- dkist_processing_common/tasks/mixin/quality/__init__.py +7 -0
- dkist_processing_common/tasks/mixin/quality/_base.py +199 -0
- dkist_processing_common/tasks/mixin/quality/_metrics.py +1225 -0
- dkist_processing_common/tasks/output_data_base.py +104 -0
- dkist_processing_common/tasks/parse_l0_input_data.py +217 -0
- dkist_processing_common/tasks/quality_metrics.py +311 -0
- dkist_processing_common/tasks/teardown.py +74 -0
- dkist_processing_common/tasks/transfer_input_data.py +123 -0
- dkist_processing_common/tasks/trial_catalog.py +199 -0
- dkist_processing_common/tasks/trial_output_data.py +224 -0
- dkist_processing_common/tasks/write_l1.py +494 -0
- dkist_processing_common/tests/__init__.py +0 -0
- dkist_processing_common/tests/conftest.py +772 -0
- dkist_processing_common/tests/test_assemble_movie.py +110 -0
- dkist_processing_common/tests/test_assemble_quality.py +517 -0
- dkist_processing_common/tests/test_base.py +220 -0
- dkist_processing_common/tests/test_codecs.py +664 -0
- dkist_processing_common/tests/test_constants.py +169 -0
- dkist_processing_common/tests/test_cs_step.py +65 -0
- dkist_processing_common/tests/test_fits_access.py +313 -0
- dkist_processing_common/tests/test_flower_pot.py +105 -0
- dkist_processing_common/tests/test_input_dataset.py +527 -0
- dkist_processing_common/tests/test_interservice_bus.py +95 -0
- dkist_processing_common/tests/test_interservice_bus_mixin.py +120 -0
- dkist_processing_common/tests/test_output_data_base.py +93 -0
- dkist_processing_common/tests/test_parameters.py +345 -0
- dkist_processing_common/tests/test_parse_l0_input_data.py +308 -0
- dkist_processing_common/tests/test_publish_catalog_messages.py +81 -0
- dkist_processing_common/tests/test_quality.py +290 -0
- dkist_processing_common/tests/test_quality_mixin.py +1115 -0
- dkist_processing_common/tests/test_scratch.py +468 -0
- dkist_processing_common/tests/test_stems.py +742 -0
- dkist_processing_common/tests/test_submit_dataset_metadata.py +112 -0
- dkist_processing_common/tests/test_tags.py +186 -0
- dkist_processing_common/tests/test_task_name.py +25 -0
- dkist_processing_common/tests/test_task_parsing.py +109 -0
- dkist_processing_common/tests/test_teardown.py +151 -0
- dkist_processing_common/tests/test_transfer_input_data.py +167 -0
- dkist_processing_common/tests/test_transfer_l1_output_data.py +59 -0
- dkist_processing_common/tests/test_trial_catalog.py +184 -0
- dkist_processing_common/tests/test_trial_output_data.py +507 -0
- dkist_processing_common/tests/test_workflow_task_base.py +301 -0
- dkist_processing_common/tests/test_write_l1.py +489 -0
- dkist_processing_common-10.2.2.dist-info/METADATA +174 -0
- dkist_processing_common-10.2.2.dist-info/RECORD +112 -0
- dkist_processing_common-10.2.2.dist-info/WHEEL +5 -0
- dkist_processing_common-10.2.2.dist-info/top_level.txt +4 -0
- docs/Makefile +134 -0
- docs/changelog.rst +7 -0
- docs/conf.py +46 -0
- docs/index.rst +9 -0
- docs/make.bat +170 -0
- docs/requirements.txt +1 -0
- licenses/LICENSE.rst +11 -0
changelog/.gitempty
ADDED
|
File without changes
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"""Package providing support classes and methods used by all workflow tasks."""
|
|
2
|
+
from importlib.metadata import PackageNotFoundError
|
|
3
|
+
from importlib.metadata import version
|
|
4
|
+
|
|
5
|
+
try:
|
|
6
|
+
__version__ = version(distribution_name=__name__)
|
|
7
|
+
except PackageNotFoundError:
|
|
8
|
+
# package is not installed
|
|
9
|
+
__version__ = "unknown"
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Utilities not intended for direct access outside the dkist_processing_common package."""
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""Wrapper for interactions with shared database that holds arbitrary data that persists across the entire recipe run."""
|
|
2
|
+
import json
|
|
3
|
+
from collections.abc import MutableMapping
|
|
4
|
+
from enum import Enum
|
|
5
|
+
from typing import Generator
|
|
6
|
+
|
|
7
|
+
from dkist_processing_common._util.tags import TagDB
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ConstantsDb(MutableMapping):
|
|
11
|
+
"""
|
|
12
|
+
Base class defining the constants db.
|
|
13
|
+
|
|
14
|
+
Initialize a connection to the shared database.
|
|
15
|
+
|
|
16
|
+
Parameters
|
|
17
|
+
----------
|
|
18
|
+
recipe_run_id
|
|
19
|
+
The resipe_run_id
|
|
20
|
+
task_name
|
|
21
|
+
The task name
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
def __init__(self, recipe_run_id: int, task_name: str):
|
|
25
|
+
self.store = TagDB(recipe_run_id, task_name, "constant")
|
|
26
|
+
self._audit_db = TagDB(
|
|
27
|
+
recipe_run_id=recipe_run_id, task_name=task_name, namespace="constants_audit"
|
|
28
|
+
)
|
|
29
|
+
self._audit_set_tag = f"SET_{task_name}"
|
|
30
|
+
|
|
31
|
+
@staticmethod
|
|
32
|
+
def extract_value(value: set) -> int | str | float:
|
|
33
|
+
"""
|
|
34
|
+
Pop the first (and only) value from set and convert it from a json string.
|
|
35
|
+
|
|
36
|
+
Parameters
|
|
37
|
+
----------
|
|
38
|
+
value
|
|
39
|
+
The set from which to pop the value
|
|
40
|
+
|
|
41
|
+
Returns
|
|
42
|
+
-------
|
|
43
|
+
The value popped from the set.
|
|
44
|
+
"""
|
|
45
|
+
return json.loads(value.pop())
|
|
46
|
+
|
|
47
|
+
def __getitem__(self, key: str) -> int | str | float | tuple | list:
|
|
48
|
+
"""Return the constant stored at a specific key. Raise and error if the key doesn't exist."""
|
|
49
|
+
if isinstance(key, Enum):
|
|
50
|
+
key = key.value
|
|
51
|
+
value = self.store.all(key)
|
|
52
|
+
if not value:
|
|
53
|
+
raise KeyError(f"Constant {key} does not exist")
|
|
54
|
+
return self.extract_value(value)
|
|
55
|
+
|
|
56
|
+
def __delitem__(self, key: str):
|
|
57
|
+
"""'delete' a key by making it map to an empty set."""
|
|
58
|
+
self.store.clear_tag(key)
|
|
59
|
+
|
|
60
|
+
def __setitem__(self, key: str, value: str | int | float | tuple | list):
|
|
61
|
+
"""Set a constant key with the specified value. Raise an error if the key already exists."""
|
|
62
|
+
if self.store.all(key):
|
|
63
|
+
raise ValueError(f"Constant {key} already exists")
|
|
64
|
+
self.store.add(key, json.dumps(value))
|
|
65
|
+
# audit which constant key had a value set
|
|
66
|
+
self._audit_db.add(tag=self._audit_set_tag, value=key)
|
|
67
|
+
|
|
68
|
+
def __iter__(self) -> Generator[str, None, None]:
|
|
69
|
+
"""Yield the currently defined constants as strings."""
|
|
70
|
+
yield from self.store.tags
|
|
71
|
+
|
|
72
|
+
def __len__(self):
|
|
73
|
+
"""Return the number of constants currently defined."""
|
|
74
|
+
return len(self.store.tags)
|
|
75
|
+
|
|
76
|
+
def close(self):
|
|
77
|
+
"""Close the db connection. Call on __exit__ of a Task."""
|
|
78
|
+
self.store.close()
|
|
79
|
+
self._audit_db.close()
|
|
80
|
+
|
|
81
|
+
def purge(self):
|
|
82
|
+
"""Remove all constants associated with the instance recipe run id."""
|
|
83
|
+
self.store.purge()
|
|
84
|
+
self._audit_db.purge()
|
|
85
|
+
|
|
86
|
+
def rollback(self):
|
|
87
|
+
"""Remove all constants associated with the instance recipe run id and task name."""
|
|
88
|
+
# remove constants
|
|
89
|
+
for key in self._audit_db.all(tags=self._audit_set_tag):
|
|
90
|
+
self.store.clear_tag(key)
|
|
91
|
+
# remove audit
|
|
92
|
+
self._audit_db.clear_tag(tag=self._audit_set_tag)
|
|
93
|
+
|
|
94
|
+
def __enter__(self):
|
|
95
|
+
return self
|
|
96
|
+
|
|
97
|
+
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
98
|
+
self.close()
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
"""Extension of the GraphQL supporting retries for data processing use cases."""
|
|
2
|
+
import logging
|
|
3
|
+
from typing import Any
|
|
4
|
+
from typing import Callable
|
|
5
|
+
|
|
6
|
+
import requests
|
|
7
|
+
from gqlclient.base import DefaultParameters
|
|
8
|
+
from gqlclient.base import GraphQLClientBase
|
|
9
|
+
from gqlclient.request_wrap import wrap_request
|
|
10
|
+
from requests.adapters import HTTPAdapter
|
|
11
|
+
from requests.packages.urllib3.util.retry import Retry
|
|
12
|
+
|
|
13
|
+
from dkist_processing_common.config import common_configurations
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class GraphQLClient(GraphQLClientBase):
    """Helper class for formatting and executing synchronous GraphQL queries and mutations."""

    # Retry transient server errors (and 404s from not-yet-registered routes)
    # with exponential backoff; POST only because all graphql calls are POSTs.
    adapter = HTTPAdapter(
        max_retries=Retry(
            total=10,
            backoff_factor=1,
            status_forcelist=[502, 503, 404],
            allowed_methods=["POST"],  # all graphql methods are POST
        )
    )

    def execute_gql_call(self, query: dict, **kwargs) -> dict:
        """
        Execute a GraphQL query or mutation using requests.

        :param query: Dictionary formatted graphql query

        :param kwargs: Optional arguments that `requests` takes. e.g. headers

        :return: Dictionary containing the response from the GraphQL endpoint
        """
        logger.debug(f"Executing graphql call: host={self.gql_uri}")
        # Merge caller-supplied headers with the service identity/auth headers;
        # the service headers win on key collision.
        kwargs["headers"] = {
            **kwargs.get("headers", {}),
            "Service-Name": __name__,
            "Authorization": common_configurations.gql_auth_token,
        }
        with requests.sessions.Session() as http:
            # Mount the retry adapter for both schemes; previously it was only
            # mounted for "http://", so https endpoints got no retries.
            http.mount("http://", self.adapter)
            http.mount("https://", self.adapter)
            response = http.post(url=self.gql_uri, json=query, **kwargs)
            try:
                response.raise_for_status()
            except requests.exceptions.HTTPError as e:
                logger.error(
                    f"Error executing graphql call: status_code={e.response.status_code}, detail={e.response.text}"
                )
                raise e
            return response.json()

    def execute_gql_query(
        self,
        query_base: str,
        query_response_cls: type,
        query_parameters: object | None = DefaultParameters,
        response_encoder: Callable[[str, list[dict] | dict, type], Any] | None = None,
        **kwargs,
    ) -> Any:
        """Execute gql query with parameters dynamically wrapped."""
        # Only wrap real parameter objects; None and the DefaultParameters
        # sentinel pass through untouched.
        if query_parameters is not None and query_parameters is not DefaultParameters:
            query_parameters = wrap_request(query_parameters)
        return super().execute_gql_query(
            query_base=query_base,
            query_response_cls=query_response_cls,
            query_parameters=query_parameters,
            response_encoder=response_encoder,
            **kwargs,
        )

    def execute_gql_mutation(
        self,
        mutation_base: str,
        mutation_parameters: object,
        mutation_response_cls: type | None = None,
        response_encoder: Callable[[str, list[dict] | dict, type], Any] | None = None,
        **kwargs,
    ) -> Any:
        """Execute gql mutation with parameters dynamically wrapped."""
        mutation_parameters = wrap_request(mutation_parameters)
        return super().execute_gql_mutation(
            mutation_base=mutation_base,
            mutation_parameters=mutation_parameters,
            mutation_response_cls=mutation_response_cls,
            response_encoder=response_encoder,
            **kwargs,
        )
|
|
@@ -0,0 +1,337 @@
|
|
|
1
|
+
"""Scratch file system api."""
|
|
2
|
+
import logging
|
|
3
|
+
from contextlib import contextmanager
|
|
4
|
+
from os import umask
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from shutil import rmtree
|
|
7
|
+
from typing import Generator
|
|
8
|
+
|
|
9
|
+
from dkist_processing_common._util.tags import TagDB
|
|
10
|
+
from dkist_processing_common.config import common_configurations
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class WorkflowFileSystem:
    """
    Wrapper for interactions with the shared file system "scratch" supporting recipe run id based namespaces and tagged data.

    Create a workflow file system object.

    Parameters
    ----------
    recipe_run_id
        The recipe_run_id
    task_name
        The task_name
    scratch_base_path
        The base path at which to create the file system

    """

    def __init__(
        self,
        recipe_run_id: int = 0,
        task_name: str = "dev_task",
        scratch_base_path: Path | str | None = None,
    ):
        self.recipe_run_id = recipe_run_id
        self.task_name = task_name
        if not scratch_base_path:
            scratch_base_path = common_configurations.scratch_base_path
        self.scratch_base_path = scratch_base_path
        # Each recipe run gets its own directory namespace under the scratch root.
        self.workflow_base_path = Path(self.scratch_base_path) / str(recipe_run_id)
        with self._mask():
            self.workflow_base_path.mkdir(parents=True, exist_ok=True)
        self._tag_db = TagDB(recipe_run_id=self.recipe_run_id, task_name=self.task_name)
        # Separate TagDB namespace auditing what this task wrote/tagged so
        # rollback() can undo only this task's changes.
        self._audit_db = TagDB(
            recipe_run_id=self.recipe_run_id, task_name=self.task_name, namespace="scratch_audit"
        )
        self._audit_write_tag = f"WRITE_{self.task_name}"
        self._audit_tag_tag = f"TAG_{self.task_name}"
        # Per-instance cache of "was this tag new when first seen" answers,
        # avoiding repeated db lookups in tag(); see _tag_is_new.
        self._audit_new_tag_cache = dict()

    @staticmethod
    @contextmanager
    def _mask():
        """Set a permissive umask to allow other users (e.g. globus) to modify resources created by the scratch library."""
        old_mask = umask(0)
        try:
            yield
        finally:
            # Always restore the process-wide umask, even on error.
            umask(old_mask)

    def absolute_path(self, relative_path: Path | str) -> Path:
        """
        Convert a relative path to an absolute path with the base directories for the that workflow instance.

        Parameters
        ----------
        relative_path
            The relative_path input

        Returns
        -------
        The absolute path.

        Raises
        ------
        ValueError
            If the given path is already absolute.
        """
        relative_path = Path(relative_path)
        if relative_path.is_absolute():
            raise ValueError("Relative path must be relative")

        return self.workflow_base_path / relative_path

    @staticmethod
    def _parse_tags(tags: str | list | None) -> list:
        """Parse tags to support an individual tag in the form of a string or an arbitrarily nested list of strings."""
        if tags is None:
            return []
        if isinstance(tags, str):
            return [tags]
        return _flatten_list(tags)

    def write(
        self,
        file_obj: bytes,
        relative_path: Path | str,
        tags: str | list | None = None,
        overwrite: bool = False,
    ) -> None:
        """
        Write a file object to the path specified and tagged with any tags listed in tags.

        Parameters
        ----------
        file_obj
            The file object to be written
        relative_path
            The relative path at which to write the file
        tags
            The tags to be associated with the file object
        overwrite
            Should the file be overwritten if it already exists?

        Returns
        -------
        None
        """
        tags = self._parse_tags(tags)
        path = self.absolute_path(relative_path)
        # audit the path that was written to scratch
        self._audit_db.add(tag=self._audit_write_tag, value=str(path))
        with self._mask():
            path.parent.mkdir(parents=True, exist_ok=True)
            # "xb" makes open() raise FileExistsError rather than clobbering.
            if overwrite:
                mode = "wb"
            else:
                mode = "xb"
            with path.open(mode=mode) as f:
                f.write(file_obj)
        self.tag(path, tags)

    def delete(self, path: Path | str):
        """
        Delete the file or path.

        Parameters
        ----------
        path
            The path to be deleted

        Returns
        -------
        None
        """
        path = Path(path)
        path.unlink(missing_ok=True)
        # NOTE(review): tags are stored against str(path) (see tag()), but a
        # Path is passed here — presumably TagDB.clear_value normalizes;
        # confirm against TagDB.
        self._tag_db.clear_value(value=path)

    def tag(self, path: Path | str, tags: list | str) -> None:
        """
        Tag existing paths.

        The path must be relative to the WorkflowFileSystem base path and must exist.

        Parameters
        ----------
        path
            The path to tag
        tags
            The tags associated with the path.

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If the path is not a child of the workflow base path.
        FileNotFoundError
            If the path does not exist on disk.
        """
        tags = self._parse_tags(tags)
        path = Path(path)
        if not (self.workflow_base_path in path.parents):
            raise ValueError(
                f"Cannot tag paths which are not children of the base path {self.workflow_base_path}"
            )
        if not path.exists():
            raise FileNotFoundError(f"Cannot tag paths which do not exist. {path=}")

        for tag in tags:
            # audit the tag that was newly added to the scratch tag db
            if self._tag_is_new(tag=tag):
                self._audit_db.add(tag=self._audit_tag_tag, value=tag)
            self._tag_db.add(tag, str(path))

    def _tag_is_new(self, tag: str) -> bool:
        """Return whether the tag had no values when this instance first saw it (answer is cached per instance)."""
        if self._audit_new_tag_cache.get(tag, None) is None:
            tag_is_new = not bool(self._tag_db.all(tags=tag))
            self._audit_new_tag_cache[tag] = tag_is_new
        return self._audit_new_tag_cache[tag]

    def tags(self, path: Path | str):
        """
        Return the tags associated with the given file object.

        Parameters
        ----------
        path
            The input file object
        Returns
        -------
        An iterable containing the tags associated with the file
        """
        value = str(path)
        return self._tag_db.tags_for_value(value=value)

    def remove_tags(self, path: Path | str, tags: list | str) -> None:
        """Remove a tag or tags from a given path."""
        tags = self._parse_tags(tags)
        for tag in tags:
            self._tag_db.remove(tag, str(path))

    def find_any(self, tags: str | list) -> Generator[Path, None, None]:
        """
        Return a generator of Path objects that are tagged by the union of the input tags.

        Parameters
        ----------
        tags
            The tags to be used in the search

        Returns
        -------
        A generator of path objects matching the union of the desired tags
        """
        tags = self._parse_tags(tags)
        paths = self._tag_db.any(tags)
        logger.debug(f"Found {len(paths)} files containing the set of {tags=}")
        for path in paths:
            yield Path(path)

    def find_all(self, tags: str | list) -> Generator[Path, None, None]:
        """
        Return a generator of Path objects that are tagged by the intersection of the input tags.

        Parameters
        ----------
        tags
            The tags to be used in the search

        Returns
        -------
        A generator of path objects matching the intersection of the desired tags
        """
        tags = self._parse_tags(tags)
        paths = self._tag_db.all(tags)
        logger.debug(f"Found {len(paths)} files containing the set of {tags=}")
        for path in paths:
            yield Path(path)

    def count_any(self, tags: str | list) -> int:
        """
        Return the number of objects that are tagged by the union of the input tags.

        Parameters
        ----------
        tags
            The tags to be used in the search

        Returns
        -------
        The number of objects tagged with the union of the input tags.
        """
        tags = self._parse_tags(tags)
        return len(self._tag_db.any(tags))

    def count_all(self, tags: str | list) -> int:
        """
        Return the number of objects that are tagged by the intersection of the input tags.

        Parameters
        ----------
        tags
            The tags to be used in the search

        Returns
        -------
        The number of objects tagged with the intersection of the input tags.

        """
        tags = self._parse_tags(tags)
        return len(self._tag_db.all(tags))

    def close(self):
        """Close the db connection. Call on __exit__ of a Task."""
        self._tag_db.close()
        self._audit_db.close()

    def purge(self, ignore_errors: bool = False):
        """
        Remove all data (tags, files, and folders) for the instance.

        Call when tearing down a workflow

        Parameters
        ----------
        ignore_errors
            If set, errors will be ignored, otherwise stop at the first error
        Returns
        -------
        None
        """
        rmtree(self.workflow_base_path, ignore_errors=ignore_errors)
        self._tag_db.purge()
        self._audit_db.purge()

    def rollback(self):
        """Remove all files and new tags associated with the instance recipe run id and task name."""
        # remove files this task wrote (per the audit trail)
        for path in self._audit_db.all(tags=self._audit_write_tag):
            path = Path(path)
            path.unlink(missing_ok=True)
            self._tag_db.clear_value(path)
        # remove tags this task introduced
        for tag in self._audit_db.all(tags=self._audit_tag_tag):
            self._tag_db.clear_tag(tag=tag)
        # remove audit
        self._audit_db.clear_tag(tag=self._audit_write_tag)
        self._audit_db.clear_tag(tag=self._audit_tag_tag)

    def __repr__(self):
        return f"WorkflowFileSystem(recipe_run_id={self.recipe_run_id}, task_name={self.task_name}, scratch_base_path={self.scratch_base_path})"

    def __str__(self):
        return f"{self!r} connected to {self._tag_db}"

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
def _flatten_list(elements: list) -> list:
|
|
330
|
+
"""Flatten an arbitrarily nested list."""
|
|
331
|
+
result = []
|
|
332
|
+
for element in elements:
|
|
333
|
+
if isinstance(element, list):
|
|
334
|
+
result.extend(_flatten_list(element))
|
|
335
|
+
else:
|
|
336
|
+
result.append(element)
|
|
337
|
+
return result
|