sammi-cdf 0.1.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sammi/CITATION.rst +16 -0
- sammi/__init__.py +7 -0
- sammi/_version.py +16 -0
- sammi/cdf_attribute_manager.py +611 -0
- sammi/data/README.rst +6 -0
- sammi/data/default_global_cdf_attrs_schema.yaml +280 -0
- sammi/data/default_variable_cdf_attrs_schema.yaml +551 -0
- sammi/data/imap_default_global_cdf_attrs.yaml +0 -0
- sammi/data/shared/default_global_cdf_attrs_schema.yaml +0 -0
- sammi/data/shared/default_variable_cdf_attrs_schema.yaml +0 -0
- sammi/schema.py +374 -0
- sammi/tests/__init__.py +4 -0
- sammi/tests/shared/default_global_cdf_attrs_schema.yaml +0 -0
- sammi/tests/shared/default_variable_cdf_attrs_schema.yaml +466 -0
- sammi/tests/test_cdf_attribute_manager.py +519 -0
- sammi/tests/test_data/default_global_test_cdf_attrs.yaml +8 -0
- sammi/tests/test_data/imap_default_global_cdf_attrs.yaml +7 -0
- sammi/tests/test_data/imap_default_global_test_cdf_attrs.yaml +7 -0
- sammi/tests/test_data/imap_instrument1_global_cdf_attrs.yaml +14 -0
- sammi/tests/test_data/imap_instrument1_level1_variable_attrs.yaml +23 -0
- sammi/tests/test_data/imap_instrument2_global_cdf_attrs.yaml +23 -0
- sammi/tests/test_data/imap_instrument2_level2_variable_attrs.yaml +30 -0
- sammi/tests/test_data/imap_test_global.yaml +26 -0
- sammi/tests/test_data/imap_test_variable.yaml +41 -0
- sammi/tests/test_schema.py +174 -0
- sammi_cdf-0.1.0.dev0.dist-info/LICENSE.rst +1 -0
- sammi_cdf-0.1.0.dev0.dist-info/METADATA +141 -0
- sammi_cdf-0.1.0.dev0.dist-info/RECORD +30 -0
- sammi_cdf-0.1.0.dev0.dist-info/WHEEL +5 -0
- sammi_cdf-0.1.0.dev0.dist-info/top_level.txt +1 -0
sammi/CITATION.rst
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
Acknowledging this Package
|
|
2
|
+
==========================
|
|
3
|
+
|
|
4
|
+
If you use this package in your scientific work, we would appreciate citing it in your publications.
|
|
5
|
+
Proper citation and acknowledgement are key to a healthy scientific community and enable scientific reproducibility.
|
|
6
|
+
|
|
7
|
+
Citing in Publications
|
|
8
|
+
----------------------
|
|
9
|
+
|
|
10
|
+
Please add the following line within your methods, conclusion or acknowledgements sections:
|
|
11
|
+
|
|
12
|
+
*This research used version X.Y.Z (software citation) of the SWxSOC Schema open source
|
|
13
|
+
software package (paper citation).*
|
|
14
|
+
|
|
15
|
+
The software citation should be the specific `Zenodo DOI`_ for the version used in your work.
|
|
16
|
+
A paper citation does not yet exist.
|
sammi/__init__.py
ADDED
sammi/_version.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# file generated by setuptools_scm
|
|
2
|
+
# don't change, don't track in version control
|
|
3
|
+
TYPE_CHECKING = False
|
|
4
|
+
if TYPE_CHECKING:
|
|
5
|
+
from typing import Tuple, Union
|
|
6
|
+
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
|
7
|
+
else:
|
|
8
|
+
VERSION_TUPLE = object
|
|
9
|
+
|
|
10
|
+
version: str
|
|
11
|
+
__version__: str
|
|
12
|
+
__version_tuple__: VERSION_TUPLE
|
|
13
|
+
version_tuple: VERSION_TUPLE
|
|
14
|
+
|
|
15
|
+
__version__ = version = '0.1.dev11+g3cb909c.d20241008'
|
|
16
|
+
__version_tuple__ = version_tuple = (0, 1, 'dev11', 'g3cb909c.d20241008')
|
|
@@ -0,0 +1,611 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Optional
|
|
6
|
+
import yaml
|
|
7
|
+
|
|
8
|
+
import sammi
|
|
9
|
+
|
|
10
|
+
__all__ = ["CdfAttributeManager"]
|
|
11
|
+
|
|
12
|
+
DEFAULT_GLOBAL_CDF_ATTRS_SCHEMA_FILE = "default_global_cdf_attrs_schema.yaml"
|
|
13
|
+
DEFAULT_VARIABLE_CDF_ATTRS_SCHEMA_FILE = "default_variable_cdf_attrs_schema.yaml"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class CdfAttributeManager:
|
|
17
|
+
"""
|
|
18
|
+
Class for creating and managing CDF attributes based out of yaml files.
|
|
19
|
+
The SAMMI default schema only includes attributes required for ISTP compliance.
|
|
20
|
+
Additional mission-specific attributes or requirements should be added through additional global and variable schema layers.
|
|
21
|
+
|
|
22
|
+
There are two main components to the SAMMI CDF Attribute Manager: global attribute information and variable attribute information.
|
|
23
|
+
|
|
24
|
+
Global schema information is loaded from YAML (dict-like) files in the following format:
|
|
25
|
+
|
|
26
|
+
.. code-block:: yaml
|
|
27
|
+
|
|
28
|
+
attribute_name:
|
|
29
|
+
description: >
|
|
30
|
+
Include a meaningful description of the attribute and context needed to understand
|
|
31
|
+
its values.
|
|
32
|
+
default: <string> # A default value for the attribute if needed/desired
|
|
33
|
+
required: <bool> # Whether the attribute is required
|
|
34
|
+
|
|
35
|
+
Variable schema information is loaded from YAML (dict-like) files in the following format:
|
|
36
|
+
|
|
37
|
+
.. code-block:: yaml
|
|
38
|
+
|
|
39
|
+
attribute_key:
|
|
40
|
+
attribute_name:
|
|
41
|
+
description: >
|
|
42
|
+
Include a meaningful description of the attribute and context needed to understand
|
|
43
|
+
its values.
|
|
44
|
+
required: <bool> # Whether the attribute is required
|
|
45
|
+
valid_values: <list> # A list of valid values that the attribute can take.
|
|
46
|
+
alternate: <string> # An additional attribute name that can be treated as an alternative of the given attribute.
|
|
47
|
+
data:
|
|
48
|
+
- attribute_name
|
|
49
|
+
- ...
|
|
50
|
+
support_data:
|
|
51
|
+
- ...
|
|
52
|
+
metadata:
|
|
53
|
+
- ...
|
|
54
|
+
|
|
55
|
+
Parameters
|
|
56
|
+
----------
|
|
57
|
+
global_schema_layers : `Optional[list[Path]]`
|
|
58
|
+
Absolute file paths to global attribute schema files. These schema files are layered
|
|
59
|
+
on top of one another in a latest-priority ordering. That is, the latest file that modifies
|
|
60
|
+
a common schema attribute will take precedence over earlier values for a given attribute.
|
|
61
|
+
variable_schema_layers : `Optional[list[Path]]`
|
|
62
|
+
Absolute file paths to variable attribute schema files. These schema files are layered
|
|
63
|
+
on top of one another in a latest-priority ordering. That is, the latest file that modifies
|
|
64
|
+
a common schema attribute will take precedence over earlier values for a given attribute.
|
|
65
|
+
use_defaults: `Optional[bool]`
|
|
66
|
+
Whether or not to load the default global and variable attribute schema files. These
|
|
67
|
+
default schema files contain only the requirements for CDF ISTP validation.
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
Examples
|
|
71
|
+
--------
|
|
72
|
+
To use, you can load one or many global and variable attribute files:
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
>>> import sammi
|
|
76
|
+
>>> cdf_attr_manager = sammi.cdf_attribute_manager.CdfAttributeManager(use_defaults=True)
|
|
77
|
+
>>> data_path = Path(sammi.__file__).parent.parent / "sammi" / "data"
|
|
78
|
+
>>> cdf_attr_manager.load_global_attributes(data_path / "default_global_cdf_attrs_schema.yaml")
|
|
79
|
+
>>> cdf_attr_manager.load_global_attributes(data_path / "default_variable_cdf_attrs_schema.yaml")
|
|
80
|
+
>>> cdf_attr_manager.load_variable_attributes(data_path / "variable_attrs.yaml") #doctest: +SKIP
|
|
81
|
+
|
|
82
|
+
Later files will overwrite earlier files if the same attribute is defined.
|
|
83
|
+
|
|
84
|
+
You can then get the global and variable attributes:
|
|
85
|
+
|
|
86
|
+
If you provide an instrument_id, it will also add the attributes defined under
|
|
87
|
+
instrument_id. If this is not included, then only the attributes defined in the top
|
|
88
|
+
level of the file are used.
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
>>> # Instrument ID is optional for refining the attributes used from the file
|
|
92
|
+
>>> global_attrs = cdf_attr_manager.get_global_attributes("instrument_id")
|
|
93
|
+
>>> variable_attrs = cdf_attr_manager.get_variable_attributes("Epoch") #doctest: +SKIP
|
|
94
|
+
|
|
95
|
+
The variable and global attributes are validated against the schemas upon calling
|
|
96
|
+
``get_global_attributes`` and ``get_variable_attributes``.
|
|
97
|
+
|
|
98
|
+
"""
|
|
99
|
+
|
|
100
|
+
def __init__(
|
|
101
|
+
self,
|
|
102
|
+
global_schema_layers: Optional[list[Path]] = None,
|
|
103
|
+
variable_schema_layers: Optional[list[Path]] = None,
|
|
104
|
+
use_defaults: Optional[bool] = True,
|
|
105
|
+
) -> None:
|
|
106
|
+
# Input Validation
|
|
107
|
+
if not use_defaults and (
|
|
108
|
+
not global_schema_layers
|
|
109
|
+
or not variable_schema_layers
|
|
110
|
+
or len(global_schema_layers) == 0
|
|
111
|
+
or len(variable_schema_layers) == 0
|
|
112
|
+
):
|
|
113
|
+
raise ValueError(
|
|
114
|
+
"Not enough information to create schema. You must either use the defaults or provide alternative layers for both global and variable attribbute schemas."
|
|
115
|
+
)
|
|
116
|
+
|
|
117
|
+
# Construct the Global Attribute Schema
|
|
118
|
+
_global_attr_schema = {}
|
|
119
|
+
if use_defaults:
|
|
120
|
+
_def_global_attr_schema = self._load_default_global_attr_schema()
|
|
121
|
+
_global_attr_schema = self._merge(
|
|
122
|
+
base_layer=_global_attr_schema, new_layer=_def_global_attr_schema
|
|
123
|
+
)
|
|
124
|
+
if global_schema_layers is not None:
|
|
125
|
+
for schema_layer_path in global_schema_layers:
|
|
126
|
+
_global_attr_layer = CdfAttributeManager._load_yaml_data(
|
|
127
|
+
file_path=schema_layer_path
|
|
128
|
+
)
|
|
129
|
+
_global_attr_schema = self._merge(
|
|
130
|
+
base_layer=_global_attr_schema, new_layer=_global_attr_layer
|
|
131
|
+
)
|
|
132
|
+
# Set Final Member
|
|
133
|
+
self._global_attr_schema = _global_attr_schema
|
|
134
|
+
|
|
135
|
+
# Data Validation and Compliance for Variable Data
|
|
136
|
+
_variable_attr_schema = {}
|
|
137
|
+
if use_defaults:
|
|
138
|
+
_def_variable_attr_schema = self._load_default_variable_attr_schema()
|
|
139
|
+
_variable_attr_schema = self._merge(
|
|
140
|
+
base_layer=_variable_attr_schema, new_layer=_def_variable_attr_schema
|
|
141
|
+
)
|
|
142
|
+
if variable_schema_layers is not None:
|
|
143
|
+
for schema_layer_path in variable_schema_layers:
|
|
144
|
+
_variable_attr_layer = CdfAttributeManager._load_yaml_data(
|
|
145
|
+
file_path=schema_layer_path
|
|
146
|
+
)
|
|
147
|
+
_variable_attr_schema = self._merge(
|
|
148
|
+
base_layer=_variable_attr_schema, new_layer=_variable_attr_layer
|
|
149
|
+
)
|
|
150
|
+
# Set the Final Member
|
|
151
|
+
self._variable_attr_schema = _variable_attr_schema
|
|
152
|
+
|
|
153
|
+
self._variable_attributes: dict = {}
|
|
154
|
+
self._global_attributes: dict = self._load_default_global_attributes()
|
|
155
|
+
|
|
156
|
+
@property
|
|
157
|
+
def global_attribute_schema(self):
|
|
158
|
+
"""(`dict`) Schema for global attributes of the file."""
|
|
159
|
+
return self._global_attr_schema
|
|
160
|
+
|
|
161
|
+
@property
|
|
162
|
+
def variable_attribute_schema(self):
|
|
163
|
+
"""(`dict`) Schema for variable attributes of the file."""
|
|
164
|
+
return self._variable_attr_schema
|
|
165
|
+
|
|
166
|
+
# =========================================================================
|
|
167
|
+
# INITIALIZATION FUNCTIONS
|
|
168
|
+
# =========================================================================
|
|
169
|
+
|
|
170
|
+
def _load_default_global_attr_schema(self) -> dict:
    """
    Read the packaged default global attribute schema.

    Returns
    -------
    dict
        The parsed contents of the default global schema YAML file.
    """
    # The default schema ships in the package's `sammi/data` directory.
    install_root = Path(sammi.__file__).parent.parent
    schema_file = install_root.joinpath(
        "sammi", "data", DEFAULT_GLOBAL_CDF_ATTRS_SCHEMA_FILE
    )
    return CdfAttributeManager._load_yaml_data(file_path=str(schema_file))
|
|
188
|
+
|
|
189
|
+
def _load_default_variable_attr_schema(self) -> dict:
    """
    Read the packaged default variable attribute schema.

    Returns
    -------
    dict
        The parsed contents of the default variable schema YAML file.
    """
    # The default schema ships in the package's `sammi/data` directory.
    install_root = Path(sammi.__file__).parent.parent
    schema_file = install_root.joinpath(
        "sammi", "data", DEFAULT_VARIABLE_CDF_ATTRS_SCHEMA_FILE
    )
    return CdfAttributeManager._load_yaml_data(file_path=str(schema_file))
|
|
207
|
+
|
|
208
|
+
def _load_default_global_attributes(self) -> dict:
|
|
209
|
+
# Use the Existing Global Schema
|
|
210
|
+
global_schema = self.global_attribute_schema
|
|
211
|
+
return {
|
|
212
|
+
attr_name: info["default"]
|
|
213
|
+
for attr_name, info in global_schema.items()
|
|
214
|
+
if info["default"] is not None
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
@staticmethod
|
|
218
|
+
def _load_yaml_data(file_path: Path) -> dict:
|
|
219
|
+
"""
|
|
220
|
+
Load a yaml file from the provided path.
|
|
221
|
+
|
|
222
|
+
Parameters
|
|
223
|
+
----------
|
|
224
|
+
file_path : `Path`
|
|
225
|
+
Path to the yaml file to load.
|
|
226
|
+
|
|
227
|
+
Returns
|
|
228
|
+
-------
|
|
229
|
+
dict
|
|
230
|
+
Loaded yaml.
|
|
231
|
+
"""
|
|
232
|
+
assert Path(file_path).exists()
|
|
233
|
+
# Load the Yaml file to Dict
|
|
234
|
+
yaml_data = {}
|
|
235
|
+
with open(file_path, "r") as f:
|
|
236
|
+
yaml_data = yaml.safe_load(f)
|
|
237
|
+
return yaml_data
|
|
238
|
+
|
|
239
|
+
def _merge(self, base_layer: dict, new_layer: dict, path: list = None) -> None:
|
|
240
|
+
"""
|
|
241
|
+
Function to do in-place merging and updating of two dictionaries.
|
|
242
|
+
This is an improvemnent over the built-in dict.update() method, as it allows for nested dictionaries and lists.
|
|
243
|
+
|
|
244
|
+
Parameters
|
|
245
|
+
----------
|
|
246
|
+
base_layer : `dict`
|
|
247
|
+
The base dictionary to merge into.
|
|
248
|
+
new_layer : `dict`
|
|
249
|
+
The new dictionary to merge into the base.
|
|
250
|
+
path : `list`
|
|
251
|
+
The path to the current dictionary being merged. Used for recursion.
|
|
252
|
+
|
|
253
|
+
Returns
|
|
254
|
+
-------
|
|
255
|
+
None - operation is done in-place.
|
|
256
|
+
"""
|
|
257
|
+
# If we are at the top of the recursion, and we don't have a path, create a new one
|
|
258
|
+
if not path:
|
|
259
|
+
path = []
|
|
260
|
+
# for each key in the base layer
|
|
261
|
+
for key in new_layer:
|
|
262
|
+
# If its a shared key
|
|
263
|
+
if key in base_layer:
|
|
264
|
+
# If both are dictionaries
|
|
265
|
+
if isinstance(base_layer[key], dict) and isinstance(
|
|
266
|
+
new_layer[key], dict
|
|
267
|
+
):
|
|
268
|
+
# Merge the two nested dictionaries together
|
|
269
|
+
self._merge(base_layer[key], new_layer[key], path + [str(key)])
|
|
270
|
+
# If both are lists
|
|
271
|
+
if isinstance(base_layer[key], list) and isinstance(
|
|
272
|
+
new_layer[key], list
|
|
273
|
+
):
|
|
274
|
+
# Extend the list of the base layer by the new layer
|
|
275
|
+
base_layer[key].extend(new_layer[key])
|
|
276
|
+
# If they are not lists or dicts (scalars)
|
|
277
|
+
elif base_layer[key] != new_layer[key]:
|
|
278
|
+
# We've reached a conflict, may want to overwrite the base with the new layer.
|
|
279
|
+
base_layer[key] = new_layer[key]
|
|
280
|
+
# If its not a shared key
|
|
281
|
+
else:
|
|
282
|
+
base_layer[key] = new_layer[key]
|
|
283
|
+
return base_layer
|
|
284
|
+
|
|
285
|
+
# =========================================================================
|
|
286
|
+
# GLOBAL ATTRIBUTE FUNCTIONS
|
|
287
|
+
# =========================================================================
|
|
288
|
+
|
|
289
|
+
def load_global_attributes(self, file_path: Path) -> None:
|
|
290
|
+
"""
|
|
291
|
+
Update the global attributes property with the attributes from the file.
|
|
292
|
+
|
|
293
|
+
Calling this method multiple times on different files will add all the
|
|
294
|
+
attributes from the files, overwriting existing attributes if they are
|
|
295
|
+
duplicated.
|
|
296
|
+
|
|
297
|
+
Parameters
|
|
298
|
+
----------
|
|
299
|
+
file_path : `Path`
|
|
300
|
+
File path to load the global attributes from.
|
|
301
|
+
"""
|
|
302
|
+
new_global_layer = CdfAttributeManager._load_yaml_data(file_path)
|
|
303
|
+
self._merge(self._global_attributes, new_global_layer)
|
|
304
|
+
|
|
305
|
+
def add_global_attribute(self, attribute_name: str, attribute_value: str) -> None:
|
|
306
|
+
"""
|
|
307
|
+
Add a single global attribute to the global attributes.
|
|
308
|
+
|
|
309
|
+
This is intended only for dynamic global attributes which change per-file, such
|
|
310
|
+
as Data_version. It is not intended to be used for static attributes, which
|
|
311
|
+
should all be included in the YAML files.
|
|
312
|
+
|
|
313
|
+
This will overwrite any existing value in attribute_name if it exists. The
|
|
314
|
+
attribute must be in the global schema, or it will not be included as output.
|
|
315
|
+
|
|
316
|
+
Parameters
|
|
317
|
+
----------
|
|
318
|
+
attribute_name : str
|
|
319
|
+
The name of the attribute to add.
|
|
320
|
+
attribute_value : str
|
|
321
|
+
The value of the attribute to add.
|
|
322
|
+
"""
|
|
323
|
+
self._global_attributes[attribute_name] = attribute_value
|
|
324
|
+
|
|
325
|
+
def get_global_attributes(self, instrument_id: str | None = None) -> dict:
|
|
326
|
+
"""
|
|
327
|
+
Generate a dictionary global attributes based off the loaded schema and attrs.
|
|
328
|
+
|
|
329
|
+
Validates against the global schema to ensure all required variables are
|
|
330
|
+
present. It can also include instrument specific global attributes if
|
|
331
|
+
instrumet_id is set.
|
|
332
|
+
|
|
333
|
+
If an instrument_id is provided, the level and instrument specific
|
|
334
|
+
attributes that were previously loaded using add_instrument_global_attrs will
|
|
335
|
+
be included.
|
|
336
|
+
|
|
337
|
+
Parameters
|
|
338
|
+
----------
|
|
339
|
+
instrument_id : str
|
|
340
|
+
The id of the CDF file, used to retrieve instrument and level
|
|
341
|
+
specific global attributes. Suggested value is the logical_source_id.
|
|
342
|
+
|
|
343
|
+
Returns
|
|
344
|
+
-------
|
|
345
|
+
output : dict
|
|
346
|
+
The global attribute values created from the input global attribute files
|
|
347
|
+
and schemas.
|
|
348
|
+
"""
|
|
349
|
+
output = dict()
|
|
350
|
+
for attr_name, attr_schema in self.global_attribute_schema.items():
|
|
351
|
+
if attr_name in self._global_attributes:
|
|
352
|
+
output[attr_name] = self._global_attributes[attr_name]
|
|
353
|
+
# Retrieve instrument specific global attributes from the variable file
|
|
354
|
+
elif (
|
|
355
|
+
instrument_id is not None
|
|
356
|
+
and attr_name in self._global_attributes[instrument_id]
|
|
357
|
+
):
|
|
358
|
+
output[attr_name] = self._global_attributes[instrument_id][attr_name]
|
|
359
|
+
elif attr_schema["required"] and attr_name not in self._global_attributes:
|
|
360
|
+
# TODO throw an error
|
|
361
|
+
output[attr_name] = None
|
|
362
|
+
return output
|
|
363
|
+
|
|
364
|
+
def global_attribute_template(self) -> dict:
|
|
365
|
+
"""
|
|
366
|
+
Function to generate a template of required global attributes
|
|
367
|
+
that must be set for a valid data file.
|
|
368
|
+
|
|
369
|
+
Returns
|
|
370
|
+
-------
|
|
371
|
+
template : `dict`
|
|
372
|
+
A template for required global attributes that must be provided.
|
|
373
|
+
"""
|
|
374
|
+
template = {}
|
|
375
|
+
for attr_name, attr_schema in self.global_attribute_schema.items():
|
|
376
|
+
if attr_schema["required"] and attr_name not in self._global_attributes:
|
|
377
|
+
template[attr_name] = None
|
|
378
|
+
return template
|
|
379
|
+
|
|
380
|
+
def global_attribute_info(self, attribute_name: Optional[str] = None) -> dict:
|
|
381
|
+
"""
|
|
382
|
+
Function to generate a `dict` of information about each global
|
|
383
|
+
metadata attribute. The `dict` contains all information in the
|
|
384
|
+
global attribute schema including:
|
|
385
|
+
|
|
386
|
+
- description: (`str`) A brief description of the attribute
|
|
387
|
+
- default: (`str`) The default value used if none is provided
|
|
388
|
+
- required: (`bool`) Whether the attribute is required
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
Parameters
|
|
392
|
+
----------
|
|
393
|
+
attribute_name : `str`, optional, default None
|
|
394
|
+
The name of the attribute to get specific information for.
|
|
395
|
+
|
|
396
|
+
Returns
|
|
397
|
+
-------
|
|
398
|
+
info: `dict`
|
|
399
|
+
information about global metadata
|
|
400
|
+
|
|
401
|
+
Raises
|
|
402
|
+
------
|
|
403
|
+
KeyError: If attribute_name is not a recognized global attribute.
|
|
404
|
+
"""
|
|
405
|
+
info = self.global_attribute_schema.copy()
|
|
406
|
+
|
|
407
|
+
# Strip the Description of New Lines
|
|
408
|
+
for attr_name in info.keys():
|
|
409
|
+
info[attr_name]["description"] = info[attr_name]["description"].strip()
|
|
410
|
+
|
|
411
|
+
# Limit the Info to the requested Attribute
|
|
412
|
+
if attribute_name and attribute_name in info:
|
|
413
|
+
info = info[attribute_name]
|
|
414
|
+
elif attribute_name and attribute_name not in info:
|
|
415
|
+
raise KeyError(
|
|
416
|
+
f"Cannot find Global Metadata for attribute name: {attribute_name}"
|
|
417
|
+
)
|
|
418
|
+
|
|
419
|
+
return info
|
|
420
|
+
|
|
421
|
+
# =========================================================================
|
|
422
|
+
# VARIABLE ATTRIBUTE FUNCTIONS
|
|
423
|
+
# =========================================================================
|
|
424
|
+
|
|
425
|
+
def load_variable_attributes(self, file_path: Path) -> None:
|
|
426
|
+
"""
|
|
427
|
+
Update the variable attributes property with the attributes from the file.
|
|
428
|
+
|
|
429
|
+
Calling this method multiple times on different files will add all the
|
|
430
|
+
attributes from the files, overwriting existing attributes if they are
|
|
431
|
+
duplicated.
|
|
432
|
+
|
|
433
|
+
Parameters
|
|
434
|
+
----------
|
|
435
|
+
file_path : `Path`
|
|
436
|
+
File path to load the variable attributes from.
|
|
437
|
+
"""
|
|
438
|
+
new_variable_layer = CdfAttributeManager._load_yaml_data(file_path)
|
|
439
|
+
self._merge(self._variable_attributes, new_variable_layer)
|
|
440
|
+
|
|
441
|
+
def get_variable_attributes(
|
|
442
|
+
self, variable_name: str, check_schema: bool = True
|
|
443
|
+
) -> dict:
|
|
444
|
+
"""
|
|
445
|
+
Get the attributes for a given variable name.
|
|
446
|
+
|
|
447
|
+
It retrieves the variable from previously loaded variable definition files and
|
|
448
|
+
validates against the defined variable schemas.
|
|
449
|
+
|
|
450
|
+
Parameters
|
|
451
|
+
----------
|
|
452
|
+
variable_name : str
|
|
453
|
+
The name of the variable to retrieve attributes for.
|
|
454
|
+
|
|
455
|
+
check_schema : bool
|
|
456
|
+
Flag to bypass schema validation.
|
|
457
|
+
|
|
458
|
+
Returns
|
|
459
|
+
-------
|
|
460
|
+
dict
|
|
461
|
+
Information containing specific variable attributes
|
|
462
|
+
associated with "variable_name".
|
|
463
|
+
"""
|
|
464
|
+
# Case to handle attributes not in schema
|
|
465
|
+
if check_schema is False:
|
|
466
|
+
if variable_name in self._variable_attributes:
|
|
467
|
+
return_dict: dict = self._variable_attributes[variable_name]
|
|
468
|
+
return return_dict
|
|
469
|
+
# TODO: throw an error?
|
|
470
|
+
return {}
|
|
471
|
+
|
|
472
|
+
output = dict()
|
|
473
|
+
for attr_name in self.variable_attribute_schema["attribute_key"]:
|
|
474
|
+
# Standard case
|
|
475
|
+
if attr_name in self._variable_attributes[variable_name]:
|
|
476
|
+
output[attr_name] = self._variable_attributes[variable_name][attr_name]
|
|
477
|
+
# Case to handle DEPEND_i schema issues
|
|
478
|
+
elif attr_name == "DEPEND_i":
|
|
479
|
+
# DEFAULT_0 is not required, UNLESS we are dealing with
|
|
480
|
+
# variable_name = epoch
|
|
481
|
+
# Find all the attributes of variable_name that contain "DEPEND"
|
|
482
|
+
variable_depend_attrs = [
|
|
483
|
+
key
|
|
484
|
+
for key in self._variable_attributes[variable_name].keys()
|
|
485
|
+
if "DEPEND" in key
|
|
486
|
+
]
|
|
487
|
+
# Confirm that each DEPEND_i attribute is unique
|
|
488
|
+
if len(set(variable_depend_attrs)) != len(variable_depend_attrs):
|
|
489
|
+
logging.warning(
|
|
490
|
+
f"Found duplicate DEPEND_i attribute in variable "
|
|
491
|
+
f"{variable_name}: {variable_depend_attrs}"
|
|
492
|
+
)
|
|
493
|
+
for variable_depend_attr in variable_depend_attrs:
|
|
494
|
+
output[variable_depend_attr] = self._variable_attributes[
|
|
495
|
+
variable_name
|
|
496
|
+
][variable_depend_attr]
|
|
497
|
+
# TODO: Add more DEPEND_0 variable checks!
|
|
498
|
+
# Case to handle LABL_PTR_i schema issues
|
|
499
|
+
elif attr_name == "LABL_PTR_i":
|
|
500
|
+
# Find all the attributes of variable_name that contain "LABL_PTR"
|
|
501
|
+
variable_labl_attrs = [
|
|
502
|
+
key
|
|
503
|
+
for key in self._variable_attributes[variable_name].keys()
|
|
504
|
+
if "LABL_PTR" in key
|
|
505
|
+
]
|
|
506
|
+
for variable_labl_attr in variable_labl_attrs:
|
|
507
|
+
output[variable_labl_attr] = self._variable_attributes[
|
|
508
|
+
variable_name
|
|
509
|
+
][variable_labl_attr]
|
|
510
|
+
# Case to handle REPRESENTATION_i schema issues
|
|
511
|
+
elif attr_name == "REPRESENTATION_i":
|
|
512
|
+
# Find all the attributes of variable_name that contain
|
|
513
|
+
# "REPRESENTATION_i"
|
|
514
|
+
variable_rep_attrs = [
|
|
515
|
+
key
|
|
516
|
+
for key in self._variable_attributes[variable_name].keys()
|
|
517
|
+
if "REPRESENTATION" in key
|
|
518
|
+
]
|
|
519
|
+
for variable_rep_attr in variable_rep_attrs:
|
|
520
|
+
output[variable_rep_attr] = self._variable_attributes[
|
|
521
|
+
variable_name
|
|
522
|
+
][variable_rep_attr]
|
|
523
|
+
# Validating required schema
|
|
524
|
+
elif (
|
|
525
|
+
self.variable_attribute_schema["attribute_key"][attr_name]["required"]
|
|
526
|
+
and attr_name not in self._variable_attributes[variable_name]
|
|
527
|
+
):
|
|
528
|
+
logging.warning(
|
|
529
|
+
"Required schema '"
|
|
530
|
+
+ attr_name
|
|
531
|
+
+ "' attribute not present for "
|
|
532
|
+
+ variable_name
|
|
533
|
+
)
|
|
534
|
+
output[attr_name] = ""
|
|
535
|
+
|
|
536
|
+
return output
|
|
537
|
+
|
|
538
|
+
def variable_attribute_template(self) -> dict:
|
|
539
|
+
"""
|
|
540
|
+
Function to generate a template of required variable attributes
|
|
541
|
+
that must be set for a valid data file.
|
|
542
|
+
|
|
543
|
+
Returns
|
|
544
|
+
-------
|
|
545
|
+
template: `dict`
|
|
546
|
+
A template for required variable attributes that must be provided.
|
|
547
|
+
"""
|
|
548
|
+
template = {}
|
|
549
|
+
for attr_name, attr_schema in self.variable_attribute_schema[
|
|
550
|
+
"attribute_key"
|
|
551
|
+
].items():
|
|
552
|
+
if attr_schema["required"]:
|
|
553
|
+
template[attr_name] = None
|
|
554
|
+
return template
|
|
555
|
+
|
|
556
|
+
def variable_attribute_info(self, attribute_name: Optional[str] = None) -> dict:
|
|
557
|
+
"""
|
|
558
|
+
Function to generate a `dict` of information about each variable
|
|
559
|
+
metadata attribute. The `dict` contains all information in the SWxSOC
|
|
560
|
+
variable attribute schema including:
|
|
561
|
+
|
|
562
|
+
- description: (`str`) A brief description of the attribute
|
|
563
|
+
- required: (`bool`) Whether the attribute is required by SWxSOC standards
|
|
564
|
+
- valid_values: (`str`) List of allowed values the attribute can take for SWxSOC products,
|
|
565
|
+
if applicable
|
|
566
|
+
- alternate: (`str`) An additional attribute name that can be treated as an alternative
|
|
567
|
+
of the given attribute. Not all attributes have an alternative and only one of a given
|
|
568
|
+
attribute or its alternate are required.
|
|
569
|
+
- var_types: (`str`) A list of the variable types that require the given
|
|
570
|
+
attribute to be present.
|
|
571
|
+
|
|
572
|
+
Parameters
|
|
573
|
+
----------
|
|
574
|
+
attribute_name : `str`, optional, default None
|
|
575
|
+
The name of the attribute to get specific information for.
|
|
576
|
+
|
|
577
|
+
Returns
|
|
578
|
+
-------
|
|
579
|
+
info: `dict`
|
|
580
|
+
information about variable metadata
|
|
581
|
+
|
|
582
|
+
Raises
|
|
583
|
+
------
|
|
584
|
+
KeyError: If attribute_name is not a recognized variable attribute.
|
|
585
|
+
"""
|
|
586
|
+
info = self.variable_attribute_schema["attribute_key"].copy()
|
|
587
|
+
|
|
588
|
+
# Strip the Description of New Lines
|
|
589
|
+
for attr_name in info.keys():
|
|
590
|
+
info[attr_name]["description"] = info[attr_name]["description"].strip()
|
|
591
|
+
|
|
592
|
+
# Create New Column to describe which VAR_TYPE's require the given attribute
|
|
593
|
+
for attr_name in info.keys():
|
|
594
|
+
# Create a new list to store the var types
|
|
595
|
+
info[attr_name]["var_types"] = []
|
|
596
|
+
for var_type in ["data", "support_data", "metadata"]:
|
|
597
|
+
# If the attribute is required for the given var type
|
|
598
|
+
if attr_name in self.variable_attribute_schema[var_type]:
|
|
599
|
+
info[attr_name]["var_types"].append(var_type)
|
|
600
|
+
# Convert the list to a string that can be written to a CSV from the table
|
|
601
|
+
info[attr_name]["var_types"] = ", ".join(info[attr_name]["var_types"])
|
|
602
|
+
|
|
603
|
+
# Limit the Info to the requested Attribute
|
|
604
|
+
if attribute_name and attribute_name in info:
|
|
605
|
+
info = info[attribute_name]
|
|
606
|
+
elif attribute_name and attribute_name not in info:
|
|
607
|
+
raise KeyError(
|
|
608
|
+
f"Cannot find Variable Metadata for attribute name: {attribute_name}"
|
|
609
|
+
)
|
|
610
|
+
|
|
611
|
+
return info
|
sammi/data/README.rst
ADDED