dapla-toolbelt-metadata 0.8.4__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dapla-toolbelt-metadata might be problematic. Click here for more details.
- dapla_metadata/datasets/code_list.py +1 -1
- dapla_metadata/datasets/compatibility/__init__.py +10 -0
- dapla_metadata/datasets/compatibility/_handlers.py +363 -0
- dapla_metadata/datasets/compatibility/_utils.py +259 -0
- dapla_metadata/datasets/compatibility/model_backwards_compatibility.py +135 -0
- dapla_metadata/datasets/core.py +2 -6
- dapla_metadata/datasets/utility/constants.py +1 -6
- dapla_metadata/datasets/utility/utils.py +7 -18
- {dapla_toolbelt_metadata-0.8.4.dist-info → dapla_toolbelt_metadata-0.9.0.dist-info}/METADATA +30 -36
- {dapla_toolbelt_metadata-0.8.4.dist-info → dapla_toolbelt_metadata-0.9.0.dist-info}/RECORD +27 -24
- {dapla_toolbelt_metadata-0.8.4.dist-info → dapla_toolbelt_metadata-0.9.0.dist-info}/WHEEL +1 -1
- dapla_metadata/datasets/model_backwards_compatibility.py +0 -626
- {dapla_toolbelt_metadata-0.8.4.dist-info → dapla_toolbelt_metadata-0.9.0.dist-info/licenses}/LICENSE +0 -0
|
@@ -1,626 +0,0 @@
|
|
|
1
|
-
"""Upgrade old metadata files to be compatible with new versions.
|
|
2
|
-
|
|
3
|
-
An important principle of Datadoc is that we ALWAYS guarantee backwards
|
|
4
|
-
compatibility of existing metadata documents. This means that we guarantee
|
|
5
|
-
that a user will never lose data, even if their document is decades old.
|
|
6
|
-
|
|
7
|
-
For each document version we release with breaking changes, we implement a
|
|
8
|
-
handler and register the version by defining a BackwardsCompatibleVersion
|
|
9
|
-
instance. These documents will then be upgraded when they're opened in Datadoc.
|
|
10
|
-
|
|
11
|
-
A test must also be implemented for each new version.
|
|
12
|
-
"""
|
|
13
|
-
|
|
14
|
-
from __future__ import annotations
|
|
15
|
-
|
|
16
|
-
import logging
|
|
17
|
-
from collections import OrderedDict
|
|
18
|
-
from dataclasses import dataclass
|
|
19
|
-
from datetime import datetime
|
|
20
|
-
from datetime import timezone
|
|
21
|
-
from typing import TYPE_CHECKING
|
|
22
|
-
from typing import Any
|
|
23
|
-
|
|
24
|
-
import arrow
|
|
25
|
-
|
|
26
|
-
logger = logging.getLogger(__name__)
|
|
27
|
-
|
|
28
|
-
if TYPE_CHECKING:
|
|
29
|
-
from collections.abc import Callable
|
|
30
|
-
|
|
31
|
-
VERSION_FIELD_NAME = "document_version"
|
|
32
|
-
PSEUDONYMIZATION_KEY = "pseudonymization"
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
class UnknownModelVersionError(Exception):
    """Exception raised for unknown model versions.

    Raised when a metadata document declares a version for which no upgrade
    handler is registered.
    """

    def __init__(
        self,
        supplied_version: str,
        *args: Any,
    ) -> None:
        """Initialize the exception with the supplied version.

        Args:
            supplied_version: The version of the model that was not recognized.
            *args: Additional arguments for the Exception base class.
        """
        # Forward the version and any extra arguments individually. Passing
        # the packed tuple as one argument would leave ``self.args`` as a
        # nested tuple that never contains the offending version.
        super().__init__(supplied_version, *args)
        self.supplied_version = supplied_version

    def __str__(self) -> str:
        """Return string representation."""
        return f"Document Version ({self.supplied_version}) of discovered file is not supported"
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
# Registry of every version that has a handler, keyed by version string.
# Entries appear in the order in which they were first registered.
SUPPORTED_VERSIONS: OrderedDict[str, BackwardsCompatibleVersion] = OrderedDict()


@dataclass
class BackwardsCompatibleVersion:
    """Pairing of a document version with the handler registered for it.

    Creating an instance is enough to register it: the instance adds itself
    to ``SUPPORTED_VERSIONS`` as soon as it has been initialised.
    """

    # Version string used as the key in SUPPORTED_VERSIONS.
    version: str
    # Callable that receives a metadata dict and returns a metadata dict.
    handler: Callable[[dict[str, Any]], dict[str, Any]]

    def __post_init__(self) -> None:
        """Store this instance in ``SUPPORTED_VERSIONS`` under its version.

        Registering the same version again replaces the stored instance but
        keeps the position of the original registration.
        """
        SUPPORTED_VERSIONS.update({self.version: self})
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
def handle_current_version(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
    """Pass metadata on the current version straight through.

    No upgrade step is needed for the current version, so the very same
    dictionary object is handed back untouched.

    Args:
        supplied_metadata: The metadata for the current version.

    Returns:
        The same dictionary that was supplied, unmodified.
    """
    already_current = supplied_metadata
    return already_current
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
def _find_and_update_language_strings(supplied_metadata: dict | None) -> dict | None:
|
|
98
|
-
"""Find and update language-specific strings in the supplied metadata.
|
|
99
|
-
|
|
100
|
-
This function iterates through the supplied metadata dictionary.
|
|
101
|
-
For each key-value pair, if the value is a dictionary containing "en"
|
|
102
|
-
it is passed to the `_convert_language_string_type` function to potentially
|
|
103
|
-
update its format.
|
|
104
|
-
|
|
105
|
-
Args:
|
|
106
|
-
supplied_metadata: A metadata dictionary where values may include nested
|
|
107
|
-
dictionaries with language-specific strings.
|
|
108
|
-
|
|
109
|
-
Returns:
|
|
110
|
-
The updated metadata dictionary. If the supplied metadata is not a
|
|
111
|
-
dictionary, it returns `None`.
|
|
112
|
-
"""
|
|
113
|
-
if isinstance(supplied_metadata, dict):
|
|
114
|
-
for key, value in supplied_metadata.items():
|
|
115
|
-
if isinstance(value, dict) and "en" in value:
|
|
116
|
-
supplied_metadata[key] = _convert_language_string_type(value)
|
|
117
|
-
return supplied_metadata
|
|
118
|
-
return None
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
def _convert_language_string_type(supplied_value: dict) -> list[dict[str, str]]:
|
|
122
|
-
"""Convert a dictionary of language-specific strings to a list of dictionaries.
|
|
123
|
-
|
|
124
|
-
This function takes a dictionary with language codes as keys and
|
|
125
|
-
corresponding language-specific strings as values, and converts it to a list
|
|
126
|
-
of dictionaries with 'languageCode' and 'languageText' keys.
|
|
127
|
-
|
|
128
|
-
Args:
|
|
129
|
-
supplied_value: A dictionary containing language codes as keys and
|
|
130
|
-
language strings as values.
|
|
131
|
-
|
|
132
|
-
Returns:
|
|
133
|
-
A list of dictionaries, each containing 'languageCode' and 'languageText'
|
|
134
|
-
keys, representing the converted language strings.
|
|
135
|
-
"""
|
|
136
|
-
return [
|
|
137
|
-
{
|
|
138
|
-
"languageCode": "en",
|
|
139
|
-
"languageText": supplied_value["en"],
|
|
140
|
-
},
|
|
141
|
-
{
|
|
142
|
-
"languageCode": "nn",
|
|
143
|
-
"languageText": supplied_value["nn"],
|
|
144
|
-
},
|
|
145
|
-
{
|
|
146
|
-
"languageCode": "nb",
|
|
147
|
-
"languageText": supplied_value["nb"],
|
|
148
|
-
},
|
|
149
|
-
]
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
def _remove_element_from_model(
|
|
153
|
-
supplied_metadata: dict[str, Any],
|
|
154
|
-
element_to_remove: str,
|
|
155
|
-
) -> None:
|
|
156
|
-
"""Remove an element from the supplied metadata dictionary.
|
|
157
|
-
|
|
158
|
-
This function deletes a specified element from the supplied metadata dictionary
|
|
159
|
-
if it exists.
|
|
160
|
-
|
|
161
|
-
Args:
|
|
162
|
-
supplied_metadata: The metadata dictionary from which the element will be
|
|
163
|
-
removed.
|
|
164
|
-
element_to_remove: The key of the element to be removed from the metadata
|
|
165
|
-
dictionary.
|
|
166
|
-
"""
|
|
167
|
-
supplied_metadata.pop(element_to_remove, None)
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
def _cast_to_date_type(value_to_update: str | None) -> str | None:
|
|
171
|
-
"""Convert a string to a date string in ISO format.
|
|
172
|
-
|
|
173
|
-
This function takes a string representing a date and converts it to a
|
|
174
|
-
date string in ISO format. If the input is `None`, it returns `None` without
|
|
175
|
-
modification.
|
|
176
|
-
|
|
177
|
-
Args:
|
|
178
|
-
value_to_update: A string representing a date or `None`.
|
|
179
|
-
|
|
180
|
-
Returns:
|
|
181
|
-
The date string in ISO format if the input was a valid date string, or
|
|
182
|
-
`None` if the input was `None`.
|
|
183
|
-
"""
|
|
184
|
-
if value_to_update is None:
|
|
185
|
-
return value_to_update
|
|
186
|
-
|
|
187
|
-
return str(
|
|
188
|
-
arrow.get(
|
|
189
|
-
value_to_update,
|
|
190
|
-
).date(),
|
|
191
|
-
)
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
def convert_is_personal_data(supplied_metadata: dict[str, Any]) -> None:
    """Replace the 'is_personal_data' enum names of every variable with a bool.

    Each variable in ``supplied_metadata["datadoc"]["variables"]`` is updated
    in place:
        - NOT_PERSONAL_DATA becomes False.
        - NON_PSEUDONYMISED_ENCRYPTED_PERSONAL_DATA and
          PSEUDONYMISED_ENCRYPTED_PERSONAL_DATA become True.
        - Any other value is left exactly as it is.

    Args:
        supplied_metadata: The metadata dictionary to be updated.
    """
    personal_data_names = (
        "NON_PSEUDONYMISED_ENCRYPTED_PERSONAL_DATA",
        "PSEUDONYMISED_ENCRYPTED_PERSONAL_DATA",
    )
    for variable in supplied_metadata["datadoc"]["variables"]:
        current = variable["is_personal_data"]
        if current == "NOT_PERSONAL_DATA":
            variable["is_personal_data"] = False
        elif current in personal_data_names:
            variable["is_personal_data"] = True
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
def copy_pseudonymization_metadata(supplied_metadata: dict[str, Any]) -> None:
    """Copy pseudonymization metadata from the old section into each variable.

    For each variable in `supplied_metadata["datadoc"]["variables"]` that has a
    matching `short_name` in
    `supplied_metadata["pseudonymization"]["pseudo_variables"]`, the following
    fields are copied into the variable's 'pseudonymization' dictionary:

    - stable_identifier_type
    - stable_identifier_version
    - encryption_algorithm
    - encryption_key_reference
    - encryption_algorithm_parameters

    The value `dataset_pseudo_time` of the pseudo_dataset is copied to each
    matching variable as `pseudonymization_time`. Variables without a matching
    pseudo variable get 'pseudonymization' set to None.

    Sections that are present but set to None (the pseudonymization section
    itself, 'pseudo_variables', 'pseudo_dataset', 'datadoc', 'variables' or a
    variable's existing 'pseudonymization') are treated as empty instead of
    raising.

    Args:
        supplied_metadata: The metadata dictionary to be updated.

    Raises:
        KeyError: If a matching pseudo variable lacks one of the copied fields.
    """
    # ``dict.get(key, default)`` only falls back when the key is absent, so an
    # explicit None has to be replaced separately with ``or``.
    pseudo_section = supplied_metadata.get(PSEUDONYMIZATION_KEY) or {}
    pseudo_vars = pseudo_section.get("pseudo_variables") or []
    pseudo_dataset = pseudo_section.get("pseudo_dataset") or {}
    pseudo_time = pseudo_dataset.get("dataset_pseudo_time")
    datadoc_vars = (supplied_metadata.get("datadoc") or {}).get("variables") or []
    pseudo_lookup = {var.get("short_name"): var for var in pseudo_vars}

    copied_fields = (
        "stable_identifier_type",
        "stable_identifier_version",
        "encryption_algorithm",
        "encryption_key_reference",
        "encryption_algorithm_parameters",
    )

    for variable in datadoc_vars:
        short_name = variable.get("short_name")
        if short_name not in pseudo_lookup:
            variable[PSEUDONYMIZATION_KEY] = None
            continue

        pseudo_var = pseudo_lookup[short_name]
        # Work on a copy so the variable is only updated once every field
        # has been read successfully.
        merged = dict(variable.get(PSEUDONYMIZATION_KEY) or {})
        for field in copied_fields:
            merged[field] = pseudo_var[field]
        merged["pseudonymization_time"] = pseudo_time
        variable[PSEUDONYMIZATION_KEY] = merged
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
def handle_version_4_0_0(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
    """Handle breaking changes for version 4.0.0.

    Upgrades a version 4.0.0 document to accommodate the breaking changes
    introduced in version 5.0.1. In order, it:
        - Copies the pseudonymization metadata into the variables, but only
          when the container holds a non-empty 'pseudonymization' section.
        - Converts the 'is_personal_data' field of every variable to a bool.
        - Sets 'document_version' in the 'datadoc' section to "5.0.1".
        - Removes the 'pseudonymization' section from the container.
        - Sets the container's own 'document_version' to "1.0.0".

    Args:
        supplied_metadata: The metadata dictionary to be updated.

    Returns:
        The updated metadata dictionary.
    """
    has_pseudonymization = bool(supplied_metadata.get(PSEUDONYMIZATION_KEY))
    if has_pseudonymization:
        copy_pseudonymization_metadata(supplied_metadata)

    convert_is_personal_data(supplied_metadata)

    datadoc_section = supplied_metadata["datadoc"]
    datadoc_section["document_version"] = "5.0.1"

    # The section now lives on the individual variables, so drop it from
    # the container (a no-op when the key is absent).
    supplied_metadata.pop(PSEUDONYMIZATION_KEY, None)
    supplied_metadata["document_version"] = "1.0.0"
    return supplied_metadata
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
def handle_version_3_3_0(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
    """Handle breaking changes for version 3.3.0.

    Upgrades a version 3.3.0 document to accommodate the breaking changes
    introduced in version 4.0.0. For every variable in 'datadoc.variables'
    the value of 'direct_person_identifying' is moved to 'is_personal_data',
    and the 'document_version' field is set to "4.0.0".

    Version 4.0.0 used an enum for is_personal_data, however this was changed
    to a bool again for version 5.0.1. The enum is therefore never set here;
    the moved value is kept as it is.

    Args:
        supplied_metadata: The metadata dictionary to be updated.

    Returns:
        The updated metadata dictionary.

    Raises:
        KeyError: If a variable has no 'direct_person_identifying' field.
    """
    datadoc = supplied_metadata["datadoc"]
    for variable in datadoc["variables"]:
        # pop() both reads the old field and removes it in one step.
        variable["is_personal_data"] = variable.pop("direct_person_identifying")

    datadoc["document_version"] = "4.0.0"
    return supplied_metadata
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
def handle_version_3_2_0(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
    """Handle breaking changes for version 3.2.0.

    Upgrades a version 3.2.0 document to accommodate the breaking changes
    introduced in version 3.3.0. The 'contains_data_from' and
    'contains_data_until' fields of the 'dataset' section and of every
    variable are passed through `_cast_to_date_type` so they are stored as
    date strings. A field that is missing is created with the value None.
    The 'document_version' field is set to "3.3.0".

    Args:
        supplied_metadata: The metadata dictionary to be updated.

    Returns:
        The updated metadata dictionary.
    """
    for date_field in ("contains_data_from", "contains_data_until"):
        dataset = supplied_metadata["datadoc"]["dataset"]
        dataset[date_field] = _cast_to_date_type(dataset.get(date_field))

        for variable in supplied_metadata["datadoc"]["variables"]:
            variable[date_field] = _cast_to_date_type(variable.get(date_field))

    supplied_metadata["datadoc"]["document_version"] = "3.3.0"
    return supplied_metadata
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
def _first_language_text(language_strings: Any) -> str | None:
    """Return the first non-empty 'languageText' of a list of language strings."""
    if language_strings is None:
        return None
    if isinstance(language_strings, str):
        # Already in the version 3.2.0 format; keep the value untouched.
        return language_strings
    for entry in language_strings:
        text = entry.get("languageText")
        if text:
            return str(text)
    # Nothing usable: an empty list, or only empty/None texts.
    return None


def handle_version_3_1_0(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
    """Handle breaking changes for version 3.1.0.

    Upgrades a version 3.1.0 document to accommodate the breaking changes
    introduced in version 3.2.0. The 'data_source' field of the 'dataset'
    section and of every variable is converted from a list of language
    strings to a plain string. The 'document_version' field is set to "3.2.0".

    The text of the first list entry is used when it is non-empty. When it
    is empty or None the first non-empty text of the remaining entries is
    used instead, so a value entered in only one language is not lost and
    a None text is never stored as the literal string "None". A
    'data_source' without any usable text becomes None.

    Args:
        supplied_metadata: The metadata dictionary to be updated.

    Returns:
        The updated metadata dictionary.
    """
    datadoc = supplied_metadata["datadoc"]

    dataset = datadoc["dataset"]
    dataset["data_source"] = _first_language_text(dataset["data_source"])

    for variable in datadoc["variables"]:
        variable["data_source"] = _first_language_text(variable["data_source"])

    datadoc["document_version"] = "3.2.0"
    return supplied_metadata
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
def handle_version_2_2_0(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
    """Handle breaking changes for version 2.2.0.

    Upgrades a version 2.2.0 document to accommodate the breaking changes
    introduced in version 3.1.0. In the 'dataset' section it:
        - Converts a non-None 'subject_field' to a string, taking the first
          truthy text in the order "nb", "nn", "en".
        - Removes the 'register_uri' field.
        - Sets 'custom_type' to None.
        - Updates the language strings.

    For each variable it removes 'sentinel_value_uri', sets 'special_value'
    and 'custom_type' to None, and updates the language strings.
    The 'document_version' is set to "3.1.0".

    Args:
        supplied_metadata: The metadata dictionary to be updated.

    Returns:
        The updated metadata dictionary.
    """
    dataset = supplied_metadata["datadoc"]["dataset"]

    subject_field = dataset["subject_field"]
    if subject_field is not None:
        preferred_text = (
            subject_field["nb"] or subject_field["nn"] or subject_field["en"]
        )
        dataset["subject_field"] = str(preferred_text)

    _remove_element_from_model(dataset, "register_uri")

    variables = supplied_metadata["datadoc"]["variables"]
    for position, variable in enumerate(variables):
        _remove_element_from_model(variable, "sentinel_value_uri")
        variable["special_value"] = None
        variable["custom_type"] = None
        variables[position] = _find_and_update_language_strings(variable)

    dataset["custom_type"] = None
    supplied_metadata["datadoc"]["dataset"] = _find_and_update_language_strings(
        dataset,
    )
    supplied_metadata["datadoc"]["document_version"] = "3.1.0"
    return supplied_metadata
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
def add_container(existing_metadata: dict) -> dict:
    """Add container for previous versions.

    Wraps the existing metadata in a new container structure with the fields
    'document_version', 'datadoc' and 'pseudonymization'. The
    'document_version' is set to "0.0.1", 'datadoc' refers to the supplied
    dictionary itself (it is not copied) and 'pseudonymization' is set to
    None.

    Args:
        existing_metadata: The original metadata dictionary to be wrapped.

    Returns:
        A new dictionary containing the wrapped metadata with additional fields.
    """
    return {
        "document_version": "0.0.1",
        "datadoc": existing_metadata,
        "pseudonymization": None,
    }


def handle_version_2_1_0(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
    """Handle breaking changes for version 2.1.0.

    Upgrades a version 2.1.0 document to accommodate the breaking changes
    introduced in version 2.2.0. The 'owner' field of the 'dataset' section
    is converted from a LanguageStringType to a string, taking the first
    truthy text in the order "nb", "nn", "en". An 'owner' that is None is
    left as None instead of raising. The 'document_version' is set to
    "2.2.0" and the result is wrapped in the metadata container.

    Args:
        supplied_metadata: The metadata dictionary to be updated.

    Returns:
        The updated metadata dictionary, wrapped in a container.
    """
    owner = supplied_metadata["dataset"]["owner"]
    # Guard against an owner that was never filled in, in the same way the
    # version 2.2.0 handler guards 'subject_field'.
    if owner is not None:
        supplied_metadata["dataset"]["owner"] = str(
            owner["nb"] or owner["nn"] or owner["en"],
        )
    supplied_metadata["document_version"] = "2.2.0"
    return add_container(supplied_metadata)
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
def handle_version_1_0_0(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
    """Handle breaking changes for version 1.0.0.

    Upgrades a version 1.0.0 document to accommodate the breaking changes
    introduced in version 2.1.0. In the 'dataset' section it:
        - Rewrites non-empty 'metadata_created_date' and
          'metadata_last_updated_date' values as ISO 8601 strings in UTC
          with a precision of seconds.
        - Converts a 'data_source' string to a dictionary with the language
          keys "en", "nn" and "nb", placing the string under "en".
        - Removes the 'data_source_path' field.

    The 'document_version' is set to "2.1.0".

    Args:
        supplied_metadata: The metadata dictionary to be updated.

    Returns:
        The updated metadata dictionary.
    """
    dataset = supplied_metadata["dataset"]

    for date_field in ("metadata_created_date", "metadata_last_updated_date"):
        raw_value = dataset[date_field]
        if not raw_value:
            continue
        as_utc = datetime.fromisoformat(raw_value).astimezone(tz=timezone.utc)
        dataset[date_field] = as_utc.isoformat(timespec="seconds")

    source = dataset["data_source"]
    if isinstance(source, str):
        dataset["data_source"] = {"en": source, "nn": "", "nb": ""}

    # Dropped from the model; absent keys are ignored.
    dataset.pop("data_source_path", None)

    supplied_metadata["document_version"] = "2.1.0"
    return supplied_metadata
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
def handle_version_0_1_1(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
    """Handle breaking changes for version 0.1.1.

    Upgrades a version 0.1.1 document to accommodate the breaking changes
    introduced in version 1.0.0. It:
        - Renames the 'created_date', 'created_by', 'last_updated_date' and
          'last_updated_by' keys of `dataset` by prefixing them with
          'metadata_'.
        - Replaces the `dataset` section with a new dictionary in which
          every empty string value is None.
        - Renames the 'datatype' key of every variable to 'data_type'.

    Args:
        supplied_metadata: The metadata dictionary that needs to be updated.

    Returns:
        The updated metadata dictionary.

    Raises:
        KeyError: If one of the keys to rename is missing.

    References:
        PR ref: https://github.com/statisticsnorway/ssb-datadoc-model/pull/4
    """
    dataset = supplied_metadata["dataset"]
    renamed_dataset_keys = {
        "created_date": "metadata_created_date",
        "created_by": "metadata_created_by",
        "last_updated_date": "metadata_last_updated_date",
        "last_updated_by": "metadata_last_updated_by",
    }
    for old_key, new_key in renamed_dataset_keys.items():
        dataset[new_key] = dataset.pop(old_key)

    # Replace empty strings with None, empty strings are not valid for LanguageStrings values
    cleaned_dataset = {}
    for key, value in dataset.items():
        cleaned_dataset[key] = None if value == "" else value
    supplied_metadata["dataset"] = cleaned_dataset

    for variable in supplied_metadata["variables"]:
        variable["data_type"] = variable.pop("datatype")

    return supplied_metadata
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
# Register all the supported versions and their handlers.
# MUST be ordered from oldest to newest.
#
# Each call below registers itself in SUPPORTED_VERSIONS as a side effect of
# being constructed, so the instances do not need to be kept in a variable.
# The order matters because upgrade_metadata walks SUPPORTED_VERSIONS in
# insertion order and runs every handler from the document's version onwards.
BackwardsCompatibleVersion(version="0.1.1", handler=handle_version_0_1_1)
BackwardsCompatibleVersion(version="1.0.0", handler=handle_version_1_0_0)
BackwardsCompatibleVersion(
    version="2.1.0",
    handler=handle_version_2_1_0,
)  # A container must be created at this version
BackwardsCompatibleVersion(version="2.2.0", handler=handle_version_2_2_0)
BackwardsCompatibleVersion(version="3.1.0", handler=handle_version_3_1_0)
BackwardsCompatibleVersion(version="3.2.0", handler=handle_version_3_2_0)
BackwardsCompatibleVersion(version="3.3.0", handler=handle_version_3_3_0)
BackwardsCompatibleVersion(version="4.0.0", handler=handle_version_4_0_0)
# The newest version uses the pass-through handler, which returns the
# metadata unmodified.
BackwardsCompatibleVersion(version="5.0.1", handler=handle_current_version)
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
def upgrade_metadata(fresh_metadata: dict[str, Any]) -> dict[str, Any]:
    """Upgrade the metadata to the latest version using registered handlers.

    The version of the supplied metadata is read from the 'datadoc' section
    when the metadata is in the container structure, and from the top level
    otherwise. Starting with the handler registered for that version, every
    handler in `SUPPORTED_VERSIONS` is applied in registration order, each
    receiving the result of the previous one.

    Container metadata whose 'datadoc' section is None is returned as it is,
    without running any handler.

    Args:
        fresh_metadata: The metadata dictionary to be upgraded. This dictionary
            must include version information that determines which handlers to apply.

    Returns:
        The upgraded metadata dictionary, after applying all necessary handlers.

    Raises:
        UnknownModelVersionError: If the metadata's version is unknown or unsupported.
    """
    if not is_metadata_in_container_structure(fresh_metadata):
        supplied_version = fresh_metadata[VERSION_FIELD_NAME]
    elif fresh_metadata["datadoc"] is None:
        # An empty container has nothing to upgrade.
        return fresh_metadata
    else:
        supplied_version = fresh_metadata["datadoc"][VERSION_FIELD_NAME]

    registered = list(SUPPORTED_VERSIONS.items())
    first_handler_index = next(
        (
            index
            for index, (version, _) in enumerate(registered)
            if version == supplied_version
        ),
        None,
    )
    if first_handler_index is None:
        raise UnknownModelVersionError(supplied_version)

    # Run all the handlers in order from the supplied version onwards
    for _, compatible_version in registered[first_handler_index:]:
        fresh_metadata = compatible_version.handler(fresh_metadata)
    return fresh_metadata
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
def is_metadata_in_container_structure(
    metadata: dict,
) -> bool:
    """Tell whether the metadata uses the container structure.

    At a certain point a metadata 'container' was introduced. The container
    provides a structure for different 'types' of metadata, such as
    'datadoc', 'pseudonymization' etc. Metadata counts as being in the
    container structure when it has a 'datadoc' key, whatever its value.

    Args:
        metadata: The metadata dictionary to check.

    Returns:
        True if the metadata contains the 'datadoc' field, False otherwise.
    """
    has_datadoc_section = "datadoc" in metadata
    return has_datadoc_section
|
{dapla_toolbelt_metadata-0.8.4.dist-info → dapla_toolbelt_metadata-0.9.0.dist-info/licenses}/LICENSE
RENAMED
|
File without changes
|