dapla-toolbelt-metadata 0.8.5__py3-none-any.whl → 0.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dapla-toolbelt-metadata might be problematic. Click here for more details.

@@ -121,7 +121,7 @@ class CodeList(GetExternalSource):
121
121
  classifications_dataframes[i] = (
122
122
  KlassClassification(
123
123
  str(self.classification_id),
124
- str(i), # type: ignore [arg-type]
124
+ i.lower(), # type: ignore [arg-type]
125
125
  )
126
126
  .get_codes()
127
127
  .data
@@ -0,0 +1,10 @@
1
+ """Model Backwards Compatibility.
2
+
3
+ This package contains code for upgrading existing metadata documents to the newest version of the model.
4
+ This is analogous to a Database Migration where the structure of the data has changed and we wish to
5
+ retain already persisted information.
6
+
7
+ """
8
+
9
+ from ._utils import is_metadata_in_container_structure
10
+ from .model_backwards_compatibility import upgrade_metadata
@@ -0,0 +1,363 @@
1
+ from datetime import datetime
2
+ from datetime import timezone
3
+ from typing import Any
4
+
5
+ from dapla_metadata.datasets.compatibility._utils import DATADOC_KEY
6
+ from dapla_metadata.datasets.compatibility._utils import DATASET_KEY
7
+ from dapla_metadata.datasets.compatibility._utils import DOCUMENT_VERSION_KEY
8
+ from dapla_metadata.datasets.compatibility._utils import PSEUDONYMIZATION_KEY
9
+ from dapla_metadata.datasets.compatibility._utils import VARIABLES_KEY
10
+ from dapla_metadata.datasets.compatibility._utils import add_container
11
+ from dapla_metadata.datasets.compatibility._utils import cast_to_date_type
12
+ from dapla_metadata.datasets.compatibility._utils import convert_datetime_to_date
13
+ from dapla_metadata.datasets.compatibility._utils import convert_is_personal_data
14
+ from dapla_metadata.datasets.compatibility._utils import copy_pseudonymization_metadata
15
+ from dapla_metadata.datasets.compatibility._utils import (
16
+ find_and_update_language_strings,
17
+ )
18
+ from dapla_metadata.datasets.compatibility._utils import remove_element_from_model
19
+
20
+
21
def handle_current_version(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
    """Pass through metadata that is already on the current model version.

    No upgrade work is performed; the argument is handed straight back.

    Args:
        supplied_metadata: Metadata that is already on the current version.

    Returns:
        The same dictionary object that was supplied, untouched.
    """
    return supplied_metadata
33
+
34
+
35
def handle_version_6_0_0(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
    """Upgrade metadata from version 6.0.0 to 6.1.0.

    The single-valued ``use_restriction`` / ``use_restriction_date`` pair on
    the dataset is replaced by a ``use_restrictions`` list:

    - When ``use_restriction`` is set, the list holds one dictionary with
      ``use_restriction_type`` and ``use_restriction_date`` (the latter passed
      through ``convert_datetime_to_date``).
    - When ``use_restriction`` is ``None`` or absent, the list is empty.
    - Both legacy fields are removed from the dataset afterwards.

    Args:
        supplied_metadata: The metadata dictionary to be updated (in place).

    Returns:
        The upgraded metadata dictionary.
    """
    dataset_section = supplied_metadata[DATADOC_KEY][DATASET_KEY]

    restrictions: list[dict[str, Any]] = []
    restriction_type = dataset_section.get("use_restriction")
    if restriction_type is not None:
        restrictions.append(
            {
                "use_restriction_type": restriction_type,
                "use_restriction_date": convert_datetime_to_date(
                    dataset_section.get("use_restriction_date"),
                ),
            },
        )
    dataset_section["use_restrictions"] = restrictions

    # The legacy single-value fields are superseded by the list above.
    remove_element_from_model(dataset_section, "use_restriction")
    remove_element_from_model(dataset_section, "use_restriction_date")

    return supplied_metadata
69
+
70
+
71
def handle_version_5_0_1(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
    """Upgrade metadata from version 5.0.1 to 6.0.0.

    Four dataset-level fields are moved down to every variable:

    - ``contains_personal_data`` (stored on variables as ``is_personal_data``)
    - ``unit_type``
    - ``data_source``
    - ``temporality_type``

    Each field is removed from the dataset. A variable only receives the
    dataset value when its own value for that field is ``None`` or missing.

    Args:
        supplied_metadata: The metadata dictionary to be updated (in place).

    Returns:
        The updated metadata dictionary.
    """
    # (name on the dataset, name on the variable)
    moved_fields = (
        ("contains_personal_data", "is_personal_data"),
        ("unit_type", "unit_type"),
        ("data_source", "data_source"),
        ("temporality_type", "temporality_type"),
    )

    dataset: dict[str, Any] = supplied_metadata[DATADOC_KEY][DATASET_KEY]
    variables: list[dict[str, Any]] = supplied_metadata[DATADOC_KEY][VARIABLES_KEY]

    for dataset_name, variable_name in moved_fields:
        inherited_value = dataset.pop(dataset_name, None)
        for variable in variables:
            # Values already set on the variable take precedence.
            if variable.get(variable_name) is None:
                variable[variable_name] = inherited_value

    return supplied_metadata
106
+
107
+
108
def handle_version_4_0_0(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
    """Upgrade metadata from version 4.0.0 to 5.0.1.

    Steps, in order:

    - If the container holds a non-empty ``pseudonymization`` section, its
      content is copied onto the variables via
      ``copy_pseudonymization_metadata``.
    - ``is_personal_data`` on each variable is converted via
      ``convert_is_personal_data``.
    - The container-level ``pseudonymization`` key is dropped.
    - The container ``document_version`` is set to ``"1.0.0"``.

    Args:
        supplied_metadata: The metadata dictionary to be updated (in place).

    Returns:
        The updated metadata dictionary.
    """
    has_pseudonymization = bool(supplied_metadata.get(PSEUDONYMIZATION_KEY))
    if has_pseudonymization:
        copy_pseudonymization_metadata(supplied_metadata)

    convert_is_personal_data(supplied_metadata)

    # The container-level section is no longer part of the model.
    supplied_metadata.pop(PSEUDONYMIZATION_KEY, None)
    supplied_metadata[DOCUMENT_VERSION_KEY] = "1.0.0"
    return supplied_metadata
132
+
133
+
134
def handle_version_3_3_0(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
    """Upgrade metadata from version 3.3.0 to 4.0.0.

    On every variable in ``datadoc.variables`` the value of
    ``direct_person_identifying`` is moved to ``is_personal_data`` and the old
    key is removed.

    Version 4.0.0 used an enum for ``is_personal_data``, but this became a bool
    again in 5.0.1, so the value is carried over as-is rather than mapped to
    the enum.

    Args:
        supplied_metadata: The metadata dictionary to be updated (in place).

    Returns:
        The updated metadata dictionary.

    Raises:
        KeyError: If a variable has no ``direct_person_identifying`` key.
    """
    for variable in supplied_metadata[DATADOC_KEY][VARIABLES_KEY]:
        variable["is_personal_data"] = variable.pop("direct_person_identifying")

    return supplied_metadata
155
+
156
+
157
def handle_version_3_2_0(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
    """Upgrade metadata from version 3.2.0 to 3.3.0.

    ``contains_data_from`` and ``contains_data_until`` are passed through
    ``cast_to_date_type`` on the dataset and on every variable. A missing field
    is created with the value ``None``.

    Args:
        supplied_metadata: The metadata dictionary to be updated (in place).

    Returns:
        The updated metadata dictionary.
    """
    datadoc = supplied_metadata[DATADOC_KEY]
    for date_field in ("contains_data_from", "contains_data_until"):
        dataset = datadoc[DATASET_KEY]
        dataset[date_field] = cast_to_date_type(dataset.get(date_field))
        for variable in datadoc[VARIABLES_KEY]:
            variable[date_field] = cast_to_date_type(variable.get(date_field))

    return supplied_metadata
181
+
182
+
183
def handle_version_3_1_0(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
    """Upgrade metadata from version 3.1.0 to 3.2.0.

    ``data_source`` on the dataset and on every variable is reduced to a plain
    string: the ``languageText`` of the first entry of the existing value.
    A ``data_source`` of ``None`` is left as ``None``.

    Args:
        supplied_metadata: The metadata dictionary to be updated (in place).

    Returns:
        The updated metadata dictionary.

    Raises:
        KeyError: If the dataset or a variable has no ``data_source`` key.
    """
    datadoc = supplied_metadata[DATADOC_KEY]

    dataset = datadoc[DATASET_KEY]
    dataset_source = dataset["data_source"]
    if dataset_source is not None:
        dataset["data_source"] = str(dataset_source[0]["languageText"])

    for variable in datadoc[VARIABLES_KEY]:
        variable_source = variable["data_source"]
        if variable_source is not None:
            variable["data_source"] = str(variable_source[0]["languageText"])

    return supplied_metadata
212
+
213
+
214
def handle_version_2_2_0(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
    """Upgrade metadata from version 2.2.0 to 3.1.0.

    Dataset changes:

    - ``subject_field`` (when not ``None``) becomes a string, taking the first
      truthy value of its ``nb``, ``nn`` and ``en`` entries.
    - ``register_uri`` is removed.
    - ``custom_type`` is set to ``None``.
    - Language strings are converted via ``find_and_update_language_strings``.

    Variable changes (for every variable):

    - ``sentinel_value_uri`` is removed.
    - ``special_value`` and ``custom_type`` are set to ``None``.
    - Language strings are converted via ``find_and_update_language_strings``.

    Args:
        supplied_metadata: The metadata dictionary to be updated (in place).

    Returns:
        The updated metadata dictionary.
    """
    datadoc = supplied_metadata[DATADOC_KEY]
    dataset = datadoc[DATASET_KEY]

    subject_field = dataset["subject_field"]
    if subject_field is not None:
        # NOTE(review): when all three translations are falsy, str() stores the
        # text of the last falsy value (e.g. "None") rather than None.
        dataset["subject_field"] = str(
            subject_field["nb"] or subject_field["nn"] or subject_field["en"],
        )

    remove_element_from_model(dataset, "register_uri")

    variables = datadoc[VARIABLES_KEY]
    for index, variable in enumerate(variables):
        remove_element_from_model(variable, "sentinel_value_uri")
        variable["special_value"] = None
        variable["custom_type"] = None
        variables[index] = find_and_update_language_strings(variable)

    dataset["custom_type"] = None
    datadoc[DATASET_KEY] = find_and_update_language_strings(dataset)
    return supplied_metadata
258
+
259
+
260
def handle_version_2_1_0(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
    """Upgrade metadata from version 2.1.0 to 2.2.0.

    The dataset ``owner`` is converted from a language-keyed dictionary to a
    string, taking the first truthy value of ``nb``, ``nn`` and ``en``. The
    result is then wrapped in the container structure by ``add_container``.

    Args:
        supplied_metadata: The (not yet containerised) metadata dictionary.

    Returns:
        The container dictionary produced by ``add_container``.
    """
    dataset = supplied_metadata[DATASET_KEY]
    owner_names = dataset["owner"]
    # NOTE(review): when all three translations are falsy, str() stores the
    # text of the last falsy value (e.g. "None") rather than None.
    dataset["owner"] = str(
        owner_names["nb"] or owner_names["nn"] or owner_names["en"],
    )
    return add_container(supplied_metadata)
279
+
280
+
281
def handle_version_1_0_0(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
    """Upgrade metadata from version 1.0.0 to 2.1.0.

    Dataset changes:

    - ``metadata_created_date`` and ``metadata_last_updated_date`` (when
      truthy) are parsed as ISO 8601, converted to UTC and re-serialised with
      second precision.
    - A string ``data_source`` becomes a dictionary with the original value
      under ``en`` and empty strings under ``nn`` and ``nb``.
    - ``data_source_path`` is removed.

    Args:
        supplied_metadata: The metadata dictionary to be updated (in place).

    Returns:
        The updated metadata dictionary.

    """
    dataset = supplied_metadata[DATASET_KEY]

    for date_field in ("metadata_created_date", "metadata_last_updated_date"):
        raw_timestamp = dataset[date_field]
        if raw_timestamp:
            in_utc = datetime.fromisoformat(raw_timestamp).astimezone(
                tz=timezone.utc,
            )
            dataset[date_field] = in_utc.isoformat(timespec="seconds")

    source = dataset["data_source"]
    if isinstance(source, str):
        dataset["data_source"] = {"en": source, "nn": "", "nb": ""}

    remove_element_from_model(dataset, "data_source_path")

    return supplied_metadata
319
+
320
+
321
def handle_version_0_1_1(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
    """Upgrade metadata from version 0.1.1 to 1.0.0.

    - Four dataset keys gain a ``metadata_`` prefix (``created_date``,
      ``created_by``, ``last_updated_date``, ``last_updated_by``).
    - Dataset values equal to the empty string are replaced by ``None``.
    - On every variable ``datatype`` is renamed to ``data_type``.

    Args:
        supplied_metadata: The metadata dictionary that needs to be updated.

    Returns:
        The updated metadata dictionary.

    Raises:
        KeyError: If one of the keys to be renamed is missing.

    References:
        PR ref: https://github.com/statisticsnorway/ssb-datadoc-model/pull/4
    """
    # (name after the upgrade, name before the upgrade)
    dataset_renames = (
        ("metadata_created_date", "created_date"),
        ("metadata_created_by", "created_by"),
        ("metadata_last_updated_date", "last_updated_date"),
        ("metadata_last_updated_by", "last_updated_by"),
    )
    dataset = supplied_metadata[DATASET_KEY]
    for renamed, legacy in dataset_renames:
        dataset[renamed] = dataset.pop(legacy)

    # Empty strings are not valid LanguageStrings values, so store None instead.
    supplied_metadata[DATASET_KEY] = {
        key: None if value == "" else value for key, value in dataset.items()
    }

    for variable in supplied_metadata[VARIABLES_KEY]:
        variable["data_type"] = variable.pop("datatype")

    return supplied_metadata
@@ -0,0 +1,259 @@
1
+ from datetime import datetime
2
+ from typing import Any
3
+
4
+ import arrow
5
+
6
+ DOCUMENT_VERSION_KEY = "document_version"
7
+ DATADOC_KEY = "datadoc"
8
+ DATASET_KEY = "dataset"
9
+ VARIABLES_KEY = "variables"
10
+ PSEUDONYMIZATION_KEY = "pseudonymization"
11
+
12
+
13
class UnknownModelVersionError(Exception):
    """Raised when a metadata document declares an unrecognised model version.

    Attributes:
        supplied_version: The unrecognised version found in the document.
    """

    def __init__(
        self,
        supplied_version: str,
        *args: Any,
    ) -> None:
        """Store the offending version and initialise the base exception.

        Args:
            supplied_version: The version of the model that was not recognized.
            *args: Additional positional arguments for the Exception base class.
        """
        # Forward the version and the unpacked extras so that ``self.args``
        # mirrors the constructor signature. Passing the ``args`` tuple itself
        # would nest it as a single argument, and copy/pickle rebuild an
        # exception by calling the class with ``self.args``.
        super().__init__(supplied_version, *args)
        self.supplied_version = supplied_version

    def __str__(self) -> str:
        """Return the message naming the unsupported document version."""
        return f"Document Version ({self.supplied_version}) of discovered file is not supported"
36
+
37
+
38
def _convert_language_string_type(supplied_value: dict) -> list[dict[str, str]]:
    """Turn a language-keyed dictionary into a list of language entries.

    The result always has three entries, in the order ``en``, ``nn``, ``nb``.
    Each entry has a ``languageCode`` key holding the code and a
    ``languageText`` key holding the matching value from ``supplied_value``.

    Args:
        supplied_value: A dictionary with the keys ``en``, ``nn`` and ``nb``.

    Returns:
        A list of ``languageCode`` / ``languageText`` dictionaries.

    Raises:
        KeyError: If any of the three language keys is missing.
    """
    return [
        {
            "languageCode": language_code,
            "languageText": supplied_value[language_code],
        }
        for language_code in ("en", "nn", "nb")
    ]
67
+
68
+
69
+ def find_and_update_language_strings(supplied_metadata: dict | None) -> dict | None:
70
+ """Find and update language-specific strings in the supplied metadata.
71
+
72
+ This function iterates through the supplied metadata dictionary.
73
+ For each key-value pair, if the value is a dictionary containing "en"
74
+ it is passed to the `_convert_language_string_type` function to potentially
75
+ update its format.
76
+
77
+ Args:
78
+ supplied_metadata: A metadata dictionary where values may include nested
79
+ dictionaries with language-specific strings.
80
+
81
+ Returns:
82
+ The updated metadata dictionary. If the supplied metadata is not a
83
+ dictionary, it returns `None`.
84
+ """
85
+ if isinstance(supplied_metadata, dict):
86
+ for key, value in supplied_metadata.items():
87
+ if isinstance(value, dict) and "en" in value:
88
+ supplied_metadata[key] = _convert_language_string_type(value)
89
+ return supplied_metadata
90
+ return None
91
+
92
+
93
def remove_element_from_model(
    supplied_metadata: dict[str, Any],
    element_to_remove: str,
) -> None:
    """Delete a key from the supplied metadata dictionary, if present.

    A key that is absent is silently ignored.

    Args:
        supplied_metadata: The metadata dictionary to remove the key from.
        element_to_remove: The key to delete.
    """
    if element_to_remove in supplied_metadata:
        del supplied_metadata[element_to_remove]
109
+
110
+
111
+ def cast_to_date_type(value_to_update: str | None) -> str | None:
112
+ """Convert a string to a date string in ISO format.
113
+
114
+ This function takes a string representing a date and converts it to a
115
+ date string in ISO format. If the input is `None`, it returns `None` without
116
+ modification.
117
+
118
+ Args:
119
+ value_to_update: A string representing a date or `None`.
120
+
121
+ Returns:
122
+ The date string in ISO format if the input was a valid date string, or
123
+ `None` if the input was `None`.
124
+ """
125
+ if value_to_update is None:
126
+ return value_to_update
127
+
128
+ return str(
129
+ arrow.get(
130
+ value_to_update,
131
+ ).date(),
132
+ )
133
+
134
+
135
def convert_is_personal_data(supplied_metadata: dict[str, Any]) -> None:
    """Convert enum-style ``is_personal_data`` values on variables to booleans.

    For every variable in ``datadoc.variables``:

    - ``NON_PSEUDONYMISED_ENCRYPTED_PERSONAL_DATA`` and
      ``PSEUDONYMISED_ENCRYPTED_PERSONAL_DATA`` become ``True``.
    - ``NOT_PERSONAL_DATA`` becomes ``False``.
    - Any other value is left untouched.

    Args:
        supplied_metadata: The metadata dictionary to be updated (in place).

    Raises:
        KeyError: If a variable has no ``is_personal_data`` key.
    """
    personal_data_values = (
        "NON_PSEUDONYMISED_ENCRYPTED_PERSONAL_DATA",
        "PSEUDONYMISED_ENCRYPTED_PERSONAL_DATA",
    )
    for variable in supplied_metadata[DATADOC_KEY][VARIABLES_KEY]:
        current = variable["is_personal_data"]
        if current in personal_data_values:
            variable["is_personal_data"] = True
            continue
        if current == "NOT_PERSONAL_DATA":
            variable["is_personal_data"] = False
155
+
156
+
157
def copy_pseudonymization_metadata(supplied_metadata: dict[str, Any]) -> None:
    """Copy pseudonymization metadata from the container section onto variables.

    For each variable in ``supplied_metadata[DATADOC_KEY][VARIABLES_KEY]`` whose
    ``short_name`` matches an entry in
    ``supplied_metadata[PSEUDONYMIZATION_KEY]["pseudo_variables"]``, the
    following fields are copied into the variable's ``pseudonymization``
    dictionary (existing entries in that dictionary are kept):

    - stable_identifier_type
    - stable_identifier_version
    - encryption_algorithm
    - encryption_key_reference
    - encryption_algorithm_parameters

    ``dataset_pseudo_time`` from ``pseudo_dataset`` is stored on each matched
    variable as ``pseudonymization_time``. Variables without a match get
    ``pseudonymization`` set to ``None``.

    Sections that are missing *or explicitly ``None``* are treated as empty.

    Args:
        supplied_metadata: The metadata dictionary to be updated (in place).

    Raises:
        KeyError: If a matched pseudo variable lacks one of the copied fields.
    """
    # ``dict.get(key, default)`` only falls back when the key is absent; a key
    # present with value None (e.g. JSON null) must be handled with ``or``.
    pseudonymization = supplied_metadata.get(PSEUDONYMIZATION_KEY) or {}
    pseudo_variables = pseudonymization.get("pseudo_variables") or []
    pseudo_dataset = pseudonymization.get("pseudo_dataset") or {}
    pseudo_time = pseudo_dataset.get("dataset_pseudo_time")
    datadoc_variables = (supplied_metadata.get(DATADOC_KEY) or {}).get(
        VARIABLES_KEY,
    ) or []
    pseudo_by_short_name = {
        pseudo_variable.get("short_name"): pseudo_variable
        for pseudo_variable in pseudo_variables
    }
    copied_fields = (
        "stable_identifier_type",
        "stable_identifier_version",
        "encryption_algorithm",
        "encryption_key_reference",
        "encryption_algorithm_parameters",
    )

    for variable in datadoc_variables:
        short_name = variable.get("short_name")
        if short_name not in pseudo_by_short_name:
            variable[PSEUDONYMIZATION_KEY] = None
            continue

        pseudo_variable = pseudo_by_short_name[short_name]
        # Start from a copy of whatever the variable already carries.
        merged = dict(variable.get(PSEUDONYMIZATION_KEY) or {})
        for field in copied_fields:
            merged[field] = pseudo_variable[field]
        merged["pseudonymization_time"] = pseudo_time
        variable[PSEUDONYMIZATION_KEY] = merged
205
+
206
+
207
+ def convert_datetime_to_date(date_value: str | None) -> str | None:
208
+ """Convert ISO datetime string to date string, handling None and invalid values."""
209
+ if not date_value or not isinstance(date_value, str):
210
+ return date_value
211
+
212
+ try:
213
+ dt = datetime.fromisoformat(date_value.replace("Z", "+00:00"))
214
+ return dt.date().isoformat()
215
+ except ValueError:
216
+ return date_value
217
+
218
+
219
def add_container(existing_metadata: dict) -> dict:
    """Wrap pre-container metadata in the container structure.

    The returned dictionary has three entries, in this order: the document
    version (``"0.0.1"``), the supplied metadata under the datadoc key, and a
    pseudonymization entry set to ``None``.

    Args:
        existing_metadata: The original metadata dictionary to be wrapped.

    Returns:
        A new dictionary holding the wrapped metadata; the supplied dictionary
        is referenced, not copied.
    """
    container: dict = {}
    container[DOCUMENT_VERSION_KEY] = "0.0.1"
    container[DATADOC_KEY] = existing_metadata
    container[PSEUDONYMIZATION_KEY] = None
    return container
239
+
240
+
241
def is_metadata_in_container_structure(
    metadata: dict,
) -> bool:
    """Tell whether the metadata uses the container structure.

    The container groups different kinds of metadata ('datadoc',
    'pseudonymization', ...) under separate top-level keys. Its presence is
    detected solely by the 'datadoc' key existing at the top level.

    Args:
        metadata: The metadata dictionary to check.

    Returns:
        True if the 'datadoc' key is present at the top level, False otherwise.
    """
    has_datadoc_section = DATADOC_KEY in metadata
    return has_datadoc_section