dapla-toolbelt-metadata 0.2.1__py3-none-any.whl → 0.9.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dapla-toolbelt-metadata might be problematic. See the package registry advisory for more details.

Files changed (97)
  1. dapla_metadata/__init__.py +11 -1
  2. dapla_metadata/_shared/__init__.py +1 -0
  3. dapla_metadata/_shared/config.py +109 -0
  4. dapla_metadata/_shared/enums.py +27 -0
  5. dapla_metadata/_shared/py.typed +0 -0
  6. dapla_metadata/dapla/__init__.py +4 -0
  7. dapla_metadata/dapla/user_info.py +138 -0
  8. dapla_metadata/datasets/__init__.py +1 -1
  9. dapla_metadata/datasets/_merge.py +333 -0
  10. dapla_metadata/datasets/code_list.py +5 -6
  11. dapla_metadata/datasets/compatibility/__init__.py +10 -0
  12. dapla_metadata/datasets/compatibility/_handlers.py +363 -0
  13. dapla_metadata/datasets/compatibility/_utils.py +259 -0
  14. dapla_metadata/datasets/compatibility/model_backwards_compatibility.py +135 -0
  15. dapla_metadata/datasets/core.py +136 -182
  16. dapla_metadata/datasets/dapla_dataset_path_info.py +145 -19
  17. dapla_metadata/datasets/dataset_parser.py +41 -28
  18. dapla_metadata/datasets/model_validation.py +29 -20
  19. dapla_metadata/datasets/statistic_subject_mapping.py +5 -1
  20. dapla_metadata/datasets/utility/constants.py +22 -15
  21. dapla_metadata/datasets/utility/enums.py +8 -20
  22. dapla_metadata/datasets/utility/urn.py +234 -0
  23. dapla_metadata/datasets/utility/utils.py +183 -111
  24. dapla_metadata/standards/__init__.py +4 -0
  25. dapla_metadata/standards/name_validator.py +250 -0
  26. dapla_metadata/standards/standard_validators.py +98 -0
  27. dapla_metadata/standards/utils/__init__.py +1 -0
  28. dapla_metadata/standards/utils/constants.py +49 -0
  29. dapla_metadata/variable_definitions/__init__.py +11 -0
  30. dapla_metadata/variable_definitions/_generated/.openapi-generator/FILES +20 -0
  31. dapla_metadata/variable_definitions/_generated/.openapi-generator/VERSION +1 -0
  32. dapla_metadata/variable_definitions/_generated/.openapi-generator-ignore +6 -0
  33. dapla_metadata/variable_definitions/_generated/README.md +148 -0
  34. dapla_metadata/variable_definitions/_generated/__init__.py +0 -0
  35. dapla_metadata/variable_definitions/_generated/vardef_client/__init__.py +47 -0
  36. dapla_metadata/variable_definitions/_generated/vardef_client/api/__init__.py +8 -0
  37. dapla_metadata/variable_definitions/_generated/vardef_client/api/data_migration_api.py +766 -0
  38. dapla_metadata/variable_definitions/_generated/vardef_client/api/draft_variable_definitions_api.py +888 -0
  39. dapla_metadata/variable_definitions/_generated/vardef_client/api/patches_api.py +888 -0
  40. dapla_metadata/variable_definitions/_generated/vardef_client/api/validity_periods_api.py +583 -0
  41. dapla_metadata/variable_definitions/_generated/vardef_client/api/variable_definitions_api.py +613 -0
  42. dapla_metadata/variable_definitions/_generated/vardef_client/api_client.py +779 -0
  43. dapla_metadata/variable_definitions/_generated/vardef_client/api_response.py +27 -0
  44. dapla_metadata/variable_definitions/_generated/vardef_client/configuration.py +474 -0
  45. dapla_metadata/variable_definitions/_generated/vardef_client/docs/CompleteResponse.md +51 -0
  46. dapla_metadata/variable_definitions/_generated/vardef_client/docs/Contact.md +30 -0
  47. dapla_metadata/variable_definitions/_generated/vardef_client/docs/DataMigrationApi.md +90 -0
  48. dapla_metadata/variable_definitions/_generated/vardef_client/docs/Draft.md +42 -0
  49. dapla_metadata/variable_definitions/_generated/vardef_client/docs/DraftVariableDefinitionsApi.md +259 -0
  50. dapla_metadata/variable_definitions/_generated/vardef_client/docs/LanguageStringType.md +31 -0
  51. dapla_metadata/variable_definitions/_generated/vardef_client/docs/Owner.md +31 -0
  52. dapla_metadata/variable_definitions/_generated/vardef_client/docs/Patch.md +43 -0
  53. dapla_metadata/variable_definitions/_generated/vardef_client/docs/PatchesApi.md +249 -0
  54. dapla_metadata/variable_definitions/_generated/vardef_client/docs/PublicApi.md +218 -0
  55. dapla_metadata/variable_definitions/_generated/vardef_client/docs/SupportedLanguages.md +15 -0
  56. dapla_metadata/variable_definitions/_generated/vardef_client/docs/UpdateDraft.md +44 -0
  57. dapla_metadata/variable_definitions/_generated/vardef_client/docs/ValidityPeriod.md +42 -0
  58. dapla_metadata/variable_definitions/_generated/vardef_client/docs/ValidityPeriodsApi.md +236 -0
  59. dapla_metadata/variable_definitions/_generated/vardef_client/docs/VariableDefinitionsApi.md +304 -0
  60. dapla_metadata/variable_definitions/_generated/vardef_client/docs/VariableStatus.md +17 -0
  61. dapla_metadata/variable_definitions/_generated/vardef_client/exceptions.py +193 -0
  62. dapla_metadata/variable_definitions/_generated/vardef_client/models/__init__.py +31 -0
  63. dapla_metadata/variable_definitions/_generated/vardef_client/models/complete_response.py +260 -0
  64. dapla_metadata/variable_definitions/_generated/vardef_client/models/contact.py +94 -0
  65. dapla_metadata/variable_definitions/_generated/vardef_client/models/draft.py +228 -0
  66. dapla_metadata/variable_definitions/_generated/vardef_client/models/get_vardok_vardef_mapping_by_id200_response.py +158 -0
  67. dapla_metadata/variable_definitions/_generated/vardef_client/models/language_string_type.py +101 -0
  68. dapla_metadata/variable_definitions/_generated/vardef_client/models/owner.py +87 -0
  69. dapla_metadata/variable_definitions/_generated/vardef_client/models/patch.py +244 -0
  70. dapla_metadata/variable_definitions/_generated/vardef_client/models/problem.py +118 -0
  71. dapla_metadata/variable_definitions/_generated/vardef_client/models/update_draft.py +274 -0
  72. dapla_metadata/variable_definitions/_generated/vardef_client/models/validity_period.py +225 -0
  73. dapla_metadata/variable_definitions/_generated/vardef_client/models/vardok_id_response.py +81 -0
  74. dapla_metadata/variable_definitions/_generated/vardef_client/models/vardok_vardef_id_pair_response.py +84 -0
  75. dapla_metadata/variable_definitions/_generated/vardef_client/models/variable_status.py +33 -0
  76. dapla_metadata/variable_definitions/_generated/vardef_client/py.typed +0 -0
  77. dapla_metadata/variable_definitions/_generated/vardef_client/rest.py +249 -0
  78. dapla_metadata/variable_definitions/_utils/__init__.py +1 -0
  79. dapla_metadata/variable_definitions/_utils/_client.py +32 -0
  80. dapla_metadata/variable_definitions/_utils/config.py +54 -0
  81. dapla_metadata/variable_definitions/_utils/constants.py +80 -0
  82. dapla_metadata/variable_definitions/_utils/files.py +309 -0
  83. dapla_metadata/variable_definitions/_utils/template_files.py +99 -0
  84. dapla_metadata/variable_definitions/_utils/variable_definition_files.py +143 -0
  85. dapla_metadata/variable_definitions/exceptions.py +255 -0
  86. dapla_metadata/variable_definitions/vardef.py +372 -0
  87. dapla_metadata/variable_definitions/vardok_id.py +48 -0
  88. dapla_metadata/variable_definitions/vardok_vardef_id_pair.py +47 -0
  89. dapla_metadata/variable_definitions/variable_definition.py +422 -0
  90. {dapla_toolbelt_metadata-0.2.1.dist-info → dapla_toolbelt_metadata-0.9.11.dist-info}/METADATA +34 -36
  91. dapla_toolbelt_metadata-0.9.11.dist-info/RECORD +97 -0
  92. {dapla_toolbelt_metadata-0.2.1.dist-info → dapla_toolbelt_metadata-0.9.11.dist-info}/WHEEL +1 -1
  93. dapla_metadata/datasets/config.py +0 -80
  94. dapla_metadata/datasets/model_backwards_compatibility.py +0 -520
  95. dapla_metadata/datasets/user_info.py +0 -88
  96. dapla_toolbelt_metadata-0.2.1.dist-info/RECORD +0 -22
  97. {dapla_toolbelt_metadata-0.2.1.dist-info → dapla_toolbelt_metadata-0.9.11.dist-info/licenses}/LICENSE +0 -0
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 1.9.0
2
+ Generator: hatchling 1.27.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,80 +0,0 @@
1
- """Configuration management for dataset package."""
2
-
3
- from __future__ import annotations
4
-
5
- import logging
6
- import os
7
- from pathlib import Path
8
- from pprint import pformat
9
-
10
- from dotenv import dotenv_values
11
- from dotenv import load_dotenv
12
-
13
- from dapla_metadata.datasets.utility.constants import (
14
- DATADOC_STATISTICAL_SUBJECT_SOURCE_URL,
15
- )
16
- from dapla_metadata.datasets.utility.enums import DaplaRegion
17
- from dapla_metadata.datasets.utility.enums import DaplaService
18
-
19
- logger = logging.getLogger(__name__)
20
-
21
- DOT_ENV_FILE_PATH = Path(__file__).parent.joinpath(".env")
22
-
23
- JUPYTERHUB_USER = "JUPYTERHUB_USER"
24
- DAPLA_REGION = "DAPLA_REGION"
25
- DAPLA_SERVICE = "DAPLA_SERVICE"
26
-
27
- env_loaded = False
28
-
29
-
30
- def _load_dotenv_file() -> None:
31
- global env_loaded # noqa: PLW0603
32
- if not env_loaded and DOT_ENV_FILE_PATH.exists():
33
- load_dotenv(DOT_ENV_FILE_PATH)
34
- env_loaded = True
35
- logger.info(
36
- "Loaded .env file with config keys: \n%s",
37
- pformat(list(dotenv_values(DOT_ENV_FILE_PATH).keys())),
38
- )
39
-
40
-
41
- def _get_config_item(item: str) -> str | None:
42
- """Get a config item. Makes sure all access is logged."""
43
- _load_dotenv_file()
44
- value = os.getenv(item)
45
- logger.debug("Config accessed. %s", item)
46
- return value
47
-
48
-
49
- def get_jupyterhub_user() -> str | None:
50
- """Get the JupyterHub user name."""
51
- return _get_config_item(JUPYTERHUB_USER)
52
-
53
-
54
- def get_statistical_subject_source_url() -> str | None:
55
- """Get the URL to the statistical subject source."""
56
- return (
57
- _get_config_item("DATADOC_STATISTICAL_SUBJECT_SOURCE_URL")
58
- or DATADOC_STATISTICAL_SUBJECT_SOURCE_URL
59
- )
60
-
61
-
62
- def get_dapla_region() -> DaplaRegion | None:
63
- """Get the Dapla region we're running on."""
64
- if region := _get_config_item(DAPLA_REGION):
65
- return DaplaRegion(region)
66
-
67
- return None
68
-
69
-
70
- def get_dapla_service() -> DaplaService | None:
71
- """Get the Dapla service we're running on."""
72
- if service := _get_config_item(DAPLA_SERVICE):
73
- return DaplaService(service)
74
-
75
- return None
76
-
77
-
78
- def get_oidc_token() -> str | None:
79
- """Get the JWT token from the environment."""
80
- return _get_config_item("OIDC_TOKEN")
@@ -1,520 +0,0 @@
1
- """Upgrade old metadata files to be compatible with new versions.
2
-
3
- An important principle of Datadoc is that we ALWAYS guarantee backwards
4
- compatibility of existing metadata documents. This means that we guarantee
5
- that a user will never lose data, even if their document is decades old.
6
-
7
- For each document version we release with breaking changes, we implement a
8
- handler and register the version by defining a BackwardsCompatibleVersion
9
- instance. These documents will then be upgraded when they're opened in Datadoc.
10
-
11
- A test must also be implemented for each new version.
12
- """
13
-
14
- from __future__ import annotations
15
-
16
- from collections import OrderedDict
17
- from dataclasses import dataclass
18
- from datetime import datetime
19
- from datetime import timezone
20
- from typing import TYPE_CHECKING
21
- from typing import Any
22
-
23
- import arrow
24
-
25
- if TYPE_CHECKING:
26
- from collections.abc import Callable
27
-
28
- VERSION_FIELD_NAME = "document_version"
29
-
30
-
31
- class UnknownModelVersionError(Exception):
32
- """Exception raised for unknown model versions.
33
-
34
- This error is thrown when an unrecognized model version is encountered.
35
- """
36
-
37
- def __init__(
38
- self,
39
- supplied_version: str,
40
- *args: tuple[Any, ...],
41
- ) -> None:
42
- """Initialize the exception with the supplied version.
43
-
44
- Args:
45
- supplied_version: The version of the model that was not recognized.
46
- *args: Additional arguments for the Exception base class.
47
- """
48
- super().__init__(args)
49
- self.supplied_version = supplied_version
50
-
51
- def __str__(self) -> str:
52
- """Return string representation."""
53
- return f"Document Version ({self.supplied_version}) of discovered file is not supported"
54
-
55
-
56
- SUPPORTED_VERSIONS: OrderedDict[str, BackwardsCompatibleVersion] = OrderedDict()
57
-
58
-
59
- @dataclass()
60
- class BackwardsCompatibleVersion:
61
- """A version which we support with backwards compatibility.
62
-
63
- This class registers a version and its corresponding handler function
64
- for backwards compatibility.
65
- """
66
-
67
- version: str
68
- handler: Callable[[dict[str, Any]], dict[str, Any]]
69
-
70
- def __post_init__(self) -> None:
71
- """Register this version in the supported versions map.
72
-
73
- This method adds the instance to the `SUPPORTED_VERSIONS` dictionary
74
- using the version as the key.
75
- """
76
- SUPPORTED_VERSIONS[self.version] = self
77
-
78
-
79
- def handle_current_version(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
80
- """Handle the current version of the metadata.
81
-
82
- This function returns the supplied metadata unmodified.
83
-
84
- Args:
85
- supplied_metadata: The metadata for the current version.
86
-
87
- Returns:
88
- The unmodified supplied metadata.
89
- """
90
- return supplied_metadata
91
-
92
-
93
- def _find_and_update_language_strings(supplied_metadata: dict | None) -> dict | None:
94
- """Find and update language-specific strings in the supplied metadata.
95
-
96
- This function iterates through the supplied metadata dictionary.
97
- For each key-value pair, if the value is a dictionary containing "en"
98
- it is passed to the `_convert_language_string_type` function to potentially
99
- update its format.
100
-
101
- Args:
102
- supplied_metadata: A metadata dictionary where values may include nested
103
- dictionaries with language-specific strings.
104
-
105
- Returns:
106
- The updated metadata dictionary. If the supplied metadata is not a
107
- dictionary, it returns `None`.
108
- """
109
- if isinstance(supplied_metadata, dict):
110
- for key, value in supplied_metadata.items():
111
- if isinstance(value, dict) and "en" in value:
112
- supplied_metadata[key] = _convert_language_string_type(value)
113
- return supplied_metadata
114
- return None
115
-
116
-
117
- def _convert_language_string_type(supplied_value: dict) -> list[dict[str, str]]:
118
- """Convert a dictionary of language-specific strings to a list of dictionaries.
119
-
120
- This function takes a dictionary with language codes as keys and
121
- corresponding language-specific strings as values, and converts it to a list
122
- of dictionaries with 'languageCode' and 'languageText' keys.
123
-
124
- Args:
125
- supplied_value: A dictionary containing language codes as keys and
126
- language strings as values.
127
-
128
- Returns:
129
- A list of dictionaries, each containing 'languageCode' and 'languageText'
130
- keys, representing the converted language strings.
131
- """
132
- return [
133
- {
134
- "languageCode": "en",
135
- "languageText": supplied_value["en"],
136
- },
137
- {
138
- "languageCode": "nn",
139
- "languageText": supplied_value["nn"],
140
- },
141
- {
142
- "languageCode": "nb",
143
- "languageText": supplied_value["nb"],
144
- },
145
- ]
146
-
147
-
148
- def _remove_element_from_model(
149
- supplied_metadata: dict[str, Any],
150
- element_to_remove: str,
151
- ) -> None:
152
- """Remove an element from the supplied metadata dictionary.
153
-
154
- This function deletes a specified element from the supplied metadata dictionary
155
- if it exists.
156
-
157
- Args:
158
- supplied_metadata: The metadata dictionary from which the element will be
159
- removed.
160
- element_to_remove: The key of the element to be removed from the metadata
161
- dictionary.
162
- """
163
- if element_to_remove in supplied_metadata:
164
- del supplied_metadata[element_to_remove]
165
-
166
-
167
- def _cast_to_date_type(value_to_update: str | None) -> str | None:
168
- """Convert a string to a date string in ISO format.
169
-
170
- This function takes a string representing a date and converts it to a
171
- date string in ISO format. If the input is `None`, it returns `None` without
172
- modification.
173
-
174
- Args:
175
- value_to_update: A string representing a date or `None`.
176
-
177
- Returns:
178
- The date string in ISO format if the input was a valid date string, or
179
- `None` if the input was `None`.
180
- """
181
- if value_to_update is None:
182
- return value_to_update
183
-
184
- return str(
185
- arrow.get(
186
- value_to_update,
187
- ).date(),
188
- )
189
-
190
-
191
- def handle_version_3_3_0(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
192
- """Handle breaking changes for version 3.3.0.
193
-
194
- This function modifies the supplied metadata to accommodate breaking changes
195
- introduced in version 4.0.0. Specifically, it removes the
196
- 'direct_person_identifying' field from each variable in 'datadoc.variables'
197
- and updates the 'document_version' field to "4.0.0".
198
-
199
- Args:
200
- supplied_metadata: The metadata dictionary to be updated.
201
-
202
- Returns:
203
- The updated metadata dictionary.
204
- """
205
- for i in range(len(supplied_metadata["datadoc"]["variables"])):
206
- _remove_element_from_model(
207
- supplied_metadata["datadoc"]["variables"][i],
208
- "direct_person_identifying",
209
- )
210
- supplied_metadata["datadoc"]["document_version"] = "4.0.0"
211
- return supplied_metadata
212
-
213
-
214
- def handle_version_3_2_0(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
215
- """Handle breaking changes for version 3.2.0.
216
-
217
- This function modifies the supplied metadata to accommodate breaking
218
- changes introduced in version 3.3.0. Specifically, it updates the
219
- 'contains_data_from' and 'contains_data_until' fields in both the 'dataset'
220
- and 'variables' sections of the supplied metadata dictionary to ensure they
221
- are stored as date strings.
222
- It also updates the 'document_version' field to "3.3.0".
223
-
224
- Args:
225
- supplied_metadata: The metadata dictionary to be updated.
226
-
227
- Returns:
228
- The updated metadata dictionary.
229
- """
230
- fields = ["contains_data_from", "contains_data_until"]
231
- for field in fields:
232
- supplied_metadata["datadoc"]["dataset"][field] = _cast_to_date_type(
233
- supplied_metadata["datadoc"]["dataset"].get(field, None),
234
- )
235
- for v in supplied_metadata["datadoc"]["variables"]:
236
- v[field] = _cast_to_date_type(v.get(field, None))
237
-
238
- supplied_metadata["datadoc"]["document_version"] = "3.3.0"
239
- return supplied_metadata
240
-
241
-
242
- def handle_version_3_1_0(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
243
- """Handle breaking changes for version 3.1.0.
244
-
245
- This function modifies the supplied metadata to accommodate breaking
246
- changes introduced in version 3.2.0. Specifically, it updates the
247
- 'data_source' field in both the 'dataset' and 'variables' sections of the
248
- supplied metadata dictionary by converting value to string.
249
- The 'document_version' field is also updated to "3.2.0".
250
-
251
- Args:
252
- supplied_metadata: The metadata dictionary to be updated.
253
-
254
- Returns:
255
- The updated metadata dictionary.
256
- """
257
- data = supplied_metadata["datadoc"]["dataset"]["data_source"]
258
-
259
- if data is not None:
260
- supplied_metadata["datadoc"]["dataset"]["data_source"] = str(
261
- data[0]["languageText"],
262
- )
263
-
264
- for i in range(len(supplied_metadata["datadoc"]["variables"])):
265
- data = supplied_metadata["datadoc"]["variables"][i]["data_source"]
266
- if data is not None:
267
- supplied_metadata["datadoc"]["variables"][i]["data_source"] = str(
268
- data[0]["languageText"],
269
- )
270
-
271
- supplied_metadata["datadoc"]["document_version"] = "3.2.0"
272
- return supplied_metadata
273
-
274
-
275
- def handle_version_2_2_0(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
276
- """Handle breaking changes for version 2.2.0.
277
-
278
- This function modifies the supplied metadata to accommodate breaking changes
279
- introduced in version 3.1.0. Specifically, it updates the 'subject_field' in
280
- the 'dataset' section of the supplied metadata dictionary by converting it to
281
- a string. It also removes the 'register_uri' field from the 'dataset'.
282
- Additionally, it removes 'sentinel_value_uri' from each variable,
283
- sets 'special_value' and 'custom_type' fields to None, and updates
284
- language strings in the 'variables' and 'dataset' sections.
285
- The 'document_version' is updated to "3.1.0".
286
-
287
- Args:
288
- supplied_metadata: The metadata dictionary to be updated.
289
-
290
- Returns:
291
- The updated metadata dictionary.
292
- """
293
- if supplied_metadata["datadoc"]["dataset"]["subject_field"] is not None:
294
- data = supplied_metadata["datadoc"]["dataset"]["subject_field"]
295
- supplied_metadata["datadoc"]["dataset"]["subject_field"] = str(
296
- data["nb"] or data["nn"] or data["en"],
297
- )
298
-
299
- _remove_element_from_model(supplied_metadata["datadoc"]["dataset"], "register_uri")
300
-
301
- for i in range(len(supplied_metadata["datadoc"]["variables"])):
302
- _remove_element_from_model(
303
- supplied_metadata["datadoc"]["variables"][i],
304
- "sentinel_value_uri",
305
- )
306
- supplied_metadata["datadoc"]["variables"][i]["special_value"] = None
307
- supplied_metadata["datadoc"]["variables"][i]["custom_type"] = None
308
- supplied_metadata["datadoc"]["variables"][
309
- i
310
- ] = _find_and_update_language_strings(
311
- supplied_metadata["datadoc"]["variables"][i],
312
- )
313
- supplied_metadata["datadoc"]["dataset"]["custom_type"] = None
314
- supplied_metadata["datadoc"]["dataset"] = _find_and_update_language_strings(
315
- supplied_metadata["datadoc"]["dataset"],
316
- )
317
- supplied_metadata["datadoc"]["document_version"] = "3.1.0"
318
- return supplied_metadata
319
-
320
-
321
- def add_container(existing_metadata: dict) -> dict:
322
- """Add container for previous versions.
323
-
324
- Adds a container structure for previous versions of metadata.
325
- This function wraps the existing metadata in a new container structure
326
- that includes the 'document_version', 'datadoc', and 'pseudonymization'
327
- fields. The 'document_version' is set to "0.0.1" and 'pseudonymization'
328
- is set to None.
329
-
330
- Args:
331
- existing_metadata: The original metadata dictionary to be wrapped.
332
-
333
- Returns:
334
- A new dictionary containing the wrapped metadata with additional fields.
335
- """
336
- return {
337
- "document_version": "0.0.1",
338
- "datadoc": existing_metadata,
339
- "pseudonymization": None,
340
- }
341
-
342
-
343
- def handle_version_2_1_0(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
344
- """Handle breaking changes for version 2.1.0.
345
-
346
- This function modifies the supplied metadata to accommodate breaking changes
347
- introduced in version 2.2.0. Specifically, it updates the 'owner' field in
348
- the 'dataset' section of the supplied metadata dictionary by converting it
349
- from a LanguageStringType to a string.
350
- The 'document_version' is updated to "2.2.0".
351
-
352
- Args:
353
- supplied_metadata: The metadata dictionary to be updated.
354
-
355
- Returns:
356
- The updated metadata dictionary.
357
- """
358
- data = supplied_metadata["dataset"]["owner"]
359
- supplied_metadata["dataset"]["owner"] = str(data["nb"] or data["nn"] or data["en"])
360
- supplied_metadata["document_version"] = "2.2.0"
361
- return add_container(supplied_metadata)
362
-
363
-
364
- def handle_version_1_0_0(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
365
- """Handle breaking changes for version 1.0.0.
366
-
367
- This function modifies the supplied metadata to accommodate breaking changes
368
- introduced in version 2.1.0. Specifically, it updates the date fields
369
- 'metadata_created_date' and 'metadata_last_updated_date' to ISO 8601 format
370
- with UTC timezone. It also converts the 'data_source' field from a string to a
371
- dictionary with language keys if necessary and removes the 'data_source_path'
372
- field.
373
- The 'document_version' is updated to "2.1.0".
374
-
375
- Args:
376
- supplied_metadata: The metadata dictionary to be updated.
377
-
378
- Returns:
379
- The updated metadata dictionary.
380
-
381
- """
382
- datetime_fields = [("metadata_created_date"), ("metadata_last_updated_date")]
383
- for field in datetime_fields:
384
- if supplied_metadata["dataset"][field]:
385
- supplied_metadata["dataset"][field] = datetime.isoformat(
386
- datetime.fromisoformat(supplied_metadata["dataset"][field]).astimezone(
387
- tz=timezone.utc,
388
- ),
389
- timespec="seconds",
390
- )
391
- if isinstance(supplied_metadata["dataset"]["data_source"], str):
392
- supplied_metadata["dataset"]["data_source"] = {
393
- "en": supplied_metadata["dataset"]["data_source"],
394
- "nn": "",
395
- "nb": "",
396
- }
397
-
398
- _remove_element_from_model(supplied_metadata["dataset"], "data_source_path")
399
-
400
- supplied_metadata["document_version"] = "2.1.0"
401
- return supplied_metadata
402
-
403
-
404
- def handle_version_0_1_1(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
405
- """Handle breaking changes for version 0.1.1.
406
-
407
- This function modifies the supplied metadata to accommodate breaking changes
408
- introduced in version 1.0.0. Specifically, it renames certain keys within the
409
- `dataset` and `variables` sections, and replaces empty string values with
410
- `None` for `dataset` keys.
411
-
412
- Args:
413
- supplied_metadata: The metadata dictionary that needs to be updated.
414
-
415
- Returns:
416
- The updated metadata dictionary.
417
-
418
- References:
419
- PR ref: https://github.com/statisticsnorway/ssb-datadoc-model/pull/4
420
- """
421
- key_renaming = [
422
- ("metadata_created_date", "created_date"),
423
- ("metadata_created_by", "created_by"),
424
- ("metadata_last_updated_date", "last_updated_date"),
425
- ("metadata_last_updated_by", "last_updated_by"),
426
- ]
427
- for new_key, old_key in key_renaming:
428
- supplied_metadata["dataset"][new_key] = supplied_metadata["dataset"].pop(
429
- old_key,
430
- )
431
- # Replace empty strings with None, empty strings are not valid for LanguageStrings values
432
- supplied_metadata["dataset"] = {
433
- k: None if v == "" else v for k, v in supplied_metadata["dataset"].items()
434
- }
435
-
436
- key_renaming = [("data_type", "datatype")]
437
-
438
- for i in range(len(supplied_metadata["variables"])):
439
- for new_key, old_key in key_renaming:
440
- supplied_metadata["variables"][i][new_key] = supplied_metadata["variables"][
441
- i
442
- ].pop(
443
- old_key,
444
- )
445
-
446
- return supplied_metadata
447
-
448
-
449
- # Register all the supported versions and their handlers.
450
- # MUST be ordered from oldest to newest.
451
- BackwardsCompatibleVersion(version="0.1.1", handler=handle_version_0_1_1)
452
- BackwardsCompatibleVersion(version="1.0.0", handler=handle_version_1_0_0)
453
- BackwardsCompatibleVersion(
454
- version="2.1.0",
455
- handler=handle_version_2_1_0,
456
- ) # A container must be created at this version
457
- BackwardsCompatibleVersion(version="2.2.0", handler=handle_version_2_2_0)
458
- BackwardsCompatibleVersion(version="3.1.0", handler=handle_version_3_1_0)
459
- BackwardsCompatibleVersion(version="3.2.0", handler=handle_version_3_2_0)
460
- BackwardsCompatibleVersion(version="3.3.0", handler=handle_version_3_3_0)
461
- BackwardsCompatibleVersion(version="4.0.0", handler=handle_current_version)
462
-
463
-
464
- def upgrade_metadata(fresh_metadata: dict[str, Any]) -> dict[str, Any]:
465
- """Upgrade the metadata to the latest version using registered handlers.
466
-
467
- This function checks the version of the provided metadata and applies a series
468
- of upgrade handlers to migrate the metadata to the latest version.
469
- It starts from the provided version and applies all subsequent handlers in
470
- sequence. If the metadata is already in the latest version or the version
471
- cannot be determined, appropriate actions are taken.
472
-
473
- Args:
474
- fresh_metadata: The metadata dictionary to be upgraded. This dictionary
475
- must include version information that determines which handlers to apply.
476
-
477
- Returns:
478
- The upgraded metadata dictionary, after applying all necessary handlers.
479
-
480
- Raises:
481
- UnknownModelVersionError: If the metadata's version is unknown or unsupported.
482
- """
483
- # Special case for current version, we expose the current_model_version parameter for test purposes
484
- if is_metadata_in_container_structure(fresh_metadata):
485
- if fresh_metadata["datadoc"] is None:
486
- return fresh_metadata
487
- supplied_version = fresh_metadata["datadoc"][VERSION_FIELD_NAME]
488
- else:
489
- supplied_version = fresh_metadata[VERSION_FIELD_NAME]
490
- start_running_handlers = False
491
- # Run all the handlers in order from the supplied version onwards
492
- for k, v in SUPPORTED_VERSIONS.items():
493
- if k == supplied_version:
494
- start_running_handlers = True
495
- if start_running_handlers:
496
- fresh_metadata = v.handler(fresh_metadata)
497
- if not start_running_handlers:
498
- raise UnknownModelVersionError(supplied_version)
499
- return fresh_metadata
500
-
501
-
502
- def is_metadata_in_container_structure(
503
- metadata: dict,
504
- ) -> bool:
505
- """Check if the metadata is in the container structure.
506
-
507
- At a certain point a metadata 'container' was introduced.
508
- The container provides a structure for different 'types' of metadata, such as
509
- 'datadoc', 'pseudonymization' etc.
510
- This function determines if the given metadata dictionary follows this container
511
- structure by checking for the presence of the 'datadoc' field.
512
-
513
- Args:
514
- metadata: The metadata dictionary to check.
515
-
516
- Returns:
517
- True if the metadata is in the container structure (i.e., contains the
518
- 'datadoc' field), False otherwise.
519
- """
520
- return "datadoc" in metadata
@@ -1,88 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import contextlib
4
- import logging
5
- from typing import Protocol
6
-
7
- import jwt
8
-
9
- from dapla_metadata.datasets import config
10
- from dapla_metadata.datasets.utility.enums import DaplaRegion
11
- from dapla_metadata.datasets.utility.enums import DaplaService
12
-
13
- logger = logging.getLogger(__name__)
14
-
15
-
16
- PLACEHOLDER_EMAIL_ADDRESS = "default_user@ssb.no"
17
-
18
-
19
- class UserInfo(Protocol):
20
- """Information about the current user.
21
-
22
- Implementations may be provided for different platforms or testing.
23
- """
24
-
25
- @property
26
- def short_email(self) -> str | None:
27
- """Get the short email address."""
28
- ...
29
-
30
-
31
- class UnknownUserInfo:
32
- """Fallback when no implementation is found."""
33
-
34
- @property
35
- def short_email(self) -> str | None:
36
- """Unknown email address."""
37
- return None
38
-
39
-
40
- class TestUserInfo:
41
- """Information about the current user for local development and testing."""
42
-
43
- @property
44
- def short_email(self) -> str | None:
45
- """Get the short email address."""
46
- return PLACEHOLDER_EMAIL_ADDRESS
47
-
48
-
49
- class DaplaLabUserInfo:
50
- """Information about the current user when running on Dapla Lab."""
51
-
52
- @property
53
- def short_email(self) -> str | None:
54
- """Get the short email address."""
55
- encoded_jwt = config.get_oidc_token()
56
- if encoded_jwt:
57
- # The JWT has been verified by the platform prior to injection, no need to verify.
58
- decoded_jwt = jwt.decode(encoded_jwt, options={"verify_signature": False})
59
- with contextlib.suppress(KeyError):
60
- # If email can't be found in the JWT, fall through and return None
61
- return decoded_jwt["email"]
62
-
63
- logger.warning(
64
- "Could not access JWT from environment. Could not get short email address.",
65
- )
66
- return None
67
-
68
-
69
- class JupyterHubUserInfo:
70
- """Information about the current user when running on JupyterHub."""
71
-
72
- @property
73
- def short_email(self) -> str | None:
74
- """Get the short email address."""
75
- return config.get_jupyterhub_user()
76
-
77
-
78
- def get_user_info_for_current_platform() -> UserInfo:
79
- """Return the correct implementation of UserInfo for the current platform."""
80
- if config.get_dapla_region() == DaplaRegion.DAPLA_LAB:
81
- return DaplaLabUserInfo()
82
- elif config.get_dapla_service() == DaplaService.JUPYTERLAB: # noqa: RET505
83
- return JupyterHubUserInfo()
84
- else:
85
- logger.warning(
86
- "Was not possible to retrieve user information! Some fields may not be set.",
87
- )
88
- return UnknownUserInfo()