dapla-toolbelt-metadata 0.2.1__py3-none-any.whl → 0.9.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dapla-toolbelt-metadata might be problematic. Click here for more details.
- dapla_metadata/__init__.py +11 -1
- dapla_metadata/_shared/__init__.py +1 -0
- dapla_metadata/_shared/config.py +109 -0
- dapla_metadata/_shared/enums.py +27 -0
- dapla_metadata/_shared/py.typed +0 -0
- dapla_metadata/dapla/__init__.py +4 -0
- dapla_metadata/dapla/user_info.py +138 -0
- dapla_metadata/datasets/__init__.py +1 -1
- dapla_metadata/datasets/_merge.py +333 -0
- dapla_metadata/datasets/code_list.py +5 -6
- dapla_metadata/datasets/compatibility/__init__.py +10 -0
- dapla_metadata/datasets/compatibility/_handlers.py +363 -0
- dapla_metadata/datasets/compatibility/_utils.py +259 -0
- dapla_metadata/datasets/compatibility/model_backwards_compatibility.py +135 -0
- dapla_metadata/datasets/core.py +136 -182
- dapla_metadata/datasets/dapla_dataset_path_info.py +145 -19
- dapla_metadata/datasets/dataset_parser.py +41 -28
- dapla_metadata/datasets/model_validation.py +29 -20
- dapla_metadata/datasets/statistic_subject_mapping.py +5 -1
- dapla_metadata/datasets/utility/constants.py +22 -15
- dapla_metadata/datasets/utility/enums.py +8 -20
- dapla_metadata/datasets/utility/urn.py +234 -0
- dapla_metadata/datasets/utility/utils.py +183 -111
- dapla_metadata/standards/__init__.py +4 -0
- dapla_metadata/standards/name_validator.py +250 -0
- dapla_metadata/standards/standard_validators.py +98 -0
- dapla_metadata/standards/utils/__init__.py +1 -0
- dapla_metadata/standards/utils/constants.py +49 -0
- dapla_metadata/variable_definitions/__init__.py +11 -0
- dapla_metadata/variable_definitions/_generated/.openapi-generator/FILES +20 -0
- dapla_metadata/variable_definitions/_generated/.openapi-generator/VERSION +1 -0
- dapla_metadata/variable_definitions/_generated/.openapi-generator-ignore +6 -0
- dapla_metadata/variable_definitions/_generated/README.md +148 -0
- dapla_metadata/variable_definitions/_generated/__init__.py +0 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/__init__.py +47 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/api/__init__.py +8 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/api/data_migration_api.py +766 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/api/draft_variable_definitions_api.py +888 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/api/patches_api.py +888 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/api/validity_periods_api.py +583 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/api/variable_definitions_api.py +613 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/api_client.py +779 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/api_response.py +27 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/configuration.py +474 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/docs/CompleteResponse.md +51 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/docs/Contact.md +30 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/docs/DataMigrationApi.md +90 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/docs/Draft.md +42 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/docs/DraftVariableDefinitionsApi.md +259 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/docs/LanguageStringType.md +31 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/docs/Owner.md +31 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/docs/Patch.md +43 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/docs/PatchesApi.md +249 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/docs/PublicApi.md +218 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/docs/SupportedLanguages.md +15 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/docs/UpdateDraft.md +44 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/docs/ValidityPeriod.md +42 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/docs/ValidityPeriodsApi.md +236 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/docs/VariableDefinitionsApi.md +304 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/docs/VariableStatus.md +17 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/exceptions.py +193 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/models/__init__.py +31 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/models/complete_response.py +260 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/models/contact.py +94 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/models/draft.py +228 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/models/get_vardok_vardef_mapping_by_id200_response.py +158 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/models/language_string_type.py +101 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/models/owner.py +87 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/models/patch.py +244 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/models/problem.py +118 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/models/update_draft.py +274 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/models/validity_period.py +225 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/models/vardok_id_response.py +81 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/models/vardok_vardef_id_pair_response.py +84 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/models/variable_status.py +33 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/py.typed +0 -0
- dapla_metadata/variable_definitions/_generated/vardef_client/rest.py +249 -0
- dapla_metadata/variable_definitions/_utils/__init__.py +1 -0
- dapla_metadata/variable_definitions/_utils/_client.py +32 -0
- dapla_metadata/variable_definitions/_utils/config.py +54 -0
- dapla_metadata/variable_definitions/_utils/constants.py +80 -0
- dapla_metadata/variable_definitions/_utils/files.py +309 -0
- dapla_metadata/variable_definitions/_utils/template_files.py +99 -0
- dapla_metadata/variable_definitions/_utils/variable_definition_files.py +143 -0
- dapla_metadata/variable_definitions/exceptions.py +255 -0
- dapla_metadata/variable_definitions/vardef.py +372 -0
- dapla_metadata/variable_definitions/vardok_id.py +48 -0
- dapla_metadata/variable_definitions/vardok_vardef_id_pair.py +47 -0
- dapla_metadata/variable_definitions/variable_definition.py +422 -0
- {dapla_toolbelt_metadata-0.2.1.dist-info → dapla_toolbelt_metadata-0.9.11.dist-info}/METADATA +34 -36
- dapla_toolbelt_metadata-0.9.11.dist-info/RECORD +97 -0
- {dapla_toolbelt_metadata-0.2.1.dist-info → dapla_toolbelt_metadata-0.9.11.dist-info}/WHEEL +1 -1
- dapla_metadata/datasets/config.py +0 -80
- dapla_metadata/datasets/model_backwards_compatibility.py +0 -520
- dapla_metadata/datasets/user_info.py +0 -88
- dapla_toolbelt_metadata-0.2.1.dist-info/RECORD +0 -22
- {dapla_toolbelt_metadata-0.2.1.dist-info → dapla_toolbelt_metadata-0.9.11.dist-info/licenses}/LICENSE +0 -0
|
@@ -1,80 +0,0 @@
|
|
|
1
|
-
"""Configuration management for dataset package."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
import logging
|
|
6
|
-
import os
|
|
7
|
-
from pathlib import Path
|
|
8
|
-
from pprint import pformat
|
|
9
|
-
|
|
10
|
-
from dotenv import dotenv_values
|
|
11
|
-
from dotenv import load_dotenv
|
|
12
|
-
|
|
13
|
-
from dapla_metadata.datasets.utility.constants import (
|
|
14
|
-
DATADOC_STATISTICAL_SUBJECT_SOURCE_URL,
|
|
15
|
-
)
|
|
16
|
-
from dapla_metadata.datasets.utility.enums import DaplaRegion
|
|
17
|
-
from dapla_metadata.datasets.utility.enums import DaplaService
|
|
18
|
-
|
|
19
|
-
logger = logging.getLogger(__name__)
|
|
20
|
-
|
|
21
|
-
DOT_ENV_FILE_PATH = Path(__file__).parent.joinpath(".env")
|
|
22
|
-
|
|
23
|
-
JUPYTERHUB_USER = "JUPYTERHUB_USER"
|
|
24
|
-
DAPLA_REGION = "DAPLA_REGION"
|
|
25
|
-
DAPLA_SERVICE = "DAPLA_SERVICE"
|
|
26
|
-
|
|
27
|
-
env_loaded = False
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
def _load_dotenv_file() -> None:
    """Load the package-local .env file into the environment, once.

    The module-level ``env_loaded`` flag makes repeated calls no-ops, so
    config reads never re-parse the file within a process.
    """
    global env_loaded  # noqa: PLW0603
    if env_loaded or not DOT_ENV_FILE_PATH.exists():
        return
    load_dotenv(DOT_ENV_FILE_PATH)
    env_loaded = True
    # Log only the key names, never the values, to avoid leaking secrets.
    loaded_keys = list(dotenv_values(DOT_ENV_FILE_PATH).keys())
    logger.info(
        "Loaded .env file with config keys: \n%s",
        pformat(loaded_keys),
    )
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
def _get_config_item(item: str) -> str | None:
    """Look up a single config item, logging every access.

    Args:
        item: The environment variable name to read.

    Returns:
        The value from the environment, or None when unset.
    """
    _load_dotenv_file()  # ensure any .env overrides are in place first
    result = os.getenv(item)
    logger.debug("Config accessed. %s", item)
    return result
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
def get_jupyterhub_user() -> str | None:
    """Return the JupyterHub user name, or None when not configured."""
    return _get_config_item(JUPYTERHUB_USER)
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
def get_statistical_subject_source_url() -> str | None:
    """Return the URL to the statistical subject source.

    Falls back to the packaged default when the environment does not
    provide an override.
    """
    configured = _get_config_item("DATADOC_STATISTICAL_SUBJECT_SOURCE_URL")
    return configured or DATADOC_STATISTICAL_SUBJECT_SOURCE_URL
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
def get_dapla_region() -> DaplaRegion | None:
    """Return the Dapla region we're running on, or None when unset.

    Raises:
        ValueError: If the configured value is not a valid DaplaRegion.
    """
    region = _get_config_item(DAPLA_REGION)
    return DaplaRegion(region) if region else None
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
def get_dapla_service() -> DaplaService | None:
    """Return the Dapla service we're running on, or None when unset.

    Raises:
        ValueError: If the configured value is not a valid DaplaService.
    """
    service = _get_config_item(DAPLA_SERVICE)
    return DaplaService(service) if service else None
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
def get_oidc_token() -> str | None:
    """Return the JWT token from the environment, or None when absent."""
    return _get_config_item("OIDC_TOKEN")
|
|
@@ -1,520 +0,0 @@
|
|
|
1
|
-
"""Upgrade old metadata files to be compatible with new versions.
|
|
2
|
-
|
|
3
|
-
An important principle of Datadoc is that we ALWAYS guarantee backwards
|
|
4
|
-
compatibility of existing metadata documents. This means that we guarantee
|
|
5
|
-
that a user will never lose data, even if their document is decades old.
|
|
6
|
-
|
|
7
|
-
For each document version we release with breaking changes, we implement a
|
|
8
|
-
handler and register the version by defining a BackwardsCompatibleVersion
|
|
9
|
-
instance. These documents will then be upgraded when they're opened in Datadoc.
|
|
10
|
-
|
|
11
|
-
A test must also be implemented for each new version.
|
|
12
|
-
"""
|
|
13
|
-
|
|
14
|
-
from __future__ import annotations
|
|
15
|
-
|
|
16
|
-
from collections import OrderedDict
|
|
17
|
-
from dataclasses import dataclass
|
|
18
|
-
from datetime import datetime
|
|
19
|
-
from datetime import timezone
|
|
20
|
-
from typing import TYPE_CHECKING
|
|
21
|
-
from typing import Any
|
|
22
|
-
|
|
23
|
-
import arrow
|
|
24
|
-
|
|
25
|
-
if TYPE_CHECKING:
|
|
26
|
-
from collections.abc import Callable
|
|
27
|
-
|
|
28
|
-
VERSION_FIELD_NAME = "document_version"
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
class UnknownModelVersionError(Exception):
    """Exception raised for unknown model versions.

    This error is thrown when an unrecognized model version is encountered.
    """

    def __init__(
        self,
        supplied_version: str,
        *args: Any,
    ) -> None:
        """Initialize the exception with the supplied version.

        Args:
            supplied_version: The version of the model that was not recognized.
            *args: Additional arguments for the Exception base class.
        """
        # Bug fix: forward the extra arguments individually. The original
        # `super().__init__(args)` stored the whole varargs tuple as a single
        # element of `e.args` (a nested tuple). The `*args` annotation is
        # corrected too: it describes one argument, not a tuple of them.
        super().__init__(*args)
        self.supplied_version = supplied_version

    def __str__(self) -> str:
        """Return a human-readable description of the unsupported version."""
        return f"Document Version ({self.supplied_version}) of discovered file is not supported"
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
SUPPORTED_VERSIONS: OrderedDict[str, BackwardsCompatibleVersion] = OrderedDict()
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
@dataclass()
class BackwardsCompatibleVersion:
    """A version which we support with backwards compatibility.

    This class registers a version and its corresponding handler function
    for backwards compatibility. Instantiating it is sufficient to register
    the version; the instance does not need to be kept.
    """

    # The document version string this handler applies to, e.g. "3.2.0".
    version: str
    # Upgrade function: takes the metadata dict at `version` and returns it
    # migrated to the next supported version.
    handler: Callable[[dict[str, Any]], dict[str, Any]]

    def __post_init__(self) -> None:
        """Register this version in the supported versions map.

        This method adds the instance to the `SUPPORTED_VERSIONS` dictionary
        using the version as the key. Because `SUPPORTED_VERSIONS` is an
        OrderedDict, registration order determines upgrade order.
        """
        SUPPORTED_VERSIONS[self.version] = self
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
def handle_current_version(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
    """No-op handler for metadata already at the current version.

    Args:
        supplied_metadata: The metadata for the current version.

    Returns:
        The supplied metadata, unmodified.
    """
    # Nothing to migrate: the document is already up to date.
    return supplied_metadata
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
def _find_and_update_language_strings(supplied_metadata: dict | None) -> dict | None:
|
|
94
|
-
"""Find and update language-specific strings in the supplied metadata.
|
|
95
|
-
|
|
96
|
-
This function iterates through the supplied metadata dictionary.
|
|
97
|
-
For each key-value pair, if the value is a dictionary containing "en"
|
|
98
|
-
it is passed to the `_convert_language_string_type` function to potentially
|
|
99
|
-
update its format.
|
|
100
|
-
|
|
101
|
-
Args:
|
|
102
|
-
supplied_metadata: A metadata dictionary where values may include nested
|
|
103
|
-
dictionaries with language-specific strings.
|
|
104
|
-
|
|
105
|
-
Returns:
|
|
106
|
-
The updated metadata dictionary. If the supplied metadata is not a
|
|
107
|
-
dictionary, it returns `None`.
|
|
108
|
-
"""
|
|
109
|
-
if isinstance(supplied_metadata, dict):
|
|
110
|
-
for key, value in supplied_metadata.items():
|
|
111
|
-
if isinstance(value, dict) and "en" in value:
|
|
112
|
-
supplied_metadata[key] = _convert_language_string_type(value)
|
|
113
|
-
return supplied_metadata
|
|
114
|
-
return None
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
def _convert_language_string_type(supplied_value: dict) -> list[dict[str, str]]:
|
|
118
|
-
"""Convert a dictionary of language-specific strings to a list of dictionaries.
|
|
119
|
-
|
|
120
|
-
This function takes a dictionary with language codes as keys and
|
|
121
|
-
corresponding language-specific strings as values, and converts it to a list
|
|
122
|
-
of dictionaries with 'languageCode' and 'languageText' keys.
|
|
123
|
-
|
|
124
|
-
Args:
|
|
125
|
-
supplied_value: A dictionary containing language codes as keys and
|
|
126
|
-
language strings as values.
|
|
127
|
-
|
|
128
|
-
Returns:
|
|
129
|
-
A list of dictionaries, each containing 'languageCode' and 'languageText'
|
|
130
|
-
keys, representing the converted language strings.
|
|
131
|
-
"""
|
|
132
|
-
return [
|
|
133
|
-
{
|
|
134
|
-
"languageCode": "en",
|
|
135
|
-
"languageText": supplied_value["en"],
|
|
136
|
-
},
|
|
137
|
-
{
|
|
138
|
-
"languageCode": "nn",
|
|
139
|
-
"languageText": supplied_value["nn"],
|
|
140
|
-
},
|
|
141
|
-
{
|
|
142
|
-
"languageCode": "nb",
|
|
143
|
-
"languageText": supplied_value["nb"],
|
|
144
|
-
},
|
|
145
|
-
]
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
def _remove_element_from_model(
|
|
149
|
-
supplied_metadata: dict[str, Any],
|
|
150
|
-
element_to_remove: str,
|
|
151
|
-
) -> None:
|
|
152
|
-
"""Remove an element from the supplied metadata dictionary.
|
|
153
|
-
|
|
154
|
-
This function deletes a specified element from the supplied metadata dictionary
|
|
155
|
-
if it exists.
|
|
156
|
-
|
|
157
|
-
Args:
|
|
158
|
-
supplied_metadata: The metadata dictionary from which the element will be
|
|
159
|
-
removed.
|
|
160
|
-
element_to_remove: The key of the element to be removed from the metadata
|
|
161
|
-
dictionary.
|
|
162
|
-
"""
|
|
163
|
-
if element_to_remove in supplied_metadata:
|
|
164
|
-
del supplied_metadata[element_to_remove]
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
def _cast_to_date_type(value_to_update: str | None) -> str | None:
    """Normalise a date-like string to an ISO-format date string.

    Args:
        value_to_update: A string representing a date, or None.

    Returns:
        The ISO date string ("YYYY-MM-DD"), or None when the input is None.
    """
    if value_to_update is None:
        return None
    # arrow tolerates a range of datetime formats; keep only the date part.
    return str(arrow.get(value_to_update).date())
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
def handle_version_3_3_0(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
    """Handle breaking changes for version 3.3.0.

    Migrates a 3.3.0 document to 4.0.0 by removing the
    'direct_person_identifying' field from every variable in
    'datadoc.variables' and bumping 'document_version' to "4.0.0".

    Args:
        supplied_metadata: The metadata dictionary to be updated.

    Returns:
        The updated metadata dictionary.
    """
    for variable in supplied_metadata["datadoc"]["variables"]:
        _remove_element_from_model(variable, "direct_person_identifying")
    supplied_metadata["datadoc"]["document_version"] = "4.0.0"
    return supplied_metadata
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
def handle_version_3_2_0(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
    """Handle breaking changes for version 3.2.0.

    Migrates a 3.2.0 document to 3.3.0: the 'contains_data_from' and
    'contains_data_until' fields in both the 'dataset' and every entry of
    'variables' are re-stored as ISO date strings, and 'document_version'
    is bumped to "3.3.0".

    Args:
        supplied_metadata: The metadata dictionary to be updated.

    Returns:
        The updated metadata dictionary.
    """
    datadoc = supplied_metadata["datadoc"]
    for field in ("contains_data_from", "contains_data_until"):
        datadoc["dataset"][field] = _cast_to_date_type(
            datadoc["dataset"].get(field),
        )
        for variable in datadoc["variables"]:
            variable[field] = _cast_to_date_type(variable.get(field))

    datadoc["document_version"] = "3.3.0"
    return supplied_metadata
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
def handle_version_3_1_0(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
    """Handle breaking changes for version 3.1.0.

    Migrates a 3.1.0 document to 3.2.0: the 'data_source' field in the
    'dataset' and in every variable is flattened from a list of language
    entries to the plain string of its first 'languageText'. The
    'document_version' field is bumped to "3.2.0".

    Args:
        supplied_metadata: The metadata dictionary to be updated.

    Returns:
        The updated metadata dictionary.
    """
    datadoc = supplied_metadata["datadoc"]

    dataset_source = datadoc["dataset"]["data_source"]
    if dataset_source is not None:
        datadoc["dataset"]["data_source"] = str(dataset_source[0]["languageText"])

    for variable in datadoc["variables"]:
        variable_source = variable["data_source"]
        if variable_source is not None:
            variable["data_source"] = str(variable_source[0]["languageText"])

    datadoc["document_version"] = "3.2.0"
    return supplied_metadata
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
def handle_version_2_2_0(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
    """Handle breaking changes for version 2.2.0.

    Migrates a 2.2.0 document to 3.1.0: flattens 'subject_field' in the
    'dataset' to a plain string (preferring nb, then nn, then en), removes
    'register_uri' from the dataset and 'sentinel_value_uri' from every
    variable, nulls 'special_value' and 'custom_type', and normalises
    language strings in both sections. 'document_version' becomes "3.1.0".

    Args:
        supplied_metadata: The metadata dictionary to be updated.

    Returns:
        The updated metadata dictionary.
    """
    dataset = supplied_metadata["datadoc"]["dataset"]

    subject = dataset["subject_field"]
    if subject is not None:
        dataset["subject_field"] = str(subject["nb"] or subject["nn"] or subject["en"])

    _remove_element_from_model(dataset, "register_uri")

    variables = supplied_metadata["datadoc"]["variables"]
    for index, variable in enumerate(variables):
        _remove_element_from_model(variable, "sentinel_value_uri")
        variable["special_value"] = None
        variable["custom_type"] = None
        variables[index] = _find_and_update_language_strings(variable)

    dataset["custom_type"] = None
    supplied_metadata["datadoc"]["dataset"] = _find_and_update_language_strings(dataset)
    supplied_metadata["datadoc"]["document_version"] = "3.1.0"
    return supplied_metadata
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
def add_container(existing_metadata: dict) -> dict:
    """Wrap legacy metadata in the container structure.

    The container groups different 'types' of metadata under one document:
    'datadoc' holds the supplied metadata, 'pseudonymization' starts out as
    None, and the container's own 'document_version' is "0.0.1".

    Args:
        existing_metadata: The original metadata dictionary to be wrapped.

    Returns:
        A new dictionary containing the wrapped metadata.
    """
    container: dict = {"document_version": "0.0.1"}
    container["datadoc"] = existing_metadata
    container["pseudonymization"] = None
    return container
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
def handle_version_2_1_0(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
    """Handle breaking changes for version 2.1.0.

    Migrates a 2.1.0 document to 2.2.0: the 'owner' field in the 'dataset'
    is flattened from a LanguageStringType dict to a plain string
    (preferring nb, then nn, then en), 'document_version' becomes "2.2.0",
    and the result is wrapped in the container structure.

    Args:
        supplied_metadata: The metadata dictionary to be updated.

    Returns:
        The updated, container-wrapped metadata dictionary.
    """
    owner = supplied_metadata["dataset"]["owner"]
    supplied_metadata["dataset"]["owner"] = str(owner["nb"] or owner["nn"] or owner["en"])
    supplied_metadata["document_version"] = "2.2.0"
    return add_container(supplied_metadata)
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
def handle_version_1_0_0(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
    """Handle breaking changes for version 1.0.0.

    Migrates a 1.0.0 document to 2.1.0: 'metadata_created_date' and
    'metadata_last_updated_date' are normalised to ISO 8601 with UTC
    timezone (seconds precision), a plain-string 'data_source' is expanded
    into a language-keyed dict, 'data_source_path' is removed, and
    'document_version' becomes "2.1.0".

    Args:
        supplied_metadata: The metadata dictionary to be updated.

    Returns:
        The updated metadata dictionary.
    """
    dataset = supplied_metadata["dataset"]

    for field in ("metadata_created_date", "metadata_last_updated_date"):
        raw_value = dataset[field]
        if raw_value:
            normalized = datetime.fromisoformat(raw_value).astimezone(tz=timezone.utc)
            dataset[field] = normalized.isoformat(timespec="seconds")

    if isinstance(dataset["data_source"], str):
        dataset["data_source"] = {
            "en": dataset["data_source"],
            "nn": "",
            "nb": "",
        }

    _remove_element_from_model(dataset, "data_source_path")

    supplied_metadata["document_version"] = "2.1.0"
    return supplied_metadata
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
def handle_version_0_1_1(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
    """Handle breaking changes for version 0.1.1.

    Migrates a 0.1.1 document to 1.0.0: the dataset bookkeeping keys gain
    a 'metadata_' prefix, empty-string dataset values become None (empty
    strings are not valid LanguageStrings values), and each variable's
    'datatype' key is renamed to 'data_type'.

    Args:
        supplied_metadata: The metadata dictionary that needs to be updated.

    Returns:
        The updated metadata dictionary.

    References:
        PR ref: https://github.com/statisticsnorway/ssb-datadoc-model/pull/4
    """
    dataset_key_renaming = [
        ("metadata_created_date", "created_date"),
        ("metadata_created_by", "created_by"),
        ("metadata_last_updated_date", "last_updated_date"),
        ("metadata_last_updated_by", "last_updated_by"),
    ]
    dataset = supplied_metadata["dataset"]
    for new_key, old_key in dataset_key_renaming:
        dataset[new_key] = dataset.pop(old_key)

    # Replace empty strings with None; empty strings are not valid for
    # LanguageStrings values.
    supplied_metadata["dataset"] = {
        key: (None if value == "" else value) for key, value in dataset.items()
    }

    for variable in supplied_metadata["variables"]:
        variable["data_type"] = variable.pop("datatype")

    return supplied_metadata
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
# Register all the supported versions and their handlers.
# MUST be ordered from oldest to newest: upgrade_metadata() runs every
# handler from the supplied version onward, so each handler only needs to
# migrate to the NEXT version in this list. Instantiation alone registers
# the version (via BackwardsCompatibleVersion.__post_init__).
BackwardsCompatibleVersion(version="0.1.1", handler=handle_version_0_1_1)
BackwardsCompatibleVersion(version="1.0.0", handler=handle_version_1_0_0)
BackwardsCompatibleVersion(
    version="2.1.0",
    handler=handle_version_2_1_0,
)  # A container must be created at this version
BackwardsCompatibleVersion(version="2.2.0", handler=handle_version_2_2_0)
BackwardsCompatibleVersion(version="3.1.0", handler=handle_version_3_1_0)
BackwardsCompatibleVersion(version="3.2.0", handler=handle_version_3_2_0)
BackwardsCompatibleVersion(version="3.3.0", handler=handle_version_3_3_0)
# The current version's handler is the identity function.
BackwardsCompatibleVersion(version="4.0.0", handler=handle_current_version)
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
def upgrade_metadata(fresh_metadata: dict[str, Any]) -> dict[str, Any]:
    """Upgrade the metadata to the latest version using registered handlers.

    Determines the document version of the supplied metadata (from inside
    the container when present, otherwise from the top level) and applies
    every registered handler from that version onward, in registration
    order. A container whose 'datadoc' is None is returned untouched.

    Args:
        fresh_metadata: The metadata dictionary to be upgraded. Must carry
            the version information that determines which handlers apply.

    Returns:
        The upgraded metadata dictionary, after applying all necessary
        handlers.

    Raises:
        UnknownModelVersionError: If the metadata's version is unknown or
            unsupported.
    """
    if is_metadata_in_container_structure(fresh_metadata):
        # Nothing to upgrade when the container carries no datadoc section.
        if fresh_metadata["datadoc"] is None:
            return fresh_metadata
        supplied_version = fresh_metadata["datadoc"][VERSION_FIELD_NAME]
    else:
        supplied_version = fresh_metadata[VERSION_FIELD_NAME]

    # Run all the handlers in order from the supplied version onwards.
    matched_version = False
    for registered_version, entry in SUPPORTED_VERSIONS.items():
        if registered_version == supplied_version:
            matched_version = True
        if matched_version:
            fresh_metadata = entry.handler(fresh_metadata)

    if not matched_version:
        raise UnknownModelVersionError(supplied_version)
    return fresh_metadata
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
def is_metadata_in_container_structure(
    metadata: dict,
) -> bool:
    """Check if the metadata is in the container structure.

    At some point a metadata 'container' was introduced: a wrapper that
    groups different 'types' of metadata, such as 'datadoc' and
    'pseudonymization'. The presence of the 'datadoc' key is what
    distinguishes a container from the older flat layout.

    Args:
        metadata: The metadata dictionary to inspect.

    Returns:
        True when the 'datadoc' key is present (container structure),
        False otherwise.
    """
    container_marker = "datadoc"
    return container_marker in metadata
|
|
@@ -1,88 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import contextlib
|
|
4
|
-
import logging
|
|
5
|
-
from typing import Protocol
|
|
6
|
-
|
|
7
|
-
import jwt
|
|
8
|
-
|
|
9
|
-
from dapla_metadata.datasets import config
|
|
10
|
-
from dapla_metadata.datasets.utility.enums import DaplaRegion
|
|
11
|
-
from dapla_metadata.datasets.utility.enums import DaplaService
|
|
12
|
-
|
|
13
|
-
logger = logging.getLogger(__name__)
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
# Fallback email address used when no real user identity is available
# (returned by TestUserInfo for local development and test runs).
PLACEHOLDER_EMAIL_ADDRESS = "default_user@ssb.no"
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
class UserInfo(Protocol):
    """Structural interface for information about the current user.

    Separate implementations exist per runtime platform, plus fallbacks
    for testing and for unknown environments.
    """

    @property
    def short_email(self) -> str | None:
        """The user's short email address, or None when unavailable."""
        ...
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
class UnknownUserInfo:
    """Fallback implementation used when no platform-specific variant applies."""

    @property
    def short_email(self) -> str | None:
        """Short email address; always None since the user is unknown."""
        return None
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
class TestUserInfo:
    """UserInfo variant for local development and test runs."""

    @property
    def short_email(self) -> str | None:
        """Return the fixed placeholder email address."""
        # No real identity exists locally, so a module-level placeholder is used.
        return PLACEHOLDER_EMAIL_ADDRESS
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
class DaplaLabUserInfo:
    """Information about the current user when running on Dapla Lab."""

    @property
    def short_email(self) -> str | None:
        """Get the short email address.

        Reads the OIDC token injected by the platform and extracts the
        'email' claim. Logs a warning and returns None when the token is
        missing or carries no email claim.
        """
        encoded_jwt = config.get_oidc_token()
        if encoded_jwt:
            # The JWT has been verified by the platform prior to injection, no need to verify.
            decoded_jwt = jwt.decode(encoded_jwt, options={"verify_signature": False})
            try:
                return decoded_jwt["email"]
            except KeyError:
                # No email claim present: fall through to the warning below.
                pass

        logger.warning(
            "Could not access JWT from environment. Could not get short email address.",
        )
        return None
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
class JupyterHubUserInfo:
    """Information about the current user when running on JupyterHub."""

    @property
    def short_email(self) -> str | None:
        """Get the short email address."""
        # Delegate the lookup to the shared config helper.
        username = config.get_jupyterhub_user()
        return username
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
def get_user_info_for_current_platform() -> UserInfo:
    """Return the correct implementation of UserInfo for the current platform.

    Dapla Lab is checked first, then JupyterLab; anything else falls back
    to UnknownUserInfo with a logged warning.
    """
    if config.get_dapla_region() == DaplaRegion.DAPLA_LAB:
        return DaplaLabUserInfo()
    if config.get_dapla_service() == DaplaService.JUPYTERLAB:
        return JupyterHubUserInfo()
    logger.warning(
        "Was not possible to retrieve user information! Some fields may not be set.",
    )
    return UnknownUserInfo()
|