dapla-toolbelt-metadata 0.9.5__py3-none-any.whl → 0.9.6.dev1759398171__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dapla-toolbelt-metadata might be problematic. Click here for more details.
- dapla_metadata/datasets/core.py +13 -15
- dapla_metadata/datasets/utility/utils.py +1 -1
- {dapla_toolbelt_metadata-0.9.5.dist-info → dapla_toolbelt_metadata-0.9.6.dev1759398171.dist-info}/METADATA +1 -1
- {dapla_toolbelt_metadata-0.9.5.dist-info → dapla_toolbelt_metadata-0.9.6.dev1759398171.dist-info}/RECORD +6 -7
- dapla_metadata/datasets/utility/urn.py +0 -129
- {dapla_toolbelt_metadata-0.9.5.dist-info → dapla_toolbelt_metadata-0.9.6.dev1759398171.dist-info}/WHEEL +0 -0
- {dapla_toolbelt_metadata-0.9.5.dist-info → dapla_toolbelt_metadata-0.9.6.dev1759398171.dist-info}/licenses/LICENSE +0 -0
dapla_metadata/datasets/core.py
CHANGED
|
@@ -33,10 +33,7 @@ from dapla_metadata.datasets.utility.constants import (
|
|
|
33
33
|
from dapla_metadata.datasets.utility.constants import METADATA_DOCUMENT_FILE_SUFFIX
|
|
34
34
|
from dapla_metadata.datasets.utility.constants import NUM_OBLIGATORY_DATASET_FIELDS
|
|
35
35
|
from dapla_metadata.datasets.utility.constants import NUM_OBLIGATORY_VARIABLES_FIELDS
|
|
36
|
-
from dapla_metadata.datasets.utility.urn import convert_definition_uris_to_urns
|
|
37
36
|
from dapla_metadata.datasets.utility.utils import OptionalDatadocMetadataType
|
|
38
|
-
from dapla_metadata.datasets.utility.utils import VariableListType
|
|
39
|
-
from dapla_metadata.datasets.utility.utils import VariableType
|
|
40
37
|
from dapla_metadata.datasets.utility.utils import calculate_percentage
|
|
41
38
|
from dapla_metadata.datasets.utility.utils import derive_assessment_from_state
|
|
42
39
|
from dapla_metadata.datasets.utility.utils import get_timestamp_now
|
|
@@ -111,8 +108,8 @@ class Datadoc:
|
|
|
111
108
|
self.container: all_optional_model.MetadataContainer | None = None
|
|
112
109
|
self.dataset_path: pathlib.Path | CloudPath | None = None
|
|
113
110
|
self.dataset = all_optional_model.Dataset()
|
|
114
|
-
self.variables: VariableListType = []
|
|
115
|
-
self.variables_lookup: dict[str, VariableType] = {}
|
|
111
|
+
self.variables: list = []
|
|
112
|
+
self.variables_lookup: dict[str, all_optional_model.Variable] = {}
|
|
116
113
|
self.explicitly_defined_metadata_document = False
|
|
117
114
|
self.dataset_consistency_status: list[DatasetConsistencyStatus] = []
|
|
118
115
|
if metadata_document_path:
|
|
@@ -207,21 +204,22 @@ class Datadoc:
|
|
|
207
204
|
else:
|
|
208
205
|
self._set_metadata(existing_metadata or extracted_metadata)
|
|
209
206
|
|
|
207
|
+
set_default_values_variables(self.variables)
|
|
208
|
+
set_default_values_dataset(cast("all_optional_model.Dataset", self.dataset))
|
|
209
|
+
set_dataset_owner(self.dataset)
|
|
210
|
+
self._create_variables_lookup()
|
|
211
|
+
|
|
210
212
|
def _set_metadata(
|
|
211
213
|
self,
|
|
212
|
-
|
|
214
|
+
merged_metadata: OptionalDatadocMetadataType,
|
|
213
215
|
) -> None:
|
|
214
|
-
if not
|
|
216
|
+
if not merged_metadata or not (
|
|
217
|
+
merged_metadata.dataset and merged_metadata.variables
|
|
218
|
+
):
|
|
215
219
|
msg = "Could not read metadata"
|
|
216
220
|
raise ValueError(msg)
|
|
217
|
-
self.dataset = cast("all_optional_model.Dataset",
|
|
218
|
-
self.variables =
|
|
219
|
-
|
|
220
|
-
set_default_values_variables(self.variables)
|
|
221
|
-
set_default_values_dataset(cast("all_optional_model.Dataset", self.dataset))
|
|
222
|
-
set_dataset_owner(self.dataset)
|
|
223
|
-
convert_definition_uris_to_urns(self.variables)
|
|
224
|
-
self._create_variables_lookup()
|
|
221
|
+
self.dataset = cast("all_optional_model.Dataset", merged_metadata.dataset)
|
|
222
|
+
self.variables = merged_metadata.variables
|
|
225
223
|
|
|
226
224
|
def _create_variables_lookup(self) -> None:
|
|
227
225
|
self.variables_lookup = {
|
|
@@ -121,7 +121,7 @@ def derive_assessment_from_state(state: DataSetState) -> Assessment:
|
|
|
121
121
|
return Assessment.SENSITIVE
|
|
122
122
|
|
|
123
123
|
|
|
124
|
-
def set_default_values_variables(variables: VariableListType) -> None:
|
|
124
|
+
def set_default_values_variables(variables: list) -> None:
|
|
125
125
|
"""Set default values on variables.
|
|
126
126
|
|
|
127
127
|
Args:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dapla-toolbelt-metadata
|
|
3
|
-
Version: 0.9.5
|
|
3
|
+
Version: 0.9.6.dev1759398171
|
|
4
4
|
Summary: Dapla Toolbelt Metadata
|
|
5
5
|
Project-URL: homepage, https://github.com/statisticsnorway/dapla-toolbelt-metadata
|
|
6
6
|
Project-URL: repository, https://github.com/statisticsnorway/dapla-toolbelt-metadata
|
|
@@ -8,7 +8,7 @@ dapla_metadata/dapla/user_info.py,sha256=bENez-ICt9ySR8orYebO68Q3_2LkIW9QTL58DTc
|
|
|
8
8
|
dapla_metadata/datasets/__init__.py,sha256=an-REJgi7N8-S1SCz-MYO_8as6fMe03WvhjRP_hWWkg,293
|
|
9
9
|
dapla_metadata/datasets/_merge.py,sha256=Tk5wQz6xZGr8veUAHZb42O8HARU8ObBJ_E4afvVWdlo,12993
|
|
10
10
|
dapla_metadata/datasets/code_list.py,sha256=JtCE-5Q8grAKvkn0KKjzeGhO-96O7yGsastbuoakreg,9057
|
|
11
|
-
dapla_metadata/datasets/core.py,sha256=
|
|
11
|
+
dapla_metadata/datasets/core.py,sha256=p-2OJsAEWCUqBlzn0YIYkK-pAgtvMROdoxXvCyjfWYs,20434
|
|
12
12
|
dapla_metadata/datasets/dapla_dataset_path_info.py,sha256=WPeV_mwKk2B9sXd14SaP-kTb1bOQ_8W2KtrqOG7sJIY,26867
|
|
13
13
|
dapla_metadata/datasets/dataset_parser.py,sha256=3dtRXNy1C8SfG8zTYWdY26nV4l-dG25IC_0J5t2bYwI,8285
|
|
14
14
|
dapla_metadata/datasets/model_validation.py,sha256=6qqq1ueTWRWBPTwEGJD49Pv7ksMEaq0iDtuOXelaw-s,7223
|
|
@@ -23,8 +23,7 @@ dapla_metadata/datasets/external_sources/external_sources.py,sha256=9eIcOIUbaodN
|
|
|
23
23
|
dapla_metadata/datasets/utility/__init__.py,sha256=pp6tUcgUbo8iq9OPtFKQrTbLuI3uY7NHptwWSTpasOU,33
|
|
24
24
|
dapla_metadata/datasets/utility/constants.py,sha256=YKsn6GfNIkwLoBp0yq209o0TbsEhsA_jGaZLVR984JU,2933
|
|
25
25
|
dapla_metadata/datasets/utility/enums.py,sha256=i6dcxWya5k4LjLdGGIM_H37rRndizug3peaAgoE5UdM,652
|
|
26
|
-
dapla_metadata/datasets/utility/
|
|
27
|
-
dapla_metadata/datasets/utility/utils.py,sha256=q76UJI8W4j2aHSq1jz_AfYnJmLfygEflgUrQpqQEPnY,20157
|
|
26
|
+
dapla_metadata/datasets/utility/utils.py,sha256=Enlmhj1BA7C9Im1ju3EwS6_kV1cpT53wb2cCtBGs_lI,20145
|
|
28
27
|
dapla_metadata/standards/__init__.py,sha256=n8jnMrudLuScSdfQ4UMJorc-Ptg3Y1-ilT8zAaQnM70,179
|
|
29
28
|
dapla_metadata/standards/name_validator.py,sha256=6-DQE_EKVd6UjL--EXpFcZDQtusVbSFaWaUY-CfOV2c,9184
|
|
30
29
|
dapla_metadata/standards/standard_validators.py,sha256=tcCiCI76wUVtMzXA2oCgdauZc0uGgUi11FKu-t7KGwQ,3767
|
|
@@ -91,7 +90,7 @@ dapla_metadata/variable_definitions/_utils/constants.py,sha256=zr5FNVCEz6TM9PVEr
|
|
|
91
90
|
dapla_metadata/variable_definitions/_utils/files.py,sha256=JbPgPNQ7iA38juMqGEdcg5OjZZUwCb6NQtPL0AEspD0,10933
|
|
92
91
|
dapla_metadata/variable_definitions/_utils/template_files.py,sha256=7fcc7yEHOl5JUZ698kqj4IiikXPHBi3SrAVOk4wqQtw,3308
|
|
93
92
|
dapla_metadata/variable_definitions/_utils/variable_definition_files.py,sha256=sGhcSpckR9NtYGNh2oVkiCd5SI3bbJEBhc1PA2uShs0,4701
|
|
94
|
-
dapla_toolbelt_metadata-0.9.
|
|
95
|
-
dapla_toolbelt_metadata-0.9.
|
|
96
|
-
dapla_toolbelt_metadata-0.9.
|
|
97
|
-
dapla_toolbelt_metadata-0.9.
|
|
93
|
+
dapla_toolbelt_metadata-0.9.6.dev1759398171.dist-info/METADATA,sha256=kFTC8HmlIehHQghGFGBO0mXum-RNqrAm4jw3Tq9tIUE,4737
|
|
94
|
+
dapla_toolbelt_metadata-0.9.6.dev1759398171.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
95
|
+
dapla_toolbelt_metadata-0.9.6.dev1759398171.dist-info/licenses/LICENSE,sha256=np3IfD5m0ZUofn_kVzDZqliozuiO6wrktw3LRPjyEiI,1073
|
|
96
|
+
dapla_toolbelt_metadata-0.9.6.dev1759398171.dist-info/RECORD,,
|
|
@@ -1,129 +0,0 @@
|
|
|
1
|
-
"""Validate, parse and render URNs."""
|
|
2
|
-
|
|
3
|
-
import logging
|
|
4
|
-
import re
|
|
5
|
-
from dataclasses import dataclass
|
|
6
|
-
from enum import Enum
|
|
7
|
-
from enum import auto
|
|
8
|
-
|
|
9
|
-
from pydantic import AnyUrl
|
|
10
|
-
|
|
11
|
-
from dapla_metadata.datasets.utility.utils import VariableListType
|
|
12
|
-
|
|
13
|
-
logger = logging.getLogger(__name__)
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
VARDEF_URL_TEMPLATE = "https://{subdomain}.{domain}/variable-definitions"
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
class SsbNaisDomains(str, Enum):
|
|
20
|
-
"""The available domains on SSBs Nais instance."""
|
|
21
|
-
|
|
22
|
-
TEST_EXTERNAL = "test.ssb.no"
|
|
23
|
-
TEST_INTERNAL = "intern.test.ssb.no"
|
|
24
|
-
PROD_EXTERNAL = "ssb.no"
|
|
25
|
-
PROD_INTERNAL = "intern.ssb.no"
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
class ReferenceUrlTypes(Enum):
|
|
29
|
-
"""The general category of the URL.
|
|
30
|
-
|
|
31
|
-
This can be useful to refer to when constructing a URL from a URN for a
|
|
32
|
-
specific context.
|
|
33
|
-
"""
|
|
34
|
-
|
|
35
|
-
API = auto()
|
|
36
|
-
FRONTEND = auto()
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
@dataclass
|
|
40
|
-
class UrnConverter:
|
|
41
|
-
"""Converts URLs to URNs and vice versa.
|
|
42
|
-
|
|
43
|
-
Fields:
|
|
44
|
-
urn_base: The format for the URN, up to the identifier.
|
|
45
|
-
id_pattern: A capturing group pattern which matches identifiers for this resource.
|
|
46
|
-
url_bases: The list of all the different URL representations for a resource. There
|
|
47
|
-
will typically be a number of URL representations for a particular resource,
|
|
48
|
-
depending on which system or technology they are accessed through and other
|
|
49
|
-
technical factors. This list defines which concrete URLs can be considered
|
|
50
|
-
equivalent to a URN.
|
|
51
|
-
"""
|
|
52
|
-
|
|
53
|
-
urn_base: str
|
|
54
|
-
id_pattern: str
|
|
55
|
-
url_bases: list[tuple[ReferenceUrlTypes, str]]
|
|
56
|
-
|
|
57
|
-
def _extract_id(self, url: str, pattern: re.Pattern[str]) -> str | None:
|
|
58
|
-
if match := pattern.match(url):
|
|
59
|
-
return match.group(1)
|
|
60
|
-
return None
|
|
61
|
-
|
|
62
|
-
def _build_pattern(self, url_base: str) -> re.Pattern[str]:
|
|
63
|
-
return re.compile(f"^{url_base}/{self.id_pattern}")
|
|
64
|
-
|
|
65
|
-
def build_urn(self, identifier: str) -> str:
|
|
66
|
-
"""Build a URN for the given identifier."""
|
|
67
|
-
return f"{self.urn_base}:{identifier}"
|
|
68
|
-
|
|
69
|
-
def convert_to_urn(self, url: str | AnyUrl | None) -> str | None:
|
|
70
|
-
"""Convert a URL to a generalized URN for that same resource.
|
|
71
|
-
|
|
72
|
-
Args:
|
|
73
|
-
url (str | None): The URL to convert.
|
|
74
|
-
|
|
75
|
-
Returns:
|
|
76
|
-
str | None: The URN or None if it can't be converted.
|
|
77
|
-
"""
|
|
78
|
-
if not url:
|
|
79
|
-
return None
|
|
80
|
-
|
|
81
|
-
patterns = (self._build_pattern(url[-1]) for url in self.url_bases)
|
|
82
|
-
matches = (self._extract_id(str(url), p) for p in patterns)
|
|
83
|
-
identifier = next((m for m in matches if m), None)
|
|
84
|
-
if identifier:
|
|
85
|
-
return self.build_urn(identifier)
|
|
86
|
-
|
|
87
|
-
return None
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
vardef_urn_converter = UrnConverter(
|
|
91
|
-
urn_base="urn:ssb:variable-definition:vardef",
|
|
92
|
-
id_pattern=r"([a-z0-9]{8})",
|
|
93
|
-
url_bases=[
|
|
94
|
-
*[
|
|
95
|
-
(
|
|
96
|
-
ReferenceUrlTypes.API,
|
|
97
|
-
VARDEF_URL_TEMPLATE.format(
|
|
98
|
-
subdomain="metadata", domain=nais_domain.value
|
|
99
|
-
),
|
|
100
|
-
)
|
|
101
|
-
for nais_domain in SsbNaisDomains
|
|
102
|
-
],
|
|
103
|
-
*[
|
|
104
|
-
(
|
|
105
|
-
ReferenceUrlTypes.FRONTEND,
|
|
106
|
-
VARDEF_URL_TEMPLATE.format(
|
|
107
|
-
subdomain="catalog", domain=nais_domain.value
|
|
108
|
-
),
|
|
109
|
-
)
|
|
110
|
-
for nais_domain in SsbNaisDomains
|
|
111
|
-
],
|
|
112
|
-
],
|
|
113
|
-
)
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
def convert_definition_uris_to_urns(variables: VariableListType) -> None:
|
|
117
|
-
"""Where definition URIs are recognized URLs, convert them to URNs.
|
|
118
|
-
|
|
119
|
-
Where the value is not a known URL we preserve the value as it is and log an
|
|
120
|
-
ERROR level message.
|
|
121
|
-
|
|
122
|
-
Args:
|
|
123
|
-
variables (VariableListType): The list of variables.
|
|
124
|
-
"""
|
|
125
|
-
for v in variables:
|
|
126
|
-
if urn := vardef_urn_converter.convert_to_urn(v.definition_uri):
|
|
127
|
-
v.definition_uri = urn # type: ignore [assignment]
|
|
128
|
-
else:
|
|
129
|
-
logger.error("Could not convert value to URN: %s", v.definition_uri)
|
|
File without changes
|
|
File without changes
|