dapla-toolbelt-metadata 0.6.5__py3-none-any.whl → 0.7.0__py3-none-any.whl
- dapla_metadata/__init__.py +1 -1
- dapla_metadata/datasets/core.py +95 -49
- dapla_metadata/datasets/dapla_dataset_path_info.py +1 -1
- dapla_metadata/datasets/dataset_parser.py +4 -4
- dapla_metadata/datasets/statistic_subject_mapping.py +5 -1
- dapla_metadata/datasets/utility/constants.py +2 -2
- dapla_metadata/datasets/utility/utils.py +44 -17
- dapla_metadata/variable_definitions/_utils/config.py +0 -18
- dapla_metadata/variable_definitions/_utils/constants.py +0 -2
- dapla_metadata/variable_definitions/_utils/files.py +8 -68
- {dapla_toolbelt_metadata-0.6.5.dist-info → dapla_toolbelt_metadata-0.7.0.dist-info}/METADATA +2 -2
- {dapla_toolbelt_metadata-0.6.5.dist-info → dapla_toolbelt_metadata-0.7.0.dist-info}/RECORD +14 -16
- dapla_metadata/variable_definitions/_utils/descriptions.py +0 -89
- dapla_metadata/variable_definitions/resources/vardef_model_descriptions_nb.yaml +0 -109
- {dapla_toolbelt_metadata-0.6.5.dist-info → dapla_toolbelt_metadata-0.7.0.dist-info}/LICENSE +0 -0
- {dapla_toolbelt_metadata-0.6.5.dist-info → dapla_toolbelt_metadata-0.7.0.dist-info}/WHEEL +0 -0
dapla_metadata/__init__.py
CHANGED
@@ -7,7 +7,7 @@ warnings.filterwarnings(
     message="As the c extension couldn't be imported, `google-crc32c` is using a pure python implementation that is significantly slower.",
 )

-import datadoc_model.model as datadoc_model
+import datadoc_model.all_optional.model as datadoc_model

 from . import dapla
 from . import datasets
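The only change in this module is the switch from the single `datadoc_model.model` module to the new `datadoc_model.all_optional.model` module that ships with ssb-datadoc-model 6.1.0. A minimal compatibility sketch (an assumption, not part of the package) for code that must import the models under either layout:

    # Assumption: both module layouts expose the same model classes.
    try:
        import datadoc_model.all_optional.model as datadoc_model  # ssb-datadoc-model >= 6.1.0
    except ImportError:
        import datadoc_model.model as datadoc_model  # older releases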
dapla_metadata/datasets/core.py
CHANGED
@@ -9,9 +9,11 @@ import warnings
 from concurrent.futures import ThreadPoolExecutor
 from pathlib import Path
 from typing import TYPE_CHECKING
+from typing import cast

-
-
+import datadoc_model.all_optional.model as all_optional_model
+import datadoc_model.required.model as required_model
+from datadoc_model.all_optional.model import DataSetStatus

 from dapla_metadata._shared import config
 from dapla_metadata.dapla import user_info
@@ -30,6 +32,8 @@ from dapla_metadata.datasets.utility.constants import INCONSISTENCIES_MESSAGE
 from dapla_metadata.datasets.utility.constants import METADATA_DOCUMENT_FILE_SUFFIX
 from dapla_metadata.datasets.utility.constants import NUM_OBLIGATORY_DATASET_FIELDS
 from dapla_metadata.datasets.utility.constants import NUM_OBLIGATORY_VARIABLES_FIELDS
+from dapla_metadata.datasets.utility.utils import ExistingPseudonymizationMetadataType
+from dapla_metadata.datasets.utility.utils import OptionalDatadocMetadataType
 from dapla_metadata.datasets.utility.utils import calculate_percentage
 from dapla_metadata.datasets.utility.utils import derive_assessment_from_state
 from dapla_metadata.datasets.utility.utils import get_timestamp_now
@@ -84,8 +88,8 @@ class Datadoc:
         dataset_path: str | None = None,
         metadata_document_path: str | None = None,
         statistic_subject_mapping: StatisticSubjectMapping | None = None,
-        *,
         errors_as_warnings: bool = False,
+        validate_required_fields_on_existing_metadata: bool = False,
     ) -> None:
         """Initialize the Datadoc instance.

@@ -101,17 +105,23 @@ class Datadoc:
                 Defaults to None
             errors_as_warnings: Disable raising exceptions if inconsistencies
                 are found between existing and extracted metadata.
+            validate_required_fields_on_existing_metadata: Use a Pydantic model
+                which validates whether required fields are present when reading
+                in an existing metadata file.
         """
         self._statistic_subject_mapping = statistic_subject_mapping
         self.errors_as_warnings = errors_as_warnings
+        self.validate_required_fields_on_existing_metadata = (
+            validate_required_fields_on_existing_metadata
+        )
         self.metadata_document: pathlib.Path | CloudPath | None = None
-        self.container:
+        self.container: all_optional_model.MetadataContainer | None = None
         self.dataset_path: pathlib.Path | CloudPath | None = None
-        self.dataset =
+        self.dataset = all_optional_model.Dataset()
         self.variables: list = []
-        self.pseudo_variables: list[
-        self.variables_lookup: dict[str,
-        self.pseudo_variables_lookup: dict[str,
+        self.pseudo_variables: list[all_optional_model.PseudoVariable] = []
+        self.variables_lookup: dict[str, all_optional_model.Variable] = {}
+        self.pseudo_variables_lookup: dict[str, all_optional_model.PseudoVariable] = {}
         self.explicitly_defined_metadata_document = False
         self.dataset_consistency_status: list = []
         if metadata_document_path:
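Two behavioural notes on this hunk: with the bare `*,` removed, `errors_as_warnings` may now also be passed positionally, and the new `validate_required_fields_on_existing_metadata` flag opts in to the stricter `required` model when an existing document is read. A hedged usage sketch (the paths are hypothetical):

    from dapla_metadata.datasets.core import Datadoc

    datadoc = Datadoc(
        dataset_path="gs://bucket/produkt/person_data_p2021_v1.parquet",
        metadata_document_path="gs://bucket/produkt/person_data_p2021_v1__DOC.json",
        validate_required_fields_on_existing_metadata=True,  # raise on missing required fields
    )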
@@ -149,9 +159,9 @@ class Datadoc:
         - The 'contains_personal_data' attribute is set to False if not specified.
         - A lookup dictionary for variables is created based on their short names.
         """
-        extracted_metadata:
-        existing_metadata:
-        existing_pseudonymization:
+        extracted_metadata: all_optional_model.DatadocMetadata | None = None
+        existing_metadata: OptionalDatadocMetadataType = None
+        existing_pseudonymization: ExistingPseudonymizationMetadataType = None

         if self.metadata_document and self.metadata_document.exists():
             existing_metadata = self._extract_metadata_from_existing_document(
@@ -166,11 +176,26 @@ class Datadoc:

         if (
             self.dataset_path is not None
-            and self.dataset ==
+            and self.dataset == all_optional_model.Dataset()
             and len(self.variables) == 0
         ):
             extracted_metadata = self._extract_metadata_from_dataset(self.dataset_path)

+        if extracted_metadata is not None:
+            existing_file_path = self._get_existing_file_path(extracted_metadata)
+            if (
+                self.dataset_path
+                and existing_file_path is not None
+                and extracted_metadata is not None
+                and existing_metadata is not None
+            ):
+                self.dataset_consistency_status = self._check_dataset_consistency(
+                    self.dataset_path,
+                    Path(existing_file_path),
+                    extracted_metadata,
+                    existing_metadata,
+                )
+
         if (
             self.dataset_path
             and self.explicitly_defined_metadata_document
@@ -179,13 +204,6 @@ class Datadoc:
             and extracted_metadata is not None
             and existing_metadata is not None
         ):
-            existing_file_path = self._get_existing_file_path(extracted_metadata)
-            self.dataset_consistency_status = self._check_dataset_consistency(
-                self.dataset_path,
-                Path(existing_file_path),
-                extracted_metadata,
-                existing_metadata,
-            )
             self._check_ready_to_merge(
                 self.dataset_consistency_status,
                 errors_as_warnings=self.errors_as_warnings,
@@ -207,14 +225,14 @@ class Datadoc:
             self._set_pseudonymization_metadata(existing_pseudonymization)

         set_default_values_variables(self.variables)
-        set_default_values_dataset(self.dataset)
+        set_default_values_dataset(cast("all_optional_model.Dataset", self.dataset))
         set_dataset_owner(self.dataset)
         self._create_variables_lookup()
         self._create_pseudo_variables_lookup()

     def _get_existing_file_path(
         self,
-        extracted_metadata:
+        extracted_metadata: all_optional_model.DatadocMetadata | None,
     ) -> str:
         if (
             extracted_metadata is not None
@@ -227,19 +245,19 @@ class Datadoc:

     def _set_metadata(
         self,
-        merged_metadata:
+        merged_metadata: OptionalDatadocMetadataType,
     ) -> None:
         if not merged_metadata or not (
             merged_metadata.dataset and merged_metadata.variables
         ):
             msg = "Could not read metadata"
             raise ValueError(msg)
-        self.dataset = merged_metadata.dataset
+        self.dataset = cast("all_optional_model.Dataset", merged_metadata.dataset)
         self.variables = merged_metadata.variables

     def _set_pseudonymization_metadata(
         self,
-        existing_pseudonymization:
+        existing_pseudonymization: ExistingPseudonymizationMetadataType,
     ) -> None:
         if not existing_pseudonymization or not (
             existing_pseudonymization.pseudo_variables is not None
@@ -247,7 +265,10 @@ class Datadoc:
             msg = "Error reading pseudonymization metadata"
             logger.error(msg)
             return
-        self.pseudo_variables =
+        self.pseudo_variables = cast(
+            "list[all_optional_model.PseudoVariable]",
+            existing_pseudonymization.pseudo_variables,
+        )

     def _create_variables_lookup(self) -> None:
         self.variables_lookup = {
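These hunks lean heavily on `typing.cast` to narrow the new union types back to the `all_optional` variants. `cast` is purely a hint to the type checker; it performs no runtime conversion or validation, as this minimal illustration shows:

    from typing import cast

    value: object = [1, 2, 3]
    narrowed = cast("list[int]", value)  # no runtime check happens here
    assert narrowed is value  # same object, unchanged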
@@ -264,8 +285,8 @@ class Datadoc:
     def _check_dataset_consistency(
         new_dataset_path: Path | CloudPath,
         existing_dataset_path: Path,
-        extracted_metadata:
-        existing_metadata:
+        extracted_metadata: all_optional_model.DatadocMetadata,
+        existing_metadata: OptionalDatadocMetadataType,
     ) -> list[dict[str, object]]:
         """Run consistency tests.

@@ -312,14 +333,16 @@ class Datadoc:
             {
                 "name": "Variable names",
                 "success": (
-
+                    existing_metadata is not None
+                    and {v.short_name for v in extracted_metadata.variables or []}
                     == {v.short_name for v in existing_metadata.variables or []}
                 ),
             },
             {
                 "name": "Variable datatypes",
                 "success": (
-
+                    existing_metadata is not None
+                    and [v.data_type for v in extracted_metadata.variables or []]
                     == [v.data_type for v in existing_metadata.variables or []]
                 ),
             },
@@ -353,27 +376,29 @@ class Datadoc:

     @staticmethod
     def _merge_metadata(
-        extracted_metadata:
-        existing_metadata:
-    ) ->
+        extracted_metadata: all_optional_model.DatadocMetadata | None,
+        existing_metadata: OptionalDatadocMetadataType,
+    ) -> all_optional_model.DatadocMetadata:
         if not existing_metadata:
             logger.warning(
                 "No existing metadata found, no merge to perform. Continuing with extracted metadata.",
             )
-            return extracted_metadata or
+            return extracted_metadata or all_optional_model.DatadocMetadata()

         if not extracted_metadata:
-            return existing_metadata
+            return cast("all_optional_model.DatadocMetadata", existing_metadata)

         # Use the extracted metadata as a base
-        merged_metadata =
+        merged_metadata = all_optional_model.DatadocMetadata(
             dataset=copy.deepcopy(extracted_metadata.dataset),
             variables=[],
         )

         override_dataset_fields(
             merged_metadata=merged_metadata,
-            existing_metadata=
+            existing_metadata=cast(
+                "all_optional_model.DatadocMetadata", existing_metadata
+            ),
         )

         # Merge variables.
@@ -387,7 +412,7 @@ class Datadoc:
     def _extract_metadata_from_existing_document(
         self,
         document: pathlib.Path | CloudPath,
-    ) ->
+    ) -> OptionalDatadocMetadataType:
         """Read metadata from an existing metadata document.

         If an existing metadata document is available, this method reads and
@@ -402,7 +427,13 @@ class Datadoc:

         Raises:
             json.JSONDecodeError: If the metadata document cannot be parsed.
+            pydantic.ValidationError: If the data does not successfully validate.
         """
+        metadata_model = (
+            required_model
+            if self.validate_required_fields_on_existing_metadata
+            else all_optional_model
+        )
         fresh_metadata = {}
         try:
             with document.open(mode="r", encoding="utf-8") as file:
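The `metadata_model` selection is the core of the new opt-in validation: the same `model_validate_json` call is made against either generated package, and only the `required` variant raises on missing obligatory fields. A sketch of the pattern, assuming (as the rest of this diff does) that both packages expose identically named classes:

    import datadoc_model.all_optional.model as all_optional_model
    import datadoc_model.required.model as required_model

    def select_model(strict: bool):
        # Mirrors the selection above; both modules expose DatadocMetadata etc.
        return required_model if strict else all_optional_model

    metadata_model = select_model(strict=False)
    metadata = metadata_model.DatadocMetadata.model_validate_json("{}")
    # With strict=True the same call raises pydantic.ValidationError
    # when required fields are absent.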
@@ -412,7 +443,7 @@ class Datadoc:
                     fresh_metadata,
                 )
                 if is_metadata_in_container_structure(fresh_metadata):
-                    self.container =
+                    self.container = metadata_model.MetadataContainer.model_validate_json(
                         json.dumps(fresh_metadata),
                     )
                     datadoc_metadata = fresh_metadata["datadoc"]
@@ -420,7 +451,7 @@ class Datadoc:
                     datadoc_metadata = fresh_metadata
                 if datadoc_metadata is None:
                     return None
-                return
+                return metadata_model.DatadocMetadata.model_validate_json(
                     json.dumps(datadoc_metadata),
                 )
         except json.JSONDecodeError:
@@ -435,7 +466,11 @@ class Datadoc:
     def _extract_pseudonymization_from_existing_document(
         self,
         document: pathlib.Path | CloudPath,
-    ) ->
+    ) -> (
+        all_optional_model.PseudonymizationMetadata
+        | required_model.PseudonymizationMetadata
+        | None
+    ):
         """Read pseudo metadata from an existing metadata document.

         If there is pseudo metadata in the document supplied, the method validates and returns the pseudonymization structure.
@@ -445,7 +480,14 @@ class Datadoc:

         Raises:
             json.JSONDecodeError: If the metadata document cannot be parsed.
+            pydantic.ValidationError: If the data does not successfully validate.
         """
+        metadata_model = (
+            required_model
+            if self.validate_required_fields_on_existing_metadata
+            else all_optional_model
+        )
+
         try:
             with document.open(mode="r", encoding="utf-8") as file:
                 fresh_metadata = json.load(file)
@@ -464,7 +506,7 @@ class Datadoc:
         if pseudonymization_metadata is None:
             return None

-        return
+        return metadata_model.PseudonymizationMetadata.model_validate_json(
             json.dumps(pseudonymization_metadata),
         )

@@ -500,7 +542,7 @@ class Datadoc:
     def _extract_metadata_from_dataset(
         self,
         dataset: pathlib.Path | CloudPath,
-    ) ->
+    ) -> all_optional_model.DatadocMetadata:
         """Obtain what metadata we can from the dataset itself.

         This makes it easier for the user by 'pre-filling' certain fields.
@@ -520,9 +562,9 @@ class Datadoc:
         - variables: A list of fields extracted from the dataset schema.
         """
         dapla_dataset_path_info = DaplaDatasetPathInfo(dataset)
-        metadata =
+        metadata = all_optional_model.DatadocMetadata()

-        metadata.dataset =
+        metadata.dataset = all_optional_model.Dataset(
             short_name=dapla_dataset_path_info.dataset_short_name,
             dataset_state=dapla_dataset_path_info.dataset_state,
             dataset_status=DataSetStatus.DRAFT,
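For context, `DaplaDatasetPathInfo` (unchanged here, defined in `dapla_metadata/datasets/dapla_dataset_path_info.py`) derives the pre-filled values from the dataset path alone. A hedged sketch with a hypothetical path following the Dapla naming convention:

    from dapla_metadata.datasets.dapla_dataset_path_info import DaplaDatasetPathInfo

    info = DaplaDatasetPathInfo("produkt/person_data_p2021_v1.parquet")
    print(info.dataset_short_name)  # e.g. "person_data"
    print(info.dataset_state)       # derived from the path, if recognizable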
@@ -586,12 +628,14 @@ class Datadoc:
         if self.container:
             self.container.datadoc = datadoc
             if not self.container.pseudonymization:
-                self.container.pseudonymization =
-
+                self.container.pseudonymization = (
+                    all_optional_model.PseudonymizationMetadata(
+                        pseudo_dataset=all_optional_model.PseudoDataset()
+                    )
                 )
             self.container.pseudonymization.pseudo_variables = self.pseudo_variables
         else:
-            self.container =
+            self.container = all_optional_model.MetadataContainer(datadoc=datadoc)
         if self.metadata_document:
             content = self.container.model_dump_json(indent=4)
             self.metadata_document.write_text(content)
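The write path uses the pydantic v2 round-trip APIs already seen above (`model_dump_json` / `model_validate_json`). A minimal round-trip sketch using only constructors that appear in this diff:

    import datadoc_model.all_optional.model as all_optional_model

    container = all_optional_model.MetadataContainer(
        datadoc=all_optional_model.DatadocMetadata()
    )
    content = container.model_dump_json(indent=4)
    restored = all_optional_model.MetadataContainer.model_validate_json(content)
    assert restored == container  # pydantic models compare field-by-field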
@@ -623,12 +667,14 @@ class Datadoc:
     def add_pseudo_variable(self, variable_short_name: str) -> None:
         """Adds a new pseudo variable to the list of pseudonymized variables."""
         if self.variables_lookup[variable_short_name] is not None:
-            pseudo_variable =
+            pseudo_variable = all_optional_model.PseudoVariable(
+                short_name=variable_short_name
+            )
             self.pseudo_variables.append(pseudo_variable)
             self.pseudo_variables_lookup[variable_short_name] = pseudo_variable

     def get_pseudo_variable(
         self, variable_short_name: str
-    ) ->
+    ) -> all_optional_model.PseudoVariable | None:
         """Finds a pseudo variable by shortname."""
         return self.pseudo_variables_lookup.get(variable_short_name)
dapla_metadata/datasets/dataset_parser.py
CHANGED

@@ -12,10 +12,10 @@ from abc import abstractmethod
 from typing import TYPE_CHECKING

 import pandas as pd
-from datadoc_model.model import DataType
-from datadoc_model.model import LanguageStringType
-from datadoc_model.model import LanguageStringTypeItem
-from datadoc_model.model import Variable
+from datadoc_model.all_optional.model import DataType
+from datadoc_model.all_optional.model import LanguageStringType
+from datadoc_model.all_optional.model import LanguageStringTypeItem
+from datadoc_model.all_optional.model import Variable
 from pyarrow import parquet as pq

 from dapla_metadata.datasets.utility.enums import SupportedLanguages
dapla_metadata/datasets/statistic_subject_mapping.py
CHANGED

@@ -140,7 +140,11 @@ class StatisticSubjectMapping(GetExternalSource):
                 SecondarySubject(
                     self._extract_titles(s.titler),
                     s["emnekode"],
-                    [
+                    [
+                        statistikk["kortnavn"]
+                        for statistikk in s.find_all("Statistikk")
+                        if statistikk["isPrimaerPlassering"] == "true"
+                    ],
                 )
                 for s in p.find_all("delemne")
             ]
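The new list comprehension keeps only statistics whose primary-placement flag is set. A standalone BeautifulSoup illustration of the same filter on toy XML (element and attribute names follow the hunk; the document shape is an assumption):

    from bs4 import BeautifulSoup  # the "xml" parser requires lxml

    xml = """
    <delemne emnekode="02.01">
      <Statistikk kortnavn="dode" isPrimaerPlassering="true"/>
      <Statistikk kortnavn="fodte" isPrimaerPlassering="false"/>
    </delemne>
    """
    s = BeautifulSoup(xml, "xml")
    primary = [
        statistikk["kortnavn"]
        for statistikk in s.find_all("Statistikk")
        if statistikk["isPrimaerPlassering"] == "true"
    ]
    print(primary)  # ['dode']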
dapla_metadata/datasets/utility/constants.py
CHANGED

@@ -1,7 +1,7 @@
 """Repository for constant values in Datadoc backend."""

-from datadoc_model.model import LanguageStringType
-from datadoc_model.model import LanguageStringTypeItem
+from datadoc_model.all_optional.model import LanguageStringType
+from datadoc_model.all_optional.model import LanguageStringTypeItem

 VALIDATION_ERROR = "Validation error: "

dapla_metadata/datasets/utility/utils.py
CHANGED

@@ -4,15 +4,19 @@ import datetime  # import is needed in xdoctest
 import logging
 import pathlib
 import uuid
+from typing import cast

+import datadoc_model
+import datadoc_model.all_optional.model as all_optional_model
+import datadoc_model.required.model as required_model
 import google.auth
 from cloudpathlib import CloudPath
 from cloudpathlib import GSClient
 from cloudpathlib import GSPath
 from datadoc_model import model
-from datadoc_model.model import Assessment
-from datadoc_model.model import DataSetState
-from datadoc_model.model import VariableRole
+from datadoc_model.all_optional.model import Assessment
+from datadoc_model.all_optional.model import DataSetState
+from datadoc_model.all_optional.model import VariableRole

 from dapla_metadata.dapla import user_info
 from dapla_metadata.datasets.utility.constants import (
@@ -34,6 +38,17 @@ from dapla_metadata.datasets.utility.constants import (

 logger = logging.getLogger(__name__)

+DatadocMetadataType = (
+    all_optional_model.DatadocMetadata | required_model.DatadocMetadata
+)
+DatasetType = all_optional_model.Dataset | required_model.Dataset
+OptionalDatadocMetadataType = DatadocMetadataType | None
+ExistingPseudonymizationMetadataType = (
+    all_optional_model.PseudonymizationMetadata
+    | required_model.PseudonymizationMetadata
+    | None
+)
+

 def get_timestamp_now() -> datetime.datetime:
     """Return a timestamp for the current moment."""
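These aliases let one signature accept either generated variant. Since both variants expose the same field names, helpers such as `set_default_values_dataset` below can stay variant-agnostic. An illustrative (hypothetical) helper in the same style:

    def dataset_short_name(dataset: DatasetType) -> str | None:
        # Works for both all_optional_model.Dataset and required_model.Dataset,
        # which share their field names by construction.
        return dataset.short_name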
@@ -119,7 +134,9 @@ def set_default_values_variables(variables: list) -> None:
             v.variable_role = VariableRole.MEASURE


-def set_default_values_dataset(dataset: model.Dataset) -> None:
+def set_default_values_dataset(
+    dataset: DatasetType,
+) -> None:
     """Set default values on dataset.

     Args:
@@ -140,7 +157,9 @@ def set_default_values_dataset(dataset: model.Dataset) -> None:
         dataset.contains_personal_data = False


-def set_dataset_owner(dataset: model.Dataset) -> None:
+def set_dataset_owner(
+    dataset: DatasetType,
+) -> None:
     """Sets the owner of the dataset from the DAPLA_GROUP_CONTEXT enviornment variable.

     Args:
@@ -153,7 +172,7 @@ def set_dataset_owner(dataset: model.Dataset) -> None:


 def set_variables_inherit_from_dataset(
-    dataset:
+    dataset: DatasetType,
     variables: list,
 ) -> None:
     """Set specific dataset values on a list of variable objects.
@@ -283,7 +302,9 @@ def _is_missing_metadata(
     )


-def num_obligatory_dataset_fields_completed(
+def num_obligatory_dataset_fields_completed(
+    dataset: DatasetType,
+) -> int:
     """Count the number of completed obligatory dataset fields.

     This function returns the total count of obligatory fields in the dataset that
@@ -345,7 +366,9 @@ def num_obligatory_variable_fields_completed(variable: model.Variable) -> int:
     return NUM_OBLIGATORY_VARIABLES_FIELDS - len(missing_metadata)


-def get_missing_obligatory_dataset_fields(
+def get_missing_obligatory_dataset_fields(
+    dataset: DatasetType,
+) -> list:
     """Identify all obligatory dataset fields that are missing values.

     This function checks for obligatory fields that are either directly missing
@@ -422,8 +445,9 @@ def running_in_notebook() -> bool:


 def override_dataset_fields(
-    merged_metadata:
-    existing_metadata:
+    merged_metadata: all_optional_model.DatadocMetadata,
+    existing_metadata: all_optional_model.DatadocMetadata
+    | required_model.DatadocMetadata,
 ) -> None:
     """Overrides specific fields in the dataset of `merged_metadata` with values from the dataset of `existing_metadata`.
@@ -449,10 +473,10 @@ def override_dataset_fields(


 def merge_variables(
-    existing_metadata:
-    extracted_metadata:
-    merged_metadata:
-) ->
+    existing_metadata: OptionalDatadocMetadataType,
+    extracted_metadata: all_optional_model.DatadocMetadata,
+    merged_metadata: all_optional_model.DatadocMetadata,
+) -> all_optional_model.DatadocMetadata:
     """Merges variables from the extracted metadata into the existing metadata and updates the merged metadata.

     This function compares the variables from `extracted_metadata` with those in `existing_metadata`.
@@ -466,11 +490,12 @@ def merge_variables(
         merged_metadata: The metadata object that will contain the result of the merge.

     Returns:
-
+        all_optional_model.DatadocMetadata: The `merged_metadata` object containing variables from both `existing_metadata`
         and `extracted_metadata`.
     """
     if (
-        existing_metadata
+        existing_metadata is not None
+        and existing_metadata.variables is not None
         and extracted_metadata is not None
         and extracted_metadata.variables is not None
         and merged_metadata.variables is not None
@@ -494,7 +519,9 @@ def merge_variables(
                 existing.contains_data_until = (
                     extracted.contains_data_until or existing.contains_data_until
                 )
-            merged_metadata.variables.append(
+            merged_metadata.variables.append(
+                cast("datadoc_model.all_optional.model.Variable", existing)
+            )
         else:
             # If there is no existing metadata for this variable, we just use what we have extracted
             merged_metadata.variables.append(extracted)
dapla_metadata/variable_definitions/_utils/config.py
CHANGED

@@ -9,24 +9,6 @@ from dapla_metadata.variable_definitions._generated.vardef_client.configuration

 VARDEF_HOST_TEST = "https://metadata.intern.test.ssb.no"
 WORKSPACE_DIR = "WORKSPACE_DIR"
-VARDEF_DESCRIPTIONS_FILE_PATH = "VARDEF_DESCRIPTIONS_FILE_PATH"
-VARDEF_DEFAULT_DESCRIPTION_PATH = (
-    "variable_definitions/resources/vardef_model_descriptions_nb.yaml"
-)
-
-
-def get_descriptions_path() -> str:
-    """Get the relative file path from the repo root to the Norwegian descriptions.
-
-    First checks the `VARDEF_DESCRIPTIONS_FILE_PATH` environment variable; if not set, returns a default path.
-
-    Returns:
-        str: The file path to the descriptions.
-    """
-    return (
-        get_config_item(VARDEF_DESCRIPTIONS_FILE_PATH)
-        or VARDEF_DEFAULT_DESCRIPTION_PATH
-    )


 def get_workspace_dir() -> str | None:
dapla_metadata/variable_definitions/_utils/files.py
CHANGED

@@ -3,12 +3,8 @@
 import logging
 from datetime import datetime
 from pathlib import Path
-from typing import TYPE_CHECKING
-from typing import Any
-from typing import cast

 import pytz
-from pydantic.config import JsonDict
 from ruamel.yaml import YAML
 from ruamel.yaml import CommentedMap
 from ruamel.yaml import RoundTripRepresenter
@@ -27,10 +23,7 @@ from dapla_metadata.variable_definitions._utils.constants import DOUBLE_QUOTE_FI
 from dapla_metadata.variable_definitions._utils.constants import (
     MACHINE_GENERATED_FIELDS,
 )
-from dapla_metadata.variable_definitions._utils.constants import NORWEGIAN_DESCRIPTIONS
-from dapla_metadata.variable_definitions._utils.constants import OPTIONAL_FIELD
 from dapla_metadata.variable_definitions._utils.constants import OWNER_FIELD_NAME
-from dapla_metadata.variable_definitions._utils.constants import REQUIRED_FIELD
 from dapla_metadata.variable_definitions._utils.constants import (
     TEMPLATE_SECTION_HEADER_MACHINE_GENERATED,
 )
@@ -47,14 +40,8 @@ from dapla_metadata.variable_definitions._utils.constants import (
     VARIABLE_STATUS_FIELD_NAME,
 )
 from dapla_metadata.variable_definitions._utils.constants import YAML_STR_TAG
-from dapla_metadata.variable_definitions._utils.descriptions import (
-    apply_norwegian_descriptions_to_model,
-)
 from dapla_metadata.variable_definitions.exceptions import VardefFileError

-if TYPE_CHECKING:
-    from pydantic import JsonValue
-
 logger = logging.getLogger(__name__)

@@ -119,41 +106,6 @@ def _get_variable_definitions_dir():
     return folder_path


-def _set_field_requirement(field_name: str, field: Any) -> str | None:
-    """Determine the field requirement status."""
-    if field_name not in MACHINE_GENERATED_FIELDS:
-        if field.is_required() or field_name == VARIABLE_STATUS_FIELD_NAME:
-            return REQUIRED_FIELD
-        return OPTIONAL_FIELD
-    return None
-
-
-def _populate_commented_map(
-    field_name: str,
-    value: str,
-    commented_map: CommentedMap,
-    model_instance: CompleteResponse,
-) -> None:
-    """Add data to a CommentedMap."""
-    commented_map[field_name] = value
-    field = type(model_instance).model_fields[field_name]
-    description: JsonValue = cast(
-        JsonDict,
-        field.json_schema_extra,
-    )[NORWEGIAN_DESCRIPTIONS]
-    field_requirement: str | None = _set_field_requirement(field_name, field)
-    if description is not None:
-        new_description = (
-            ("\n" + field_requirement + "\n" + str(description))
-            if field_requirement
-            else ("\n" + str(description))
-        )
-        commented_map.yaml_set_comment_before_after_key(
-            field_name,
-            before=new_description,
-        )
-
-
 def _validate_and_create_directory(custom_directory: Path) -> Path:
     """Ensure that the given path is a valid directory, creating it if necessary.

@@ -290,9 +242,9 @@ def _model_to_yaml_with_comments(
     start_comment: str,
     custom_directory: Path | None = None,
 ) -> Path:
-    """Convert a model instance to a structured YAML file
+    """Convert a model instance to a structured YAML file.

-
+    Organizes fields into sections with headers and saves
     the YAML file with a structured format and timestamped filename.

     Args:

@@ -307,13 +259,6 @@ def _model_to_yaml_with_comments(
     yaml = YAML()
     configure_yaml(yaml)

-    from dapla_metadata.variable_definitions.variable_definition import (
-        VariableDefinition,
-    )
-
-    # Apply new fields to model
-    apply_norwegian_descriptions_to_model(VariableDefinition)
-
     # Convert Pydantic model instance to dictionary
     data = model_instance.model_dump(
         serialize_as_any=True,

@@ -326,21 +271,16 @@ def _model_to_yaml_with_comments(
     status_map = CommentedMap()
     owner_map = CommentedMap()

-    # Loop through all fields in the model and
+    # Loop through all fields in the model and assigne to commented maps
     for field_name, value in data.items():
         if field_name == VARIABLE_STATUS_FIELD_NAME:
-
+            status_map[field_name] = value
         elif field_name == OWNER_FIELD_NAME:
-
+            owner_map[field_name] = value
         elif field_name in MACHINE_GENERATED_FIELDS:
-
-
-
-                machine_generated_map,
-                model_instance,
-            )
-        elif field_name not in {VARIABLE_STATUS_FIELD_NAME, OWNER_FIELD_NAME}:
-            _populate_commented_map(field_name, value, commented_map, model_instance)
+            machine_generated_map[field_name] = value
+        else:
+            commented_map[field_name] = value

     base_path = (
         _get_variable_definitions_dir()
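The simplified loop now just buckets plain values into the four `CommentedMap` sections; the per-field Norwegian descriptions and requirement comments previously added by `_populate_commented_map` are gone. A self-contained sketch of the bucketing with illustrative field names:

    import sys

    from ruamel.yaml import YAML
    from ruamel.yaml import CommentedMap

    data = {"name": "x", "variable_status": "DRAFT", "owner": "team-a", "id": "123"}
    MACHINE_GENERATED_FIELDS = {"id"}

    commented_map = CommentedMap()
    status_map = CommentedMap()
    owner_map = CommentedMap()
    machine_generated_map = CommentedMap()

    for field_name, value in data.items():
        if field_name == "variable_status":
            status_map[field_name] = value
        elif field_name == "owner":
            owner_map[field_name] = value
        elif field_name in MACHINE_GENERATED_FIELDS:
            machine_generated_map[field_name] = value
        else:
            commented_map[field_name] = value

    YAML().dump(commented_map, sys.stdout)  # prints: name: x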
{dapla_toolbelt_metadata-0.6.5.dist-info → dapla_toolbelt_metadata-0.7.0.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: dapla-toolbelt-metadata
-Version: 0.6.5
+Version: 0.7.0
 Summary: Dapla Toolbelt Metadata
 License: MIT
 Author: Team Metadata

@@ -24,7 +24,7 @@ Requires-Dist: pyjwt (>=2.8.0)
 Requires-Dist: python-dotenv (>=1.0.1)
 Requires-Dist: requests (>=2.31.0)
 Requires-Dist: ruamel-yaml (>=0.18.10)
-Requires-Dist: ssb-datadoc-model (==6.
+Requires-Dist: ssb-datadoc-model (==6.1.0)
 Requires-Dist: ssb-klass-python (>=1.0.1)
 Requires-Dist: typing-extensions (>=4.12.2)
 Project-URL: Changelog, https://github.com/statisticsnorway/dapla-toolbelt-metadata/releases
{dapla_toolbelt_metadata-0.6.5.dist-info → dapla_toolbelt_metadata-0.7.0.dist-info}/RECORD
RENAMED

@@ -1,4 +1,4 @@
-dapla_metadata/__init__.py,sha256=
+dapla_metadata/__init__.py,sha256=37yh9XWYQoLIVIS_fDdwNN8OXzbYY-6kMYwvjQrLMJQ,428
 dapla_metadata/_shared/__init__.py,sha256=qUFgnVhBVlPRQP0ePmY76c8FvWRrJ-9c5GvzibwERnQ,103
 dapla_metadata/_shared/config.py,sha256=QqXcmP66AfXF8wi6FMsa7et7kH2k4EJPOF4IELKuQig,3213
 dapla_metadata/_shared/enums.py,sha256=WHkH1d8xw41gOly6au_izZB1_-6XTcKu5rhBWUImjp8,509

@@ -7,19 +7,19 @@ dapla_metadata/dapla/__init__.py,sha256=tkapF-YwmruPPrKvN3pEoCZqb7xvJx_ogBM8XyGM
 dapla_metadata/dapla/user_info.py,sha256=bENez-ICt9ySR8orYebO68Q3_2LkIW9QTL58DTctmEQ,4833
 dapla_metadata/datasets/__init__.py,sha256=TvzskpdFC6hGcC9_55URT5jr5wNAPzXuISd2UjJWM_8,280
 dapla_metadata/datasets/code_list.py,sha256=kp1O6sUiUAP9WKlWY8IgHWx_1IOzJA63WveHqolgKmg,9082
-dapla_metadata/datasets/core.py,sha256=
-dapla_metadata/datasets/dapla_dataset_path_info.py,sha256=
-dapla_metadata/datasets/dataset_parser.py,sha256=
+dapla_metadata/datasets/core.py,sha256=Kc248-U1XoyjFgGo2uOAiOrHrCIo-2-4P53SM8FDKHo,28090
+dapla_metadata/datasets/dapla_dataset_path_info.py,sha256=WPeV_mwKk2B9sXd14SaP-kTb1bOQ_8W2KtrqOG7sJIY,26867
+dapla_metadata/datasets/dataset_parser.py,sha256=3dtRXNy1C8SfG8zTYWdY26nV4l-dG25IC_0J5t2bYwI,8285
 dapla_metadata/datasets/external_sources/__init__.py,sha256=qvIdXwqyEmXNUCB94ZtZXRzifdW4hiXASFFPtC70f6E,83
 dapla_metadata/datasets/external_sources/external_sources.py,sha256=9eIcOIUbaodNX1w9Tj2wl4U4wUmr5kF1R0i01fKUzGs,2974
 dapla_metadata/datasets/model_backwards_compatibility.py,sha256=RKhi6cjqmPKW8lTYQ0mIXTAwhMGo_X-QMad4Y5tvq_0,19136
 dapla_metadata/datasets/model_validation.py,sha256=pGT-jqaQQY4z7jz-7UQd0BQoTWDxDWPYAnDoRC2vd_c,6818
 dapla_metadata/datasets/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-dapla_metadata/datasets/statistic_subject_mapping.py,sha256=
+dapla_metadata/datasets/statistic_subject_mapping.py,sha256=ovT-bZv6eGPD3L0UIs5nIw4AjJrfZn0hyWyD72JBmhs,6395
 dapla_metadata/datasets/utility/__init__.py,sha256=pp6tUcgUbo8iq9OPtFKQrTbLuI3uY7NHptwWSTpasOU,33
-dapla_metadata/datasets/utility/constants.py,sha256=
+dapla_metadata/datasets/utility/constants.py,sha256=YEs2ECLNJMM1SSORPTDnzNep_Qut5YbJ5JJx_oP3ios,2470
 dapla_metadata/datasets/utility/enums.py,sha256=SpV4xlmP1YMaJPbmX03hqRLHUOhXIk5gquTeJ8G_5OE,432
-dapla_metadata/datasets/utility/utils.py,sha256=
+dapla_metadata/datasets/utility/utils.py,sha256=JpJuvYEXmNUXTgaxPhUg24aiiZS201wRNnAOWyH_DO0,19210
 dapla_metadata/standards/__init__.py,sha256=n8jnMrudLuScSdfQ4UMJorc-Ptg3Y1-ilT8zAaQnM70,179
 dapla_metadata/standards/name_validator.py,sha256=6-DQE_EKVd6UjL--EXpFcZDQtusVbSFaWaUY-CfOV2c,9184
 dapla_metadata/standards/standard_validators.py,sha256=tcCiCI76wUVtMzXA2oCgdauZc0uGgUi11FKu-t7KGwQ,3767

@@ -73,17 +73,15 @@ dapla_metadata/variable_definitions/_generated/vardef_client/py.typed,sha256=47D
 dapla_metadata/variable_definitions/_generated/vardef_client/rest.py,sha256=x4PWmg3IYQBr8OgnrWr3l4Ke2rElHP3zAEVxk2U-mOc,12022
 dapla_metadata/variable_definitions/_utils/__init__.py,sha256=qAhRLJoTBqtR3f9xRXTRhD7-5Xg0Opk1Ks5F4AUYnpA,45
 dapla_metadata/variable_definitions/_utils/_client.py,sha256=v1-9VjrdPI6-sroam5vXMPEV1dQMPsYk7KyGd48HjYw,971
-dapla_metadata/variable_definitions/_utils/config.py,sha256=
-dapla_metadata/variable_definitions/_utils/constants.py,sha256=
-dapla_metadata/variable_definitions/_utils/
-dapla_metadata/variable_definitions/_utils/files.py,sha256=qdO9D0l-6FnSGZImTtyMsrFfauFqvQyCWz0knLSklbo,13193
+dapla_metadata/variable_definitions/_utils/config.py,sha256=h5MtmueCdAgg82c5upvQUC9QSzK0TOs40KwQj5mTrE8,1822
+dapla_metadata/variable_definitions/_utils/constants.py,sha256=Jy9xFa4ZpTUxpDZ_vdUaFlB-cPnQpFArwS9VtEIG0SY,1834
+dapla_metadata/variable_definitions/_utils/files.py,sha256=JbPgPNQ7iA38juMqGEdcg5OjZZUwCb6NQtPL0AEspD0,10933
 dapla_metadata/variable_definitions/_utils/template_files.py,sha256=-PgYs4TG4vrXLQgk47pow9ZsqlZqhtO755LnEmvN4MA,3405
 dapla_metadata/variable_definitions/_utils/variable_definition_files.py,sha256=PbqsFdHxsq0EWBg9s2Y57LqVP7aPmGD5-FZfnzuOw2Q,4078
 dapla_metadata/variable_definitions/exceptions.py,sha256=z6Gtd84FboDu7vWjC3wathIF7I0gF0imtRhwMkr16lY,7851
-dapla_metadata/variable_definitions/resources/vardef_model_descriptions_nb.yaml,sha256=VNglLU6jBLbfoM12fc2fiby_pi2GAgA-4t30yKypeuY,5474
 dapla_metadata/variable_definitions/vardef.py,sha256=KYd31nCGhxuzC0hpKR6foQjO39Tlb3vu9IDqUoMvTeY,11352
 dapla_metadata/variable_definitions/variable_definition.py,sha256=sj49uot0e4UJW4QJ3dEJGgjY4yfCHOkxS2NdD2t60b8,14883
-dapla_toolbelt_metadata-0.
-dapla_toolbelt_metadata-0.
-dapla_toolbelt_metadata-0.
-dapla_toolbelt_metadata-0.
+dapla_toolbelt_metadata-0.7.0.dist-info/LICENSE,sha256=np3IfD5m0ZUofn_kVzDZqliozuiO6wrktw3LRPjyEiI,1073
+dapla_toolbelt_metadata-0.7.0.dist-info/METADATA,sha256=BvUM_PyKUxWsZcS1wBYh1n6aC0nEA9A14NQUbphZwIw,4905
+dapla_toolbelt_metadata-0.7.0.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
+dapla_toolbelt_metadata-0.7.0.dist-info/RECORD,,
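Each RECORD row has the form `path,sha256=<digest>,<size>`, where the digest is the URL-safe base64 encoding of the file's SHA-256 hash with the `=` padding stripped (PEP 376/427). A sketch for checking one entry of the unpacked wheel:

    import base64
    import hashlib
    from pathlib import Path

    def record_digest(path: Path) -> str:
        digest = hashlib.sha256(path.read_bytes()).digest()
        return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode()

    # For the 0.7.0 wheel this should reproduce the value recorded above:
    # record_digest(Path("dapla_metadata/__init__.py"))
    # -> "sha256=37yh9XWYQoLIVIS_fDdwNN8OXzbYY-6kMYwvjQrLMJQ"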
dapla_metadata/variable_definitions/_utils/descriptions.py
DELETED

@@ -1,89 +0,0 @@
-"""Utilities for dynamically adding extra fields to Pydantic models, specifically Norwegian descriptions."""
-
-import logging
-from pathlib import Path
-from typing import TYPE_CHECKING
-from typing import cast
-
-import ruamel.yaml
-from pydantic import BaseModel
-from pydantic import Field
-
-from dapla_metadata.variable_definitions._utils.config import get_descriptions_path
-
-if TYPE_CHECKING:
-    from pydantic.config import JsonDict
-
-logger = logging.getLogger(__name__)
-
-
-def get_package_root() -> Path:
-    """Get an absolute Path to the root of the package (dapla_metadata)."""
-    number_of_directories_up_from_descriptions_file = 2
-    return (
-        Path(__file__)
-        .resolve()
-        .parents[number_of_directories_up_from_descriptions_file]
-    )
-
-
-def load_descriptions(file_path: Path) -> dict:
-    """Load and return the contents of a YAML file as a dictionary.
-
-    Args:
-        file_path (Path): Path to the YAML file.
-
-    Returns:
-        dict: Parsed contents of the YAML file.
-    """
-    with Path.open(file_path, encoding="utf-8") as f:
-        return ruamel.yaml.YAML().load(f)
-
-
-def apply_norwegian_descriptions_to_model(
-    model: type[BaseModel],
-) -> None:
-    """Add Norwegian descriptions to the fields of a Pydantic model.
-
-    This function globally modifies the model fields by inserting a Norwegian description
-    from a predefined dictionary. If a field does not have a corresponding
-    Norwegian description, a default message is used.
-
-    Args:
-        model (BaseModel): A Pydantic model instance to be updated.
-
-    Returns:
-        None: The function modifies the model in place.
-    """
-    new_fields = {}
-
-    descriptions = load_descriptions(
-        get_package_root() / get_descriptions_path(),
-    )
-
-    for field_name, field_info in model.model_fields.items():
-        new_description: str = descriptions.get(
-            field_name,
-            f"No description in norwegian found for {field_name}",
-        )
-        if "No description in norwegian found" in new_description:
-            logger.warning("Missing description for %s", field_name)
-        else:
-            logger.debug("Field %s: %s", field_name, new_description)
-
-        new_fields[field_name] = Field(  # type: ignore[call-overload]
-            default=field_info.default,
-            alias=field_info.alias,
-            title=field_info.title,
-            description=field_info.description,
-            json_schema_extra=cast(
-                "JsonDict",
-                {
-                    "norwegian_description": new_description,
-                    "annotation": field_info.annotation,
-                },
-            ),
-        )
-
-    model.model_fields.update(new_fields)  # Apply changes
-    model.model_rebuild()
dapla_metadata/variable_definitions/resources/vardef_model_descriptions_nb.yaml
DELETED

@@ -1,109 +0,0 @@
-# --- Variabel definisjoner ---
-# ref: https://statistics-norway.atlassian.net/wiki/spaces/MPD/pages/3009839199/VarDef+-+Krav+til+dokumentasjon+av+variabler
-name: |
-  Variabelens navn. Dette skal ikke være en mer “teknisk” forkortelse, men et navn som er forståelig for mennesker.
-  -------------------------
-  >>> EKSEMPEL:
-  name:
-    nb: |-
-      Lønnsinntekter
-short_name: |
-  Dette er variabelens kortnavn, som kan være en mer “teknisk” forkortelse, f.eks. wlonn (kortnavnet til Lønnsinntekter). Kortnavnet til en variabel i Vardef skal være unikt.
-  Kravet til kortnavnet er at det kan inneholde a-z (kun små bokstaver), 0-9 og _ (understrek). Minimumslengden på kortnavnet er 2 tegn.
-  Bokstavene “æ”, “ø” og “å” kan ikke brukes. Disse anbefales erstattet med hhv. “ae”, “oe” og “aa"
-definition: |
-  En definisjon skal beskrive hva variabelen betyr og være så kort og presis som mulig. Mer utfyllende opplysninger kan legges i Merknad-feltet.
-  -------------------------
-  >>> EKSEMPEL:
-  definition:
-    nb: |-
-      Yrkesinntekter, kapitalinntekter, skattepliktige og skattefrie overføringer, i løpet av kalenderåret.
-classification_reference: |
-  ID av en klassifikasjon eller kodeliste fra KLASS som beskriver verdiene variabelen kan anta.
-  For eksempel vil variabelen 'Sivilstand' ha klassifikasjon 'Standard for sivilstand' (kan vises på https://www.ssb.no/klass/klassifikasjoner/19 ) som har ID 19.
-  -------------------------
-  >>> EKSEMPEL:
-  classification_reference: "19"
-unit_types: |
-  Enhetstyper - enhetene som beskrives av denne variabelen. Variabelen “sivilstand” vil f.eks. ha enhetstypen person,
-  mens f.eks. “Produsentpris for tjenester” vil ha både foretak og bedrift som enhetstyper siden variabelen kan beskrive begge.
-  Verdier skal være koder fra: https://www.ssb.no/klass/klassifikasjoner/702.
-  -------------------------
-  >>> EKSEMPEL:
-  unit_types:
-    - "20"
-subject_fields: |
-  Statistikkområder som variabelen brukes innenfor. For eksempel tilhører variabelen “Sivilstand” statistikkområdet “Befolkning”.
-  Verdier skal være koder fra https://www.ssb.no/klass/klassifikasjoner/618.
-  -------------------------
-  >>> EKSEMPEL:
-  subject_fields:
-    - "bf"
-    - "be"
-contains_special_categories_of_personal_data: |
-  Viser om variabelen inneholder spesielt sensitive personopplysninger.
-  -------------------------
-  >>> EKSEMPEL:
-  contains_special_categories_of_personal_data: true
-measurement_type: |
-  Måletype som en kvantitativ variabelen tilhører, f.eks. valuta, areal osv.
-  Verdien skal være en kode fra: https://www.ssb.no/klass/klassifikasjoner/303
-  -------------------------
-  >>> EKSEMPEL:
-  measurement_type: "03"
-valid_from: |
-  Datoen variabeldefinisjonen er gyldig f.o.m.
-  -------------------------
-  >>> EKSEMPEL:
-  valid_from: 1999-01-30
-valid_until: |
-  Datoen variabeldefinisjonens var gyldig t.o.m. Settes hvis definisjonen skal erstattet av en ny definisjon (med en ny gyldighetsperiode), eller variabelen ikke lenger skal brukes.
-  -------------------------
-  >>> EKSEMPEL:
-  valid_until: 2024-10-23
-external_reference_uri: |
-  En peker (URI) til ekstern definisjon/dokumentasjon, f.eks. ei webside som er relevant for variabelen.
-  -----------------------------------------------------
-  >>> EKSEMPEL:
-  external_reference_uri: "https://www.landbruksdirektoratet.com"
-comment: |
-  Her kan en sette inn eventuelle tilleggsopplysninger som ikke hører hjemme i selve definisjonen.
-  Variabelen “Landbakgrunn” har f.eks. merknaden “Fra og med 1.1.2003 ble definisjon endret til også å trekke inn besteforeldrenes fødeland”.
-  -----------------------------------------------------------------------------------------------
-  >>> EKSEMPEL:
-  comment:
-    nb: |-
-      Fra og med 1.1.2003 ble definisjon endret til også å trekke inn besteforeldrenes fødeland.
-related_variable_definition_uris: |
-  Her kan en legge inn URIer til andre variabler som er relevante. Eksempelvis er variabelen “Inntekt etter skatt” en beregnet variabel der “Yrkesinntekter” og “Kapitalinntekter” inngår i beregningen.
-  En kan da legge inn deres URI-er i dette feltet.
-  -------------------------
-  >>> EKSEMPEL:
-  related_variable_definition_uris:
-    - "https://example.com/"
-contact: |
-  Her dokumenterer en navn og epost for person eller gruppe som kan svare på spørsmål.
-  -------------------------
-  >>> EKSEMPEL:
-  contact:
-    title:
-      nb: |-
-        Seksjonsleder
-    email: leder@ssb.no
-variable_status: |
-  Livssyklus for variabelen.
-id: |
-  Unik SSB identifikator for variabeldefinisjonen. Denne blir maskingenerert.
-  Variabeldefinisjoner med ulike gyldighetsperioder har samme ID (og samme kortnavn).
-patch_id: |
-  Løpenummer som identifiserer en patch, endring, for en variabeldefinisjon.
-owner: |
-  Eier av variabelen dvs. ansvarlig Dapla-team (statistikk-team) og informasjon om tilgangsstyringsgrupper. Team-tilhørighet settes automatisk til det samme som teamtilhørigheten til den som oppretter variabelen.
-created_at: |
-  Tidsstempelet da variabelen ble opprettet. Denne er maskingenerert.
-created_by: |
-  Personen som har opprettet variabelen. Dette er maskingenerert.
-last_updated_at: |
-  Tidsstempelet da variabelen sist ble oppdatert. Denne er maskingenerert.
-last_updated_by: |
-  Personen som sist utførte en endring i variabelen. Denne er maskingenerert.
{dapla_toolbelt_metadata-0.6.5.dist-info → dapla_toolbelt_metadata-0.7.0.dist-info}/LICENSE
RENAMED
File without changes

{dapla_toolbelt_metadata-0.6.5.dist-info → dapla_toolbelt_metadata-0.7.0.dist-info}/WHEEL
RENAMED
File without changes