dapla-toolbelt-metadata 0.6.5__tar.gz → 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dapla-toolbelt-metadata might be problematic. Click here for more details.

Files changed (89)
  1. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/PKG-INFO +2 -2
  2. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/pyproject.toml +3 -15
  3. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/__init__.py +1 -1
  4. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/datasets/core.py +95 -49
  5. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/datasets/dapla_dataset_path_info.py +1 -1
  6. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/datasets/dataset_parser.py +4 -4
  7. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/datasets/statistic_subject_mapping.py +5 -1
  8. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/datasets/utility/constants.py +2 -2
  9. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/datasets/utility/utils.py +44 -17
  10. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_utils/config.py +0 -18
  11. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_utils/constants.py +0 -2
  12. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_utils/files.py +8 -68
  13. dapla_toolbelt_metadata-0.6.5/src/dapla_metadata/variable_definitions/_utils/descriptions.py +0 -89
  14. dapla_toolbelt_metadata-0.6.5/src/dapla_metadata/variable_definitions/resources/vardef_model_descriptions_nb.yaml +0 -109
  15. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/LICENSE +0 -0
  16. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/README.md +0 -0
  17. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/_shared/__init__.py +0 -0
  18. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/_shared/config.py +0 -0
  19. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/_shared/enums.py +0 -0
  20. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/_shared/py.typed +0 -0
  21. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/dapla/__init__.py +0 -0
  22. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/dapla/user_info.py +0 -0
  23. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/datasets/__init__.py +0 -0
  24. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/datasets/code_list.py +0 -0
  25. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/datasets/external_sources/__init__.py +0 -0
  26. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/datasets/external_sources/external_sources.py +0 -0
  27. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/datasets/model_backwards_compatibility.py +0 -0
  28. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/datasets/model_validation.py +0 -0
  29. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/datasets/py.typed +0 -0
  30. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/datasets/utility/__init__.py +0 -0
  31. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/datasets/utility/enums.py +0 -0
  32. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/standards/__init__.py +0 -0
  33. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/standards/name_validator.py +0 -0
  34. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/standards/standard_validators.py +0 -0
  35. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/standards/utils/__init__.py +0 -0
  36. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/standards/utils/constants.py +0 -0
  37. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/__init__.py +0 -0
  38. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/.openapi-generator/FILES +0 -0
  39. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/.openapi-generator/VERSION +0 -0
  40. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/.openapi-generator-ignore +0 -0
  41. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/README.md +0 -0
  42. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/__init__.py +0 -0
  43. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/__init__.py +0 -0
  44. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/api/__init__.py +0 -0
  45. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/api/data_migration_api.py +0 -0
  46. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/api/draft_variable_definitions_api.py +0 -0
  47. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/api/patches_api.py +0 -0
  48. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/api/validity_periods_api.py +0 -0
  49. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/api/variable_definitions_api.py +0 -0
  50. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/api_client.py +0 -0
  51. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/api_response.py +0 -0
  52. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/configuration.py +0 -0
  53. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/CompleteResponse.md +0 -0
  54. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/Contact.md +0 -0
  55. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/DataMigrationApi.md +0 -0
  56. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/Draft.md +0 -0
  57. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/DraftVariableDefinitionsApi.md +0 -0
  58. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/LanguageStringType.md +0 -0
  59. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/Owner.md +0 -0
  60. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/Patch.md +0 -0
  61. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/PatchesApi.md +0 -0
  62. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/PublicApi.md +0 -0
  63. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/SupportedLanguages.md +0 -0
  64. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/UpdateDraft.md +0 -0
  65. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/ValidityPeriod.md +0 -0
  66. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/ValidityPeriodsApi.md +0 -0
  67. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/VariableDefinitionsApi.md +0 -0
  68. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/VariableStatus.md +0 -0
  69. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/exceptions.py +0 -0
  70. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/__init__.py +0 -0
  71. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/complete_response.py +0 -0
  72. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/contact.py +0 -0
  73. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/draft.py +0 -0
  74. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/language_string_type.py +0 -0
  75. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/owner.py +0 -0
  76. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/patch.py +0 -0
  77. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/problem.py +0 -0
  78. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/update_draft.py +0 -0
  79. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/validity_period.py +0 -0
  80. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/variable_status.py +0 -0
  81. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/py.typed +0 -0
  82. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_generated/vardef_client/rest.py +0 -0
  83. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_utils/__init__.py +0 -0
  84. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_utils/_client.py +0 -0
  85. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_utils/template_files.py +0 -0
  86. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/_utils/variable_definition_files.py +0 -0
  87. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/exceptions.py +0 -0
  88. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/vardef.py +0 -0
  89. {dapla_toolbelt_metadata-0.6.5 → dapla_toolbelt_metadata-0.7.0}/src/dapla_metadata/variable_definitions/variable_definition.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: dapla-toolbelt-metadata
3
- Version: 0.6.5
3
+ Version: 0.7.0
4
4
  Summary: Dapla Toolbelt Metadata
5
5
  License: MIT
6
6
  Author: Team Metadata
@@ -24,7 +24,7 @@ Requires-Dist: pyjwt (>=2.8.0)
24
24
  Requires-Dist: python-dotenv (>=1.0.1)
25
25
  Requires-Dist: requests (>=2.31.0)
26
26
  Requires-Dist: ruamel-yaml (>=0.18.10)
27
- Requires-Dist: ssb-datadoc-model (==6.0.0)
27
+ Requires-Dist: ssb-datadoc-model (==6.1.0)
28
28
  Requires-Dist: ssb-klass-python (>=1.0.1)
29
29
  Requires-Dist: typing-extensions (>=4.12.2)
30
30
  Project-URL: Changelog, https://github.com/statisticsnorway/dapla-toolbelt-metadata/releases
@@ -2,7 +2,7 @@
2
2
  name = "dapla-toolbelt-metadata"
3
3
  description = "Dapla Toolbelt Metadata"
4
4
  license = "MIT"
5
- version = "0.6.5"
5
+ version = "0.7.0"
6
6
  dynamic = ["classifiers"]
7
7
  readme = "README.md"
8
8
  authors = [{ name = "Team Metadata", email = "metadata@ssb.no" }]
@@ -17,7 +17,7 @@ dependencies = [
17
17
  'cloudpathlib[gs] >=0.17.0',
18
18
  'pyjwt >=2.8.0',
19
19
  'ssb-klass-python >=1.0.1',
20
- 'ssb-datadoc-model ==6.0.0',
20
+ 'ssb-datadoc-model ==6.1.0',
21
21
  'typing-extensions >=4.12.2',
22
22
  'ruamel-yaml >=0.18.10',
23
23
  'google-auth >=2.38.0',
@@ -112,19 +112,7 @@ disable_error_code = ["unreachable"]
112
112
 
113
113
  [[tool.mypy.overrides]]
114
114
  # Allow missing type hints in third-party libraries without type information.
115
- module = [
116
- "nox",
117
- "dapla",
118
- "gcsfs",
119
- "pyarrow",
120
- "pyarrow.parquet",
121
- "datadoc_model",
122
- "datadoc_model.model",
123
- "pytest_mock",
124
- "testcontainers.*",
125
- "httpx",
126
- "ruamel.*",
127
- ]
115
+ module = ["nox", "dapla", "gcsfs", "pyarrow", "pyarrow.parquet", "pytest_mock", "testcontainers.*", "httpx", "ruamel.*"]
128
116
  ignore_missing_imports = true
129
117
 
130
118
  # Disable specific error codes in the 'tests' package
@@ -7,7 +7,7 @@ warnings.filterwarnings(
7
7
  message="As the c extension couldn't be imported, `google-crc32c` is using a pure python implementation that is significantly slower.",
8
8
  )
9
9
 
10
- import datadoc_model.model as datadoc_model
10
+ import datadoc_model.all_optional.model as datadoc_model
11
11
 
12
12
  from . import dapla
13
13
  from . import datasets
@@ -9,9 +9,11 @@ import warnings
9
9
  from concurrent.futures import ThreadPoolExecutor
10
10
  from pathlib import Path
11
11
  from typing import TYPE_CHECKING
12
+ from typing import cast
12
13
 
13
- from datadoc_model import model
14
- from datadoc_model.model import DataSetStatus
14
+ import datadoc_model.all_optional.model as all_optional_model
15
+ import datadoc_model.required.model as required_model
16
+ from datadoc_model.all_optional.model import DataSetStatus
15
17
 
16
18
  from dapla_metadata._shared import config
17
19
  from dapla_metadata.dapla import user_info
@@ -30,6 +32,8 @@ from dapla_metadata.datasets.utility.constants import INCONSISTENCIES_MESSAGE
30
32
  from dapla_metadata.datasets.utility.constants import METADATA_DOCUMENT_FILE_SUFFIX
31
33
  from dapla_metadata.datasets.utility.constants import NUM_OBLIGATORY_DATASET_FIELDS
32
34
  from dapla_metadata.datasets.utility.constants import NUM_OBLIGATORY_VARIABLES_FIELDS
35
+ from dapla_metadata.datasets.utility.utils import ExistingPseudonymizationMetadataType
36
+ from dapla_metadata.datasets.utility.utils import OptionalDatadocMetadataType
33
37
  from dapla_metadata.datasets.utility.utils import calculate_percentage
34
38
  from dapla_metadata.datasets.utility.utils import derive_assessment_from_state
35
39
  from dapla_metadata.datasets.utility.utils import get_timestamp_now
@@ -84,8 +88,8 @@ class Datadoc:
84
88
  dataset_path: str | None = None,
85
89
  metadata_document_path: str | None = None,
86
90
  statistic_subject_mapping: StatisticSubjectMapping | None = None,
87
- *,
88
91
  errors_as_warnings: bool = False,
92
+ validate_required_fields_on_existing_metadata: bool = False,
89
93
  ) -> None:
90
94
  """Initialize the Datadoc instance.
91
95
 
@@ -101,17 +105,23 @@ class Datadoc:
101
105
  Defaults to None
102
106
  errors_as_warnings: Disable raising exceptions if inconsistencies
103
107
  are found between existing and extracted metadata.
108
+ validate_required_fields_on_existing_metadata: Use a Pydantic model
109
+ which validates whether required fields are present when reading
110
+ in an existing metadata file.
104
111
  """
105
112
  self._statistic_subject_mapping = statistic_subject_mapping
106
113
  self.errors_as_warnings = errors_as_warnings
114
+ self.validate_required_fields_on_existing_metadata = (
115
+ validate_required_fields_on_existing_metadata
116
+ )
107
117
  self.metadata_document: pathlib.Path | CloudPath | None = None
108
- self.container: model.MetadataContainer | None = None
118
+ self.container: all_optional_model.MetadataContainer | None = None
109
119
  self.dataset_path: pathlib.Path | CloudPath | None = None
110
- self.dataset = model.Dataset()
120
+ self.dataset = all_optional_model.Dataset()
111
121
  self.variables: list = []
112
- self.pseudo_variables: list[model.PseudoVariable] = []
113
- self.variables_lookup: dict[str, model.Variable] = {}
114
- self.pseudo_variables_lookup: dict[str, model.PseudoVariable] = {}
122
+ self.pseudo_variables: list[all_optional_model.PseudoVariable] = []
123
+ self.variables_lookup: dict[str, all_optional_model.Variable] = {}
124
+ self.pseudo_variables_lookup: dict[str, all_optional_model.PseudoVariable] = {}
115
125
  self.explicitly_defined_metadata_document = False
116
126
  self.dataset_consistency_status: list = []
117
127
  if metadata_document_path:
@@ -149,9 +159,9 @@ class Datadoc:
149
159
  - The 'contains_personal_data' attribute is set to False if not specified.
150
160
  - A lookup dictionary for variables is created based on their short names.
151
161
  """
152
- extracted_metadata: model.DatadocMetadata | None = None
153
- existing_metadata: model.DatadocMetadata | None = None
154
- existing_pseudonymization: model.PseudonymizationMetadata | None = None
162
+ extracted_metadata: all_optional_model.DatadocMetadata | None = None
163
+ existing_metadata: OptionalDatadocMetadataType = None
164
+ existing_pseudonymization: ExistingPseudonymizationMetadataType = None
155
165
 
156
166
  if self.metadata_document and self.metadata_document.exists():
157
167
  existing_metadata = self._extract_metadata_from_existing_document(
@@ -166,11 +176,26 @@ class Datadoc:
166
176
 
167
177
  if (
168
178
  self.dataset_path is not None
169
- and self.dataset == model.Dataset()
179
+ and self.dataset == all_optional_model.Dataset()
170
180
  and len(self.variables) == 0
171
181
  ):
172
182
  extracted_metadata = self._extract_metadata_from_dataset(self.dataset_path)
173
183
 
184
+ if extracted_metadata is not None:
185
+ existing_file_path = self._get_existing_file_path(extracted_metadata)
186
+ if (
187
+ self.dataset_path
188
+ and existing_file_path is not None
189
+ and extracted_metadata is not None
190
+ and existing_metadata is not None
191
+ ):
192
+ self.dataset_consistency_status = self._check_dataset_consistency(
193
+ self.dataset_path,
194
+ Path(existing_file_path),
195
+ extracted_metadata,
196
+ existing_metadata,
197
+ )
198
+
174
199
  if (
175
200
  self.dataset_path
176
201
  and self.explicitly_defined_metadata_document
@@ -179,13 +204,6 @@ class Datadoc:
179
204
  and extracted_metadata is not None
180
205
  and existing_metadata is not None
181
206
  ):
182
- existing_file_path = self._get_existing_file_path(extracted_metadata)
183
- self.dataset_consistency_status = self._check_dataset_consistency(
184
- self.dataset_path,
185
- Path(existing_file_path),
186
- extracted_metadata,
187
- existing_metadata,
188
- )
189
207
  self._check_ready_to_merge(
190
208
  self.dataset_consistency_status,
191
209
  errors_as_warnings=self.errors_as_warnings,
@@ -207,14 +225,14 @@ class Datadoc:
207
225
  self._set_pseudonymization_metadata(existing_pseudonymization)
208
226
 
209
227
  set_default_values_variables(self.variables)
210
- set_default_values_dataset(self.dataset)
228
+ set_default_values_dataset(cast("all_optional_model.Dataset", self.dataset))
211
229
  set_dataset_owner(self.dataset)
212
230
  self._create_variables_lookup()
213
231
  self._create_pseudo_variables_lookup()
214
232
 
215
233
  def _get_existing_file_path(
216
234
  self,
217
- extracted_metadata: model.DatadocMetadata | None,
235
+ extracted_metadata: all_optional_model.DatadocMetadata | None,
218
236
  ) -> str:
219
237
  if (
220
238
  extracted_metadata is not None
@@ -227,19 +245,19 @@ class Datadoc:
227
245
 
228
246
  def _set_metadata(
229
247
  self,
230
- merged_metadata: model.DatadocMetadata | None,
248
+ merged_metadata: OptionalDatadocMetadataType,
231
249
  ) -> None:
232
250
  if not merged_metadata or not (
233
251
  merged_metadata.dataset and merged_metadata.variables
234
252
  ):
235
253
  msg = "Could not read metadata"
236
254
  raise ValueError(msg)
237
- self.dataset = merged_metadata.dataset
255
+ self.dataset = cast("all_optional_model.Dataset", merged_metadata.dataset)
238
256
  self.variables = merged_metadata.variables
239
257
 
240
258
  def _set_pseudonymization_metadata(
241
259
  self,
242
- existing_pseudonymization: model.PseudonymizationMetadata | None,
260
+ existing_pseudonymization: ExistingPseudonymizationMetadataType,
243
261
  ) -> None:
244
262
  if not existing_pseudonymization or not (
245
263
  existing_pseudonymization.pseudo_variables is not None
@@ -247,7 +265,10 @@ class Datadoc:
247
265
  msg = "Error reading pseudonymization metadata"
248
266
  logger.error(msg)
249
267
  return
250
- self.pseudo_variables = existing_pseudonymization.pseudo_variables
268
+ self.pseudo_variables = cast(
269
+ "list[all_optional_model.PseudoVariable]",
270
+ existing_pseudonymization.pseudo_variables,
271
+ )
251
272
 
252
273
  def _create_variables_lookup(self) -> None:
253
274
  self.variables_lookup = {
@@ -264,8 +285,8 @@ class Datadoc:
264
285
  def _check_dataset_consistency(
265
286
  new_dataset_path: Path | CloudPath,
266
287
  existing_dataset_path: Path,
267
- extracted_metadata: model.DatadocMetadata,
268
- existing_metadata: model.DatadocMetadata,
288
+ extracted_metadata: all_optional_model.DatadocMetadata,
289
+ existing_metadata: OptionalDatadocMetadataType,
269
290
  ) -> list[dict[str, object]]:
270
291
  """Run consistency tests.
271
292
 
@@ -312,14 +333,16 @@ class Datadoc:
312
333
  {
313
334
  "name": "Variable names",
314
335
  "success": (
315
- {v.short_name for v in extracted_metadata.variables or []}
336
+ existing_metadata is not None
337
+ and {v.short_name for v in extracted_metadata.variables or []}
316
338
  == {v.short_name for v in existing_metadata.variables or []}
317
339
  ),
318
340
  },
319
341
  {
320
342
  "name": "Variable datatypes",
321
343
  "success": (
322
- [v.data_type for v in extracted_metadata.variables or []]
344
+ existing_metadata is not None
345
+ and [v.data_type for v in extracted_metadata.variables or []]
323
346
  == [v.data_type for v in existing_metadata.variables or []]
324
347
  ),
325
348
  },
@@ -353,27 +376,29 @@ class Datadoc:
353
376
 
354
377
  @staticmethod
355
378
  def _merge_metadata(
356
- extracted_metadata: model.DatadocMetadata | None,
357
- existing_metadata: model.DatadocMetadata | None,
358
- ) -> model.DatadocMetadata:
379
+ extracted_metadata: all_optional_model.DatadocMetadata | None,
380
+ existing_metadata: OptionalDatadocMetadataType,
381
+ ) -> all_optional_model.DatadocMetadata:
359
382
  if not existing_metadata:
360
383
  logger.warning(
361
384
  "No existing metadata found, no merge to perform. Continuing with extracted metadata.",
362
385
  )
363
- return extracted_metadata or model.DatadocMetadata()
386
+ return extracted_metadata or all_optional_model.DatadocMetadata()
364
387
 
365
388
  if not extracted_metadata:
366
- return existing_metadata
389
+ return cast("all_optional_model.DatadocMetadata", existing_metadata)
367
390
 
368
391
  # Use the extracted metadata as a base
369
- merged_metadata = model.DatadocMetadata(
392
+ merged_metadata = all_optional_model.DatadocMetadata(
370
393
  dataset=copy.deepcopy(extracted_metadata.dataset),
371
394
  variables=[],
372
395
  )
373
396
 
374
397
  override_dataset_fields(
375
398
  merged_metadata=merged_metadata,
376
- existing_metadata=existing_metadata,
399
+ existing_metadata=cast(
400
+ "all_optional_model.DatadocMetadata", existing_metadata
401
+ ),
377
402
  )
378
403
 
379
404
  # Merge variables.
@@ -387,7 +412,7 @@ class Datadoc:
387
412
  def _extract_metadata_from_existing_document(
388
413
  self,
389
414
  document: pathlib.Path | CloudPath,
390
- ) -> model.DatadocMetadata | None:
415
+ ) -> OptionalDatadocMetadataType:
391
416
  """Read metadata from an existing metadata document.
392
417
 
393
418
  If an existing metadata document is available, this method reads and
@@ -402,7 +427,13 @@ class Datadoc:
402
427
 
403
428
  Raises:
404
429
  json.JSONDecodeError: If the metadata document cannot be parsed.
430
+ pydantic.ValidationError: If the data does not successfully validate.
405
431
  """
432
+ metadata_model = (
433
+ required_model
434
+ if self.validate_required_fields_on_existing_metadata
435
+ else all_optional_model
436
+ )
406
437
  fresh_metadata = {}
407
438
  try:
408
439
  with document.open(mode="r", encoding="utf-8") as file:
@@ -412,7 +443,7 @@ class Datadoc:
412
443
  fresh_metadata,
413
444
  )
414
445
  if is_metadata_in_container_structure(fresh_metadata):
415
- self.container = model.MetadataContainer.model_validate_json(
446
+ self.container = metadata_model.MetadataContainer.model_validate_json(
416
447
  json.dumps(fresh_metadata),
417
448
  )
418
449
  datadoc_metadata = fresh_metadata["datadoc"]
@@ -420,7 +451,7 @@ class Datadoc:
420
451
  datadoc_metadata = fresh_metadata
421
452
  if datadoc_metadata is None:
422
453
  return None
423
- return model.DatadocMetadata.model_validate_json(
454
+ return metadata_model.DatadocMetadata.model_validate_json(
424
455
  json.dumps(datadoc_metadata),
425
456
  )
426
457
  except json.JSONDecodeError:
@@ -435,7 +466,11 @@ class Datadoc:
435
466
  def _extract_pseudonymization_from_existing_document(
436
467
  self,
437
468
  document: pathlib.Path | CloudPath,
438
- ) -> model.PseudonymizationMetadata | None:
469
+ ) -> (
470
+ all_optional_model.PseudonymizationMetadata
471
+ | required_model.PseudonymizationMetadata
472
+ | None
473
+ ):
439
474
  """Read pseudo metadata from an existing metadata document.
440
475
 
441
476
  If there is pseudo metadata in the document supplied, the method validates and returns the pseudonymization structure.
@@ -445,7 +480,14 @@ class Datadoc:
445
480
 
446
481
  Raises:
447
482
  json.JSONDecodeError: If the metadata document cannot be parsed.
483
+ pydantic.ValidationError: If the data does not successfully validate.
448
484
  """
485
+ metadata_model = (
486
+ required_model
487
+ if self.validate_required_fields_on_existing_metadata
488
+ else all_optional_model
489
+ )
490
+
449
491
  try:
450
492
  with document.open(mode="r", encoding="utf-8") as file:
451
493
  fresh_metadata = json.load(file)
@@ -464,7 +506,7 @@ class Datadoc:
464
506
  if pseudonymization_metadata is None:
465
507
  return None
466
508
 
467
- return model.PseudonymizationMetadata.model_validate_json(
509
+ return metadata_model.PseudonymizationMetadata.model_validate_json(
468
510
  json.dumps(pseudonymization_metadata),
469
511
  )
470
512
 
@@ -500,7 +542,7 @@ class Datadoc:
500
542
  def _extract_metadata_from_dataset(
501
543
  self,
502
544
  dataset: pathlib.Path | CloudPath,
503
- ) -> model.DatadocMetadata:
545
+ ) -> all_optional_model.DatadocMetadata:
504
546
  """Obtain what metadata we can from the dataset itself.
505
547
 
506
548
  This makes it easier for the user by 'pre-filling' certain fields.
@@ -520,9 +562,9 @@ class Datadoc:
520
562
  - variables: A list of fields extracted from the dataset schema.
521
563
  """
522
564
  dapla_dataset_path_info = DaplaDatasetPathInfo(dataset)
523
- metadata = model.DatadocMetadata()
565
+ metadata = all_optional_model.DatadocMetadata()
524
566
 
525
- metadata.dataset = model.Dataset(
567
+ metadata.dataset = all_optional_model.Dataset(
526
568
  short_name=dapla_dataset_path_info.dataset_short_name,
527
569
  dataset_state=dapla_dataset_path_info.dataset_state,
528
570
  dataset_status=DataSetStatus.DRAFT,
@@ -586,12 +628,14 @@ class Datadoc:
586
628
  if self.container:
587
629
  self.container.datadoc = datadoc
588
630
  if not self.container.pseudonymization:
589
- self.container.pseudonymization = model.PseudonymizationMetadata(
590
- pseudo_dataset=model.PseudoDataset()
631
+ self.container.pseudonymization = (
632
+ all_optional_model.PseudonymizationMetadata(
633
+ pseudo_dataset=all_optional_model.PseudoDataset()
634
+ )
591
635
  )
592
636
  self.container.pseudonymization.pseudo_variables = self.pseudo_variables
593
637
  else:
594
- self.container = model.MetadataContainer(datadoc=datadoc)
638
+ self.container = all_optional_model.MetadataContainer(datadoc=datadoc)
595
639
  if self.metadata_document:
596
640
  content = self.container.model_dump_json(indent=4)
597
641
  self.metadata_document.write_text(content)
@@ -623,12 +667,14 @@ class Datadoc:
623
667
  def add_pseudo_variable(self, variable_short_name: str) -> None:
624
668
  """Adds a new pseudo variable to the list of pseudonymized variables."""
625
669
  if self.variables_lookup[variable_short_name] is not None:
626
- pseudo_variable = model.PseudoVariable(short_name=variable_short_name)
670
+ pseudo_variable = all_optional_model.PseudoVariable(
671
+ short_name=variable_short_name
672
+ )
627
673
  self.pseudo_variables.append(pseudo_variable)
628
674
  self.pseudo_variables_lookup[variable_short_name] = pseudo_variable
629
675
 
630
676
  def get_pseudo_variable(
631
677
  self, variable_short_name: str
632
- ) -> model.PseudoVariable | None:
678
+ ) -> all_optional_model.PseudoVariable | None:
633
679
  """Finds a pseudo variable by shortname."""
634
680
  return self.pseudo_variables_lookup.get(variable_short_name)
@@ -14,7 +14,7 @@ from typing import Literal
14
14
 
15
15
  import arrow
16
16
  from cloudpathlib import GSPath
17
- from datadoc_model.model import DataSetState
17
+ from datadoc_model.all_optional.model import DataSetState
18
18
 
19
19
  if TYPE_CHECKING:
20
20
  import datetime
@@ -12,10 +12,10 @@ from abc import abstractmethod
12
12
  from typing import TYPE_CHECKING
13
13
 
14
14
  import pandas as pd
15
- from datadoc_model.model import DataType
16
- from datadoc_model.model import LanguageStringType
17
- from datadoc_model.model import LanguageStringTypeItem
18
- from datadoc_model.model import Variable
15
+ from datadoc_model.all_optional.model import DataType
16
+ from datadoc_model.all_optional.model import LanguageStringType
17
+ from datadoc_model.all_optional.model import LanguageStringTypeItem
18
+ from datadoc_model.all_optional.model import Variable
19
19
  from pyarrow import parquet as pq
20
20
 
21
21
  from dapla_metadata.datasets.utility.enums import SupportedLanguages
@@ -140,7 +140,11 @@ class StatisticSubjectMapping(GetExternalSource):
140
140
  SecondarySubject(
141
141
  self._extract_titles(s.titler),
142
142
  s["emnekode"],
143
- [statistikk["kortnavn"] for statistikk in s.find_all("Statistikk")],
143
+ [
144
+ statistikk["kortnavn"]
145
+ for statistikk in s.find_all("Statistikk")
146
+ if statistikk["isPrimaerPlassering"] == "true"
147
+ ],
144
148
  )
145
149
  for s in p.find_all("delemne")
146
150
  ]
@@ -1,7 +1,7 @@
1
1
  """Repository for constant values in Datadoc backend."""
2
2
 
3
- from datadoc_model.model import LanguageStringType
4
- from datadoc_model.model import LanguageStringTypeItem
3
+ from datadoc_model.all_optional.model import LanguageStringType
4
+ from datadoc_model.all_optional.model import LanguageStringTypeItem
5
5
 
6
6
  VALIDATION_ERROR = "Validation error: "
7
7
 
@@ -4,15 +4,19 @@ import datetime # import is needed in xdoctest
4
4
  import logging
5
5
  import pathlib
6
6
  import uuid
7
+ from typing import cast
7
8
 
9
+ import datadoc_model
10
+ import datadoc_model.all_optional.model as all_optional_model
11
+ import datadoc_model.required.model as required_model
8
12
  import google.auth
9
13
  from cloudpathlib import CloudPath
10
14
  from cloudpathlib import GSClient
11
15
  from cloudpathlib import GSPath
12
16
  from datadoc_model import model
13
- from datadoc_model.model import Assessment
14
- from datadoc_model.model import DataSetState
15
- from datadoc_model.model import VariableRole
17
+ from datadoc_model.all_optional.model import Assessment
18
+ from datadoc_model.all_optional.model import DataSetState
19
+ from datadoc_model.all_optional.model import VariableRole
16
20
 
17
21
  from dapla_metadata.dapla import user_info
18
22
  from dapla_metadata.datasets.utility.constants import (
@@ -34,6 +38,17 @@ from dapla_metadata.datasets.utility.constants import (
34
38
 
35
39
  logger = logging.getLogger(__name__)
36
40
 
41
+ DatadocMetadataType = (
42
+ all_optional_model.DatadocMetadata | required_model.DatadocMetadata
43
+ )
44
+ DatasetType = all_optional_model.Dataset | required_model.Dataset
45
+ OptionalDatadocMetadataType = DatadocMetadataType | None
46
+ ExistingPseudonymizationMetadataType = (
47
+ all_optional_model.PseudonymizationMetadata
48
+ | required_model.PseudonymizationMetadata
49
+ | None
50
+ )
51
+
37
52
 
38
53
  def get_timestamp_now() -> datetime.datetime:
39
54
  """Return a timestamp for the current moment."""
@@ -119,7 +134,9 @@ def set_default_values_variables(variables: list) -> None:
119
134
  v.variable_role = VariableRole.MEASURE
120
135
 
121
136
 
122
- def set_default_values_dataset(dataset: model.Dataset) -> None:
137
+ def set_default_values_dataset(
138
+ dataset: DatasetType,
139
+ ) -> None:
123
140
  """Set default values on dataset.
124
141
 
125
142
  Args:
@@ -140,7 +157,9 @@ def set_default_values_dataset(dataset: model.Dataset) -> None:
140
157
  dataset.contains_personal_data = False
141
158
 
142
159
 
143
- def set_dataset_owner(dataset: model.Dataset) -> None:
160
+ def set_dataset_owner(
161
+ dataset: DatasetType,
162
+ ) -> None:
144
163
  """Sets the owner of the dataset from the DAPLA_GROUP_CONTEXT enviornment variable.
145
164
 
146
165
  Args:
@@ -153,7 +172,7 @@ def set_dataset_owner(dataset: model.Dataset) -> None:
153
172
 
154
173
 
155
174
  def set_variables_inherit_from_dataset(
156
- dataset: model.Dataset,
175
+ dataset: DatasetType,
157
176
  variables: list,
158
177
  ) -> None:
159
178
  """Set specific dataset values on a list of variable objects.
@@ -283,7 +302,9 @@ def _is_missing_metadata(
283
302
  )
284
303
 
285
304
 
286
- def num_obligatory_dataset_fields_completed(dataset: model.Dataset) -> int:
305
+ def num_obligatory_dataset_fields_completed(
306
+ dataset: DatasetType,
307
+ ) -> int:
287
308
  """Count the number of completed obligatory dataset fields.
288
309
 
289
310
  This function returns the total count of obligatory fields in the dataset that
@@ -345,7 +366,9 @@ def num_obligatory_variable_fields_completed(variable: model.Variable) -> int:
345
366
  return NUM_OBLIGATORY_VARIABLES_FIELDS - len(missing_metadata)
346
367
 
347
368
 
348
- def get_missing_obligatory_dataset_fields(dataset: model.Dataset) -> list:
369
+ def get_missing_obligatory_dataset_fields(
370
+ dataset: DatasetType,
371
+ ) -> list:
349
372
  """Identify all obligatory dataset fields that are missing values.
350
373
 
351
374
  This function checks for obligatory fields that are either directly missing
@@ -422,8 +445,9 @@ def running_in_notebook() -> bool:
422
445
 
423
446
 
424
447
  def override_dataset_fields(
425
- merged_metadata: model.DatadocMetadata,
426
- existing_metadata: model.DatadocMetadata,
448
+ merged_metadata: all_optional_model.DatadocMetadata,
449
+ existing_metadata: all_optional_model.DatadocMetadata
450
+ | required_model.DatadocMetadata,
427
451
  ) -> None:
428
452
  """Overrides specific fields in the dataset of `merged_metadata` with values from the dataset of `existing_metadata`.
429
453
 
@@ -449,10 +473,10 @@ def override_dataset_fields(
449
473
 
450
474
 
451
475
  def merge_variables(
452
- existing_metadata: model.DatadocMetadata,
453
- extracted_metadata: model.DatadocMetadata,
454
- merged_metadata: model.DatadocMetadata,
455
- ) -> model.DatadocMetadata:
476
+ existing_metadata: OptionalDatadocMetadataType,
477
+ extracted_metadata: all_optional_model.DatadocMetadata,
478
+ merged_metadata: all_optional_model.DatadocMetadata,
479
+ ) -> all_optional_model.DatadocMetadata:
456
480
  """Merges variables from the extracted metadata into the existing metadata and updates the merged metadata.
457
481
 
458
482
  This function compares the variables from `extracted_metadata` with those in `existing_metadata`.
@@ -466,11 +490,12 @@ def merge_variables(
466
490
  merged_metadata: The metadata object that will contain the result of the merge.
467
491
 
468
492
  Returns:
469
- model.DatadocMetadata: The `merged_metadata` object containing variables from both `existing_metadata`
493
+ all_optional_model.DatadocMetadata: The `merged_metadata` object containing variables from both `existing_metadata`
470
494
  and `extracted_metadata`.
471
495
  """
472
496
  if (
473
- existing_metadata.variables is not None
497
+ existing_metadata is not None
498
+ and existing_metadata.variables is not None
474
499
  and extracted_metadata is not None
475
500
  and extracted_metadata.variables is not None
476
501
  and merged_metadata.variables is not None
@@ -494,7 +519,9 @@ def merge_variables(
494
519
  existing.contains_data_until = (
495
520
  extracted.contains_data_until or existing.contains_data_until
496
521
  )
497
- merged_metadata.variables.append(existing)
522
+ merged_metadata.variables.append(
523
+ cast("datadoc_model.all_optional.model.Variable", existing)
524
+ )
498
525
  else:
499
526
  # If there is no existing metadata for this variable, we just use what we have extracted
500
527
  merged_metadata.variables.append(extracted)
@@ -9,24 +9,6 @@ from dapla_metadata.variable_definitions._generated.vardef_client.configuration
9
9
 
10
10
  VARDEF_HOST_TEST = "https://metadata.intern.test.ssb.no"
11
11
  WORKSPACE_DIR = "WORKSPACE_DIR"
12
- VARDEF_DESCRIPTIONS_FILE_PATH = "VARDEF_DESCRIPTIONS_FILE_PATH"
13
- VARDEF_DEFAULT_DESCRIPTION_PATH = (
14
- "variable_definitions/resources/vardef_model_descriptions_nb.yaml"
15
- )
16
-
17
-
18
- def get_descriptions_path() -> str:
19
- """Get the relative file path from the repo root to the Norwegian descriptions.
20
-
21
- First checks the `VARDEF_DESCRIPTIONS_FILE_PATH` environment variable; if not set, returns a default path.
22
-
23
- Returns:
24
- str: The file path to the descriptions.
25
- """
26
- return (
27
- get_config_item(VARDEF_DESCRIPTIONS_FILE_PATH)
28
- or VARDEF_DEFAULT_DESCRIPTION_PATH
29
- )
30
12
 
31
13
 
32
14
  def get_workspace_dir() -> str | None:
@@ -24,8 +24,6 @@ TEMPLATE_SECTION_HEADER_MACHINE_GENERATED_EN = (
24
24
  "\n--- Machine generated fields. Do not edit ---\n"
25
25
  )
26
26
 
27
- NORWEGIAN_DESCRIPTIONS = "norwegian_description"
28
-
29
27
  DEFAULT_DATE = date(1000, 1, 1)
30
28
 
31
29
  MACHINE_GENERATED_FIELDS = [