dapla-toolbelt-metadata 0.7.2__tar.gz → 0.8.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dapla-toolbelt-metadata might be problematic. Click here for more details.

Files changed (92)
  1. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/PKG-INFO +2 -2
  2. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/pyproject.toml +2 -2
  3. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/datasets/core.py +24 -123
  4. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/datasets/model_backwards_compatibility.py +113 -6
  5. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/datasets/utility/utils.py +4 -9
  6. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_utils/config.py +2 -2
  7. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/LICENSE +0 -0
  8. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/README.md +0 -0
  9. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/__init__.py +0 -0
  10. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/_shared/__init__.py +0 -0
  11. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/_shared/config.py +0 -0
  12. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/_shared/enums.py +0 -0
  13. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/_shared/py.typed +0 -0
  14. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/dapla/__init__.py +0 -0
  15. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/dapla/user_info.py +0 -0
  16. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/datasets/__init__.py +0 -0
  17. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/datasets/code_list.py +0 -0
  18. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/datasets/dapla_dataset_path_info.py +0 -0
  19. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/datasets/dataset_parser.py +0 -0
  20. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/datasets/external_sources/__init__.py +0 -0
  21. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/datasets/external_sources/external_sources.py +0 -0
  22. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/datasets/model_validation.py +0 -0
  23. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/datasets/py.typed +0 -0
  24. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/datasets/statistic_subject_mapping.py +0 -0
  25. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/datasets/utility/__init__.py +0 -0
  26. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/datasets/utility/constants.py +0 -0
  27. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/datasets/utility/enums.py +0 -0
  28. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/standards/__init__.py +0 -0
  29. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/standards/name_validator.py +0 -0
  30. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/standards/standard_validators.py +0 -0
  31. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/standards/utils/__init__.py +0 -0
  32. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/standards/utils/constants.py +0 -0
  33. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/__init__.py +0 -0
  34. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/.openapi-generator/FILES +0 -0
  35. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/.openapi-generator/VERSION +0 -0
  36. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/.openapi-generator-ignore +0 -0
  37. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/README.md +0 -0
  38. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/__init__.py +0 -0
  39. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/__init__.py +0 -0
  40. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/api/__init__.py +0 -0
  41. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/api/data_migration_api.py +0 -0
  42. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/api/draft_variable_definitions_api.py +0 -0
  43. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/api/patches_api.py +0 -0
  44. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/api/validity_periods_api.py +0 -0
  45. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/api/variable_definitions_api.py +0 -0
  46. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/api_client.py +0 -0
  47. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/api_response.py +0 -0
  48. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/configuration.py +0 -0
  49. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/CompleteResponse.md +0 -0
  50. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/Contact.md +0 -0
  51. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/DataMigrationApi.md +0 -0
  52. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/Draft.md +0 -0
  53. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/DraftVariableDefinitionsApi.md +0 -0
  54. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/LanguageStringType.md +0 -0
  55. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/Owner.md +0 -0
  56. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/Patch.md +0 -0
  57. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/PatchesApi.md +0 -0
  58. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/PublicApi.md +0 -0
  59. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/SupportedLanguages.md +0 -0
  60. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/UpdateDraft.md +0 -0
  61. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/ValidityPeriod.md +0 -0
  62. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/ValidityPeriodsApi.md +0 -0
  63. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/VariableDefinitionsApi.md +0 -0
  64. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/VariableStatus.md +0 -0
  65. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/exceptions.py +0 -0
  66. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/__init__.py +0 -0
  67. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/complete_response.py +0 -0
  68. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/contact.py +0 -0
  69. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/draft.py +0 -0
  70. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/get_vardok_vardef_mapping_by_id200_response.py +0 -0
  71. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/language_string_type.py +0 -0
  72. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/owner.py +0 -0
  73. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/patch.py +0 -0
  74. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/problem.py +0 -0
  75. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/update_draft.py +0 -0
  76. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/validity_period.py +0 -0
  77. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/vardok_id_response.py +0 -0
  78. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/vardok_vardef_id_pair_response.py +0 -0
  79. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/variable_status.py +0 -0
  80. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/py.typed +0 -0
  81. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_generated/vardef_client/rest.py +0 -0
  82. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_utils/__init__.py +0 -0
  83. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_utils/_client.py +0 -0
  84. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_utils/constants.py +0 -0
  85. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_utils/files.py +0 -0
  86. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_utils/template_files.py +0 -0
  87. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/_utils/variable_definition_files.py +0 -0
  88. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/exceptions.py +0 -0
  89. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/vardef.py +0 -0
  90. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/vardok_id.py +0 -0
  91. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/vardok_vardef_id_pair.py +0 -0
  92. {dapla_toolbelt_metadata-0.7.2 → dapla_toolbelt_metadata-0.8.1}/src/dapla_metadata/variable_definitions/variable_definition.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: dapla-toolbelt-metadata
3
- Version: 0.7.2
3
+ Version: 0.8.1
4
4
  Summary: Dapla Toolbelt Metadata
5
5
  License: MIT
6
6
  Author: Team Metadata
@@ -24,7 +24,7 @@ Requires-Dist: pyjwt (>=2.8.0)
24
24
  Requires-Dist: python-dotenv (>=1.0.1)
25
25
  Requires-Dist: requests (>=2.31.0)
26
26
  Requires-Dist: ruamel-yaml (>=0.18.10)
27
- Requires-Dist: ssb-datadoc-model (==6.1.0)
27
+ Requires-Dist: ssb-datadoc-model (==7.0.1)
28
28
  Requires-Dist: ssb-klass-python (>=1.0.1)
29
29
  Requires-Dist: typing-extensions (>=4.12.2)
30
30
  Project-URL: Changelog, https://github.com/statisticsnorway/dapla-toolbelt-metadata/releases
@@ -2,7 +2,7 @@
2
2
  name = "dapla-toolbelt-metadata"
3
3
  description = "Dapla Toolbelt Metadata"
4
4
  license = "MIT"
5
- version = "0.7.2"
5
+ version = "0.8.1"
6
6
  dynamic = ["classifiers"]
7
7
  readme = "README.md"
8
8
  authors = [{ name = "Team Metadata", email = "metadata@ssb.no" }]
@@ -17,7 +17,7 @@ dependencies = [
17
17
  'cloudpathlib[gs] >=0.17.0',
18
18
  'pyjwt >=2.8.0',
19
19
  'ssb-klass-python >=1.0.1',
20
- 'ssb-datadoc-model ==6.1.0',
20
+ 'ssb-datadoc-model ==7.0.1',
21
21
  'typing-extensions >=4.12.2',
22
22
  'ruamel-yaml >=0.18.10',
23
23
  'google-auth >=2.38.0',
@@ -32,7 +32,6 @@ from dapla_metadata.datasets.utility.constants import INCONSISTENCIES_MESSAGE
32
32
  from dapla_metadata.datasets.utility.constants import METADATA_DOCUMENT_FILE_SUFFIX
33
33
  from dapla_metadata.datasets.utility.constants import NUM_OBLIGATORY_DATASET_FIELDS
34
34
  from dapla_metadata.datasets.utility.constants import NUM_OBLIGATORY_VARIABLES_FIELDS
35
- from dapla_metadata.datasets.utility.utils import ExistingPseudonymizationMetadataType
36
35
  from dapla_metadata.datasets.utility.utils import OptionalDatadocMetadataType
37
36
  from dapla_metadata.datasets.utility.utils import calculate_percentage
38
37
  from dapla_metadata.datasets.utility.utils import derive_assessment_from_state
@@ -119,9 +118,7 @@ class Datadoc:
119
118
  self.dataset_path: pathlib.Path | CloudPath | None = None
120
119
  self.dataset = all_optional_model.Dataset()
121
120
  self.variables: list = []
122
- self.pseudo_variables: list[all_optional_model.PseudoVariable] = []
123
121
  self.variables_lookup: dict[str, all_optional_model.Variable] = {}
124
- self.pseudo_variables_lookup: dict[str, all_optional_model.PseudoVariable] = {}
125
122
  self.explicitly_defined_metadata_document = False
126
123
  self.dataset_consistency_status: list = []
127
124
  if metadata_document_path:
@@ -161,19 +158,12 @@ class Datadoc:
161
158
  """
162
159
  extracted_metadata: all_optional_model.DatadocMetadata | None = None
163
160
  existing_metadata: OptionalDatadocMetadataType = None
164
- existing_pseudonymization: ExistingPseudonymizationMetadataType = None
165
161
 
166
162
  if self.metadata_document and self.metadata_document.exists():
167
163
  existing_metadata = self._extract_metadata_from_existing_document(
168
164
  self.metadata_document,
169
165
  )
170
166
 
171
- existing_pseudonymization = (
172
- self._extract_pseudonymization_from_existing_document(
173
- self.metadata_document,
174
- )
175
- )
176
-
177
167
  if (
178
168
  self.dataset_path is not None
179
169
  and self.dataset == all_optional_model.Dataset()
@@ -221,14 +211,10 @@ class Datadoc:
221
211
  else:
222
212
  self._set_metadata(existing_metadata or extracted_metadata)
223
213
 
224
- if existing_pseudonymization:
225
- self._set_pseudonymization_metadata(existing_pseudonymization)
226
-
227
214
  set_default_values_variables(self.variables)
228
215
  set_default_values_dataset(cast("all_optional_model.Dataset", self.dataset))
229
216
  set_dataset_owner(self.dataset)
230
217
  self._create_variables_lookup()
231
- self._create_pseudo_variables_lookup()
232
218
 
233
219
  def _get_existing_file_path(
234
220
  self,
@@ -255,32 +241,11 @@ class Datadoc:
255
241
  self.dataset = cast("all_optional_model.Dataset", merged_metadata.dataset)
256
242
  self.variables = merged_metadata.variables
257
243
 
258
- def _set_pseudonymization_metadata(
259
- self,
260
- existing_pseudonymization: ExistingPseudonymizationMetadataType,
261
- ) -> None:
262
- if not existing_pseudonymization or not (
263
- existing_pseudonymization.pseudo_variables is not None
264
- ):
265
- msg = "Error reading pseudonymization metadata"
266
- logger.error(msg)
267
- return
268
- self.pseudo_variables = cast(
269
- "list[all_optional_model.PseudoVariable]",
270
- existing_pseudonymization.pseudo_variables,
271
- )
272
-
273
244
  def _create_variables_lookup(self) -> None:
274
245
  self.variables_lookup = {
275
246
  v.short_name: v for v in self.variables if v.short_name
276
247
  }
277
248
 
278
- def _create_pseudo_variables_lookup(self) -> None:
279
- if self.pseudo_variables:
280
- self.pseudo_variables_lookup = {
281
- v.short_name: v for v in self.pseudo_variables if v.short_name
282
- }
283
-
284
249
  @staticmethod
285
250
  def _check_dataset_consistency(
286
251
  new_dataset_path: Path | CloudPath,
@@ -463,53 +428,6 @@ class Datadoc:
463
428
  )
464
429
  return None
465
430
 
466
- def _extract_pseudonymization_from_existing_document(
467
- self,
468
- document: pathlib.Path | CloudPath,
469
- ) -> (
470
- all_optional_model.PseudonymizationMetadata
471
- | required_model.PseudonymizationMetadata
472
- | None
473
- ):
474
- """Read pseudo metadata from an existing metadata document.
475
-
476
- If there is pseudo metadata in the document supplied, the method validates and returns the pseudonymization structure.
477
-
478
- Args:
479
- document: A path to the existing metadata document.
480
-
481
- Raises:
482
- json.JSONDecodeError: If the metadata document cannot be parsed.
483
- pydantic.ValidationError: If the data does not successfully validate.
484
- """
485
- metadata_model = (
486
- required_model
487
- if self.validate_required_fields_on_existing_metadata
488
- else all_optional_model
489
- )
490
-
491
- try:
492
- with document.open(mode="r", encoding="utf-8") as file:
493
- fresh_metadata = json.load(file)
494
- except json.JSONDecodeError:
495
- logger.warning(
496
- "Could not open existing metadata file %s.",
497
- document,
498
- exc_info=True,
499
- )
500
- return None
501
-
502
- if not is_metadata_in_container_structure(fresh_metadata):
503
- return None
504
-
505
- pseudonymization_metadata = fresh_metadata.get("pseudonymization")
506
- if pseudonymization_metadata is None:
507
- return None
508
-
509
- return metadata_model.PseudonymizationMetadata.model_validate_json(
510
- json.dumps(pseudonymization_metadata),
511
- )
512
-
513
431
  def _extract_subject_field_from_path(
514
432
  self,
515
433
  dapla_dataset_path_info: DaplaDatasetPathInfo,
@@ -627,13 +545,6 @@ class Datadoc:
627
545
  )
628
546
  if self.container:
629
547
  self.container.datadoc = datadoc
630
- if not self.container.pseudonymization:
631
- self.container.pseudonymization = (
632
- all_optional_model.PseudonymizationMetadata(
633
- pseudo_dataset=all_optional_model.PseudoDataset()
634
- )
635
- )
636
- self.container.pseudonymization.pseudo_variables = self.pseudo_variables
637
548
  else:
638
549
  self.container = all_optional_model.MetadataContainer(datadoc=datadoc)
639
550
  if self.metadata_document:
@@ -664,45 +575,35 @@ class Datadoc:
664
575
  ) + num_obligatory_variables_fields_completed(self.variables)
665
576
  return calculate_percentage(num_set_fields, num_all_fields)
666
577
 
667
- def add_pseudo_variable(self, variable_short_name: str) -> None:
668
- """Adds a new pseudo variable to the list of pseudonymized variables.
578
+ def add_pseudonymization(
579
+ self,
580
+ variable_short_name: str,
581
+ pseudonymization: all_optional_model.Pseudonymization | None = None,
582
+ ) -> None:
583
+ """Adds a new pseudo variable to the list of pseudonymized variables also sets is_personal_data to true.
584
+
585
+ If there is no pseudonymization supplied an empty Pseudonymization structure will be added to the model.
586
+
587
+ Args:
588
+ variable_short_name: The short name for the variable that one wants to update the pseudo for.
589
+ pseudonymization: The updated pseudonymization.
669
590
 
670
- Sets is_personal_data to pseudonymized encrypted personal data.
671
591
  """
672
- if self.variables_lookup[variable_short_name] is not None:
673
- pseudo_variable = all_optional_model.PseudoVariable(
674
- short_name=variable_short_name
675
- )
676
- self.pseudo_variables.append(pseudo_variable)
677
- self.pseudo_variables_lookup[variable_short_name] = pseudo_variable
678
- self.variables_lookup[
679
- variable_short_name
680
- ].is_personal_data = (
681
- all_optional_model.IsPersonalData.PSEUDONYMISED_ENCRYPTED_PERSONAL_DATA
682
- )
592
+ variable = self.variables_lookup[variable_short_name]
593
+ variable.pseudonymization = (
594
+ pseudonymization or all_optional_model.Pseudonymization()
595
+ )
596
+ variable.is_personal_data = True
683
597
 
684
- def remove_pseudo_variable(self, variable_short_name: str) -> None:
598
+ def remove_pseudonymization(self, variable_short_name: str) -> None:
685
599
  """Removes a pseudo variable by using the shortname.
686
600
 
687
601
  Updates the pseudo variable lookup by creating a new one.
688
602
  Sets is_personal_data to non pseudonymized encrypted personal data.
603
+
604
+ Args:
605
+ variable_short_name: The short name for the variable that one wants to remove the pseudo for.
689
606
  """
690
- if self.pseudo_variables_lookup[variable_short_name] is not None:
691
- pseudo_variable = self.get_pseudo_variable(variable_short_name)
692
-
693
- if pseudo_variable is not None:
694
- self.pseudo_variables = [
695
- pseudo_variable
696
- for pseudo_variable in self.pseudo_variables
697
- if pseudo_variable.short_name != variable_short_name
698
- ]
699
- self._create_pseudo_variables_lookup()
700
- self.variables_lookup[
701
- variable_short_name
702
- ].is_personal_data = all_optional_model.IsPersonalData.NON_PSEUDONYMISED_ENCRYPTED_PERSONAL_DATA
703
-
704
- def get_pseudo_variable(
705
- self, variable_short_name: str
706
- ) -> all_optional_model.PseudoVariable | None:
707
- """Finds a pseudo variable by shortname."""
708
- return self.pseudo_variables_lookup.get(variable_short_name)
607
+ if self.variables_lookup[variable_short_name].pseudonymization is not None:
608
+ self.variables_lookup[variable_short_name].pseudonymization = None
609
+ self.variables_lookup[variable_short_name].is_personal_data = False
@@ -13,6 +13,7 @@ A test must also be implemented for each new version.
13
13
 
14
14
  from __future__ import annotations
15
15
 
16
+ import logging
16
17
  from collections import OrderedDict
17
18
  from dataclasses import dataclass
18
19
  from datetime import datetime
@@ -22,10 +23,13 @@ from typing import Any
22
23
 
23
24
  import arrow
24
25
 
26
+ logger = logging.getLogger(__name__)
27
+
25
28
  if TYPE_CHECKING:
26
29
  from collections.abc import Callable
27
30
 
28
31
  VERSION_FIELD_NAME = "document_version"
32
+ PSEUDONYMIZATION_KEY = "pseudonymization"
29
33
 
30
34
 
31
35
  class UnknownModelVersionError(Exception):
@@ -187,6 +191,106 @@ def _cast_to_date_type(value_to_update: str | None) -> str | None:
187
191
  )
188
192
 
189
193
 
194
+ def convert_is_personal_data(supplied_metadata: dict[str, Any]) -> None:
195
+ """Convert 'is_personal_data' values in the supplied metadata to boolean.
196
+
197
+ Iterates over variables in the supplied metadata and updates the
198
+ 'is_personal_data' field:
199
+ - Sets it to True for NON_PSEUDONYMISED_ENCRYPTED_PERSONAL_DATA and PSEUDONYMISED_ENCRYPTED_PERSONAL_DATA.
200
+ - Sets it to False for NOT_PERSONAL_DATA.
201
+
202
+ Args:
203
+ supplied_metadata: The metadata dictionary to be updated.
204
+ """
205
+ for variable in supplied_metadata["datadoc"]["variables"]:
206
+ value = variable["is_personal_data"]
207
+ if value in (
208
+ "NON_PSEUDONYMISED_ENCRYPTED_PERSONAL_DATA",
209
+ "PSEUDONYMISED_ENCRYPTED_PERSONAL_DATA",
210
+ ):
211
+ variable["is_personal_data"] = True
212
+ elif value == "NOT_PERSONAL_DATA":
213
+ variable["is_personal_data"] = False
214
+
215
+
216
+ def copy_pseudonymization_metadata(supplied_metadata: dict[str, Any]) -> None:
217
+ """Copies pseudonymization metadata from the old pseudonymization section into the corresponding variable.
218
+
219
+ For each variable in `supplied_metadata["datadoc"]["variables"]` that has a matching
220
+ `short_name` in `supplied_metadata["pseudonymization"]["pseudo_variables"]`, this
221
+ function copies the following fields into the variable's 'pseudonymization' dictionary:
222
+
223
+ - stable_identifier_type
224
+ - stable_identifier_version
225
+ - encryption_algorithm
226
+ - encryption_key_reference
227
+ - encryption_algorithm_parameters
228
+
229
+ From the pseudo_dataset the value dataset_pseudo_time is copied to each variable as pseudonymization_time.
230
+
231
+ Args:
232
+ supplied_metadata: The metadata dictionary to be updated.
233
+ """
234
+ pseudo_vars = supplied_metadata.get(PSEUDONYMIZATION_KEY, {}).get(
235
+ "pseudo_variables", []
236
+ )
237
+ pseudo_dataset = (
238
+ supplied_metadata.get(PSEUDONYMIZATION_KEY, {}).get("pseudo_dataset") or {}
239
+ )
240
+ pseudo_time = pseudo_dataset.get("dataset_pseudo_time", None)
241
+ datadoc_vars = supplied_metadata.get("datadoc", {}).get("variables", [])
242
+ pseudo_lookup = {var.get("short_name"): var for var in pseudo_vars}
243
+
244
+ for variable in datadoc_vars:
245
+ short_name = variable.get("short_name")
246
+ if short_name in pseudo_lookup:
247
+ pseudo_var = pseudo_lookup[short_name]
248
+ variable[PSEUDONYMIZATION_KEY] = variable.get(
249
+ PSEUDONYMIZATION_KEY, {}
250
+ ).copy()
251
+
252
+ for field in [
253
+ "stable_identifier_type",
254
+ "stable_identifier_version",
255
+ "encryption_algorithm",
256
+ "encryption_key_reference",
257
+ "encryption_algorithm_parameters",
258
+ ]:
259
+ variable[PSEUDONYMIZATION_KEY][field] = pseudo_var[field]
260
+ variable[PSEUDONYMIZATION_KEY]["pseudonymization_time"] = pseudo_time
261
+
262
+ else:
263
+ variable[PSEUDONYMIZATION_KEY] = None
264
+
265
+
266
+ def handle_version_4_0_0(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
267
+ """Handle breaking changes for version 5.0.1.
268
+
269
+ This function modifies the supplied metadata to accommodate breaking changes
270
+ introduced in version 5.0.1. Specifically, it:
271
+ - Copies pseudonymization metadata if pseudonymization is enabled.
272
+ - Converts the 'is_personal_data' fields to be a bool.
273
+ - Updates the 'document_version' field in the 'datadoc' section to "5.0.1".
274
+ - All 'pseudonymization' from the container is removed.
275
+ - It also updates the container version to 1.0.0 from 0.0.1
276
+
277
+ Args:
278
+ supplied_metadata: The metadata dictionary to be updated.
279
+
280
+ Returns:
281
+ The updated metadata dictionary.
282
+ """
283
+ if supplied_metadata.get(PSEUDONYMIZATION_KEY):
284
+ copy_pseudonymization_metadata(supplied_metadata)
285
+
286
+ convert_is_personal_data(supplied_metadata)
287
+
288
+ supplied_metadata["datadoc"]["document_version"] = "5.0.1"
289
+ _remove_element_from_model(supplied_metadata, PSEUDONYMIZATION_KEY)
290
+ supplied_metadata["document_version"] = "1.0.0"
291
+ return supplied_metadata
292
+
293
+
190
294
  def handle_version_3_3_0(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
191
295
  """Handle breaking changes for version 3.3.0.
192
296
 
@@ -195,17 +299,19 @@ def handle_version_3_3_0(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
195
299
  'direct_person_identifying' field from each variable in 'datadoc.variables'
196
300
  and updates the 'document_version' field to "4.0.0".
197
301
 
302
+ Version 4.0.0 used an enum for is_personal_data, however this was changed to a bool again for version 5.0.1.
303
+ We skip setting the enum here and just keep the value it has.
304
+
198
305
  Args:
199
306
  supplied_metadata: The metadata dictionary to be updated.
200
307
 
201
308
  Returns:
202
309
  The updated metadata dictionary.
203
310
  """
204
- for i in range(len(supplied_metadata["datadoc"]["variables"])):
205
- _remove_element_from_model(
206
- supplied_metadata["datadoc"]["variables"][i],
207
- "direct_person_identifying",
208
- )
311
+ for variable in supplied_metadata["datadoc"]["variables"]:
312
+ variable["is_personal_data"] = variable["direct_person_identifying"]
313
+ _remove_element_from_model(variable, "direct_person_identifying")
314
+
209
315
  supplied_metadata["datadoc"]["document_version"] = "4.0.0"
210
316
  return supplied_metadata
211
317
 
@@ -457,7 +563,8 @@ BackwardsCompatibleVersion(version="2.2.0", handler=handle_version_2_2_0)
457
563
  BackwardsCompatibleVersion(version="3.1.0", handler=handle_version_3_1_0)
458
564
  BackwardsCompatibleVersion(version="3.2.0", handler=handle_version_3_2_0)
459
565
  BackwardsCompatibleVersion(version="3.3.0", handler=handle_version_3_3_0)
460
- BackwardsCompatibleVersion(version="4.0.0", handler=handle_current_version)
566
+ BackwardsCompatibleVersion(version="4.0.0", handler=handle_version_4_0_0)
567
+ BackwardsCompatibleVersion(version="5.0.1", handler=handle_current_version)
461
568
 
462
569
 
463
570
  def upgrade_metadata(fresh_metadata: dict[str, Any]) -> dict[str, Any]:
@@ -43,11 +43,6 @@ DatadocMetadataType = (
43
43
  )
44
44
  DatasetType = all_optional_model.Dataset | required_model.Dataset
45
45
  OptionalDatadocMetadataType = DatadocMetadataType | None
46
- ExistingPseudonymizationMetadataType = (
47
- all_optional_model.PseudonymizationMetadata
48
- | required_model.PseudonymizationMetadata
49
- | None
50
- )
51
46
 
52
47
 
53
48
  def get_timestamp_now() -> datetime.datetime:
@@ -114,22 +109,22 @@ def set_default_values_variables(variables: list) -> None:
114
109
  variables: A list of variable objects to set default values on.
115
110
 
116
111
  Example:
117
- >>> variables = [model.Variable(short_name="pers",id=None, is_personal_data = None), model.Variable(short_name="fnr",id='9662875c-c245-41de-b667-12ad2091a1ee', is_personal_data='PSEUDONYMISED_ENCRYPTED_PERSONAL_DATA')]
112
+ >>> variables = [model.Variable(short_name="pers",id=None, is_personal_data = None), model.Variable(short_name="fnr",id='9662875c-c245-41de-b667-12ad2091a1ee', is_personal_data=True)]
118
113
  >>> set_default_values_variables(variables)
119
114
  >>> isinstance(variables[0].id, uuid.UUID)
120
115
  True
121
116
 
122
- >>> variables[1].is_personal_data == 'PSEUDONYMISED_ENCRYPTED_PERSONAL_DATA'
117
+ >>> variables[1].is_personal_data == True
123
118
  True
124
119
 
125
- >>> variables[0].is_personal_data == 'NOT_PERSONAL_DATA'
120
+ >>> variables[0].is_personal_data == False
126
121
  True
127
122
  """
128
123
  for v in variables:
129
124
  if v.id is None:
130
125
  v.id = uuid.uuid4()
131
126
  if v.is_personal_data is None:
132
- v.is_personal_data = model.IsPersonalData.NOT_PERSONAL_DATA
127
+ v.is_personal_data = False
133
128
  if v.variable_role is None:
134
129
  v.variable_role = VariableRole.MEASURE
135
130
 
@@ -7,6 +7,7 @@ from dapla_metadata.variable_definitions._generated.vardef_client.configuration
7
7
  Configuration,
8
8
  )
9
9
 
10
+ VARDEF_HOST_PROD = "https://metadata.intern.ssb.no"
10
11
  VARDEF_HOST_TEST = "https://metadata.intern.test.ssb.no"
11
12
  WORKSPACE_DIR = "WORKSPACE_DIR"
12
13
 
@@ -36,8 +37,7 @@ def get_vardef_host() -> str:
36
37
  """
37
38
  match get_dapla_environment():
38
39
  case DaplaEnvironment.PROD:
39
- msg = "Vardef is not available in prod."
40
- raise NotImplementedError(msg)
40
+ return VARDEF_HOST_PROD
41
41
  case DaplaEnvironment.TEST:
42
42
  return VARDEF_HOST_TEST
43
43
  case DaplaEnvironment.DEV: