dapla-toolbelt-metadata 0.9.2__py3-none-any.whl → 0.9.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dapla-toolbelt-metadata might be problematic. Click here for more details.

@@ -46,6 +46,7 @@ from dapla_metadata.datasets.utility.utils import (
46
46
  )
47
47
  from dapla_metadata.datasets.utility.utils import set_dataset_owner
48
48
  from dapla_metadata.datasets.utility.utils import set_default_values_dataset
49
+ from dapla_metadata.datasets.utility.utils import set_default_values_pseudonymization
49
50
  from dapla_metadata.datasets.utility.utils import set_default_values_variables
50
51
 
51
52
  if TYPE_CHECKING:
@@ -444,9 +445,12 @@ class Datadoc:
444
445
  variable_short_name: str,
445
446
  pseudonymization: all_optional_model.Pseudonymization | None = None,
446
447
  ) -> None:
447
- """Adds a new pseudo variable to the list of pseudonymized variables also sets is_personal_data to true.
448
+ """Adds a new pseudo variable to the list of pseudonymized variables.
448
449
 
449
- If there is no pseudonymization supplied an empty Pseudonymization structure will be added to the model.
450
+ If `pseudonymization` is not supplied, an empty Pseudonymization structure
451
+ will be created and assigned to the variable.
452
+ If an encryption algorithm is recognized (one of the standard Dapla algorithms), default values are filled
453
+ for any missing fields.
450
454
 
451
455
  Args:
452
456
  variable_short_name: The short name for the variable that one wants to update the pseudo for.
@@ -454,15 +458,15 @@ class Datadoc:
454
458
 
455
459
  """
456
460
  variable = self.variables_lookup[variable_short_name]
457
- variable.pseudonymization = (
458
- pseudonymization or all_optional_model.Pseudonymization()
459
- )
461
+ if pseudonymization:
462
+ set_default_values_pseudonymization(variable, pseudonymization)
463
+ else:
464
+ variable.pseudonymization = all_optional_model.Pseudonymization()
460
465
 
461
466
  def remove_pseudonymization(self, variable_short_name: str) -> None:
462
467
  """Removes a pseudo variable by using the shortname.
463
468
 
464
469
  Updates the pseudo variable lookup by creating a new one.
465
- Sets is_personal_data to non pseudonymized encrypted personal data.
466
470
 
467
471
  Args:
468
472
  variable_short_name: The short name for the variable that one wants to remove the pseudo for.
@@ -47,6 +47,12 @@ OBLIGATORY_VARIABLES_METADATA_IDENTIFIERS = [
47
47
  "temporality_type",
48
48
  ]
49
49
 
50
+ OBLIGATORY_VARIABLES_PESUODONYMIZATION_IDENTIFIERS = [
51
+ "encryption_algorithm",
52
+ "encryption_key_refrence",
53
+ ]
54
+
55
+
50
56
  OBLIGATORY_VARIABLES_METADATA_IDENTIFIERS_MULTILANGUAGE = [
51
57
  "name",
52
58
  ]
@@ -91,3 +97,11 @@ METADATA_DOCUMENT_FILE_SUFFIX = "__DOC.json"
91
97
  DATADOC_STATISTICAL_SUBJECT_SOURCE_URL = (
92
98
  "https://www.ssb.no/xp/_/service/mimir/subjectStructurStatistics"
93
99
  )
100
+
101
+ PAPIS_STABLE_IDENTIFIER_TYPE = "FREG_SNR"
102
+ PAPIS_ENCRYPTION_KEY_REFERENCE = "papis-common-key-1"
103
+ DAEAD_ENCRYPTION_KEY_REFERENCE = "ssb-common-key-1"
104
+ ENCRYPTION_PARAMETER_SNAPSHOT_DATE = "snapshotDate"
105
+ ENCRYPTION_PARAMETER_KEY_ID = "keyId"
106
+ ENCRYPTION_PARAMETER_STRATEGY = "strategy"
107
+ ENCRYPTION_PARAMETER_STRATEGY_SKIP = "skip"
@@ -14,3 +14,10 @@ class SupportedLanguages(str, Enum):
14
14
  NORSK_BOKMÅL = "nb" # noqa: PLC2401 the listed problems do not apply in this case
15
15
  NORSK_NYNORSK = "nn"
16
16
  ENGLISH = "en"
17
+
18
+
19
+ class EncryptionAlgorithm(str, Enum):
20
+ """Encryption algorithm values for pseudonymization algorithms offered on Dapla."""
21
+
22
+ PAPIS_ENCRYPTION_ALGORITHM = "TINK-FPE"
23
+ DAEAD_ENCRYPTION_ALGORITHM = "TINK-DAEAD"
@@ -4,6 +4,7 @@ import datetime # import is needed in xdoctest
4
4
  import logging
5
5
  import pathlib
6
6
  import uuid
7
+ from typing import Any
7
8
  from typing import TypeAlias
8
9
 
9
10
  import datadoc_model.all_optional.model as all_optional_model
@@ -12,12 +13,16 @@ import google.auth
12
13
  from cloudpathlib import CloudPath
13
14
  from cloudpathlib import GSClient
14
15
  from cloudpathlib import GSPath
15
- from datadoc_model import model
16
16
  from datadoc_model.all_optional.model import Assessment
17
17
  from datadoc_model.all_optional.model import DataSetState
18
18
  from datadoc_model.all_optional.model import VariableRole
19
19
 
20
20
  from dapla_metadata.dapla import user_info
21
+ from dapla_metadata.datasets.utility.constants import DAEAD_ENCRYPTION_KEY_REFERENCE
22
+ from dapla_metadata.datasets.utility.constants import ENCRYPTION_PARAMETER_KEY_ID
23
+ from dapla_metadata.datasets.utility.constants import ENCRYPTION_PARAMETER_SNAPSHOT_DATE
24
+ from dapla_metadata.datasets.utility.constants import ENCRYPTION_PARAMETER_STRATEGY
25
+ from dapla_metadata.datasets.utility.constants import ENCRYPTION_PARAMETER_STRATEGY_SKIP
21
26
  from dapla_metadata.datasets.utility.constants import NUM_OBLIGATORY_VARIABLES_FIELDS
22
27
  from dapla_metadata.datasets.utility.constants import (
23
28
  OBLIGATORY_DATASET_METADATA_IDENTIFIERS,
@@ -31,6 +36,9 @@ from dapla_metadata.datasets.utility.constants import (
31
36
  from dapla_metadata.datasets.utility.constants import (
32
37
  OBLIGATORY_VARIABLES_METADATA_IDENTIFIERS_MULTILANGUAGE,
33
38
  )
39
+ from dapla_metadata.datasets.utility.constants import PAPIS_ENCRYPTION_KEY_REFERENCE
40
+ from dapla_metadata.datasets.utility.constants import PAPIS_STABLE_IDENTIFIER_TYPE
41
+ from dapla_metadata.datasets.utility.enums import EncryptionAlgorithm
34
42
 
35
43
  logger = logging.getLogger(__name__)
36
44
 
@@ -39,12 +47,20 @@ DatadocMetadataType: TypeAlias = (
39
47
  )
40
48
  DatasetType: TypeAlias = all_optional_model.Dataset | required_model.Dataset
41
49
  VariableType: TypeAlias = all_optional_model.Variable | required_model.Variable
50
+ PseudonymizationType: TypeAlias = (
51
+ all_optional_model.Pseudonymization | required_model.Pseudonymization
52
+ )
42
53
  VariableListType: TypeAlias = (
43
54
  list[all_optional_model.Variable] | list[required_model.Variable]
44
55
  )
45
56
  OptionalDatadocMetadataType: TypeAlias = DatadocMetadataType | None
46
57
 
47
58
 
59
+ def get_current_date() -> str:
60
+ """Return the current UTC date as an ISO 8601 string (YYYY-MM-DD)."""
61
+ return datetime.datetime.now(tz=datetime.timezone.utc).date().isoformat()
62
+
63
+
48
64
  def get_timestamp_now() -> datetime.datetime:
49
65
  """Return a timestamp for the current moment."""
50
66
  return datetime.datetime.now(tz=datetime.timezone.utc)
@@ -109,7 +125,7 @@ def set_default_values_variables(variables: list) -> None:
109
125
  variables: A list of variable objects to set default values on.
110
126
 
111
127
  Example:
112
- >>> variables = [model.Variable(short_name="pers",id=None, is_personal_data = None), model.Variable(short_name="fnr",id='9662875c-c245-41de-b667-12ad2091a1ee', is_personal_data=True)]
128
+ >>> variables = [all_optional_model.Variable(short_name="pers",id=None, is_personal_data = None), all_optional_model.Variable(short_name="fnr",id='9662875c-c245-41de-b667-12ad2091a1ee', is_personal_data=True)]
113
129
  >>> set_default_values_variables(variables)
114
130
  >>> isinstance(variables[0].id, uuid.UUID)
115
131
  True
@@ -138,7 +154,7 @@ def set_default_values_dataset(
138
154
  dataset: The dataset object to set default values on.
139
155
 
140
156
  Example:
141
- >>> dataset = model.Dataset(id=None)
157
+ >>> dataset = all_optional_model.Dataset(id=None)
142
158
  >>> set_default_values_dataset(dataset)
143
159
  >>> dataset.id is not None
144
160
  True
@@ -176,8 +192,8 @@ def set_variables_inherit_from_dataset(
176
192
  variables: A list of variable objects to update with dataset values.
177
193
 
178
194
  Example:
179
- >>> dataset = model.Dataset(short_name='person_data_v1', id='9662875c-c245-41de-b667-12ad2091a1ee', contains_data_from="2010-09-05", contains_data_until="2022-09-05")
180
- >>> variables = [model.Variable(short_name="pers", data_source=None, temporality_type=None, contains_data_from=None, contains_data_until=None)]
195
+ >>> dataset = all_optional_model.Dataset(short_name='person_data_v1', id='9662875c-c245-41de-b667-12ad2091a1ee', contains_data_from="2010-09-05", contains_data_until="2022-09-05")
196
+ >>> variables = [all_optional_model.Variable(short_name="pers", data_source=None, temporality_type=None, contains_data_from=None, contains_data_until=None)]
181
197
  >>> set_variables_inherit_from_dataset(dataset, variables)
182
198
 
183
199
  >>> variables[0].contains_data_from == dataset.contains_data_from
@@ -323,7 +339,9 @@ def num_obligatory_variables_fields_completed(variables: list) -> int:
323
339
  return num_completed
324
340
 
325
341
 
326
- def num_obligatory_variable_fields_completed(variable: model.Variable) -> int:
342
+ def num_obligatory_variable_fields_completed(
343
+ variable: all_optional_model.Variable,
344
+ ) -> int:
327
345
  """Count the number of obligatory fields completed for one variable.
328
346
 
329
347
  This function calculates the total number of obligatory fields that have
@@ -425,3 +443,66 @@ def running_in_notebook() -> bool:
425
443
  # interpreters and will throw a NameError. Therefore we're not running
426
444
  # in Jupyter.
427
445
  return False
446
+
447
+
448
+ def _ensure_encryption_parameters(
449
+ existing: list[dict[str, Any]] | None,
450
+ required: dict[str, Any],
451
+ ) -> list[dict[str, Any]]:
452
+ """Return a copy of the parameters list, appending `{key: value}` for each required key not already present in any entry."""
453
+ result = list(existing or [])
454
+
455
+ # Ensure each required key is present in at least one dict
456
+ for key, value in required.items():
457
+ if not any(key in d for d in result):
458
+ result.append({key: value})
459
+
460
+ return result
461
+
462
+
463
+ def set_default_values_pseudonymization(
464
+ variable: VariableType,
465
+ pseudonymization: PseudonymizationType | None,
466
+ ) -> None:
467
+ """Populate pseudonymization fields with defaults based on the encryption algorithm.
468
+
469
+ Updates the encryption key reference and encryption parameters if they are not set,
470
+ handling both PAPIS and DAEAD algorithms. Leaves unknown algorithms unchanged.
471
+ """
472
+ if pseudonymization is None:
473
+ return
474
+ if variable.pseudonymization is None:
475
+ variable.pseudonymization = pseudonymization
476
+ match pseudonymization.encryption_algorithm:
477
+ case EncryptionAlgorithm.PAPIS_ENCRYPTION_ALGORITHM.value:
478
+ if not pseudonymization.encryption_key_reference:
479
+ pseudonymization.encryption_key_reference = (
480
+ PAPIS_ENCRYPTION_KEY_REFERENCE
481
+ )
482
+ base_params = {
483
+ ENCRYPTION_PARAMETER_KEY_ID: PAPIS_ENCRYPTION_KEY_REFERENCE,
484
+ ENCRYPTION_PARAMETER_STRATEGY: ENCRYPTION_PARAMETER_STRATEGY_SKIP,
485
+ }
486
+ if pseudonymization.stable_identifier_type == PAPIS_STABLE_IDENTIFIER_TYPE:
487
+ base_params[ENCRYPTION_PARAMETER_SNAPSHOT_DATE] = get_current_date()
488
+ pseudonymization.encryption_algorithm_parameters = (
489
+ _ensure_encryption_parameters(
490
+ pseudonymization.encryption_algorithm_parameters,
491
+ base_params,
492
+ )
493
+ )
494
+ case EncryptionAlgorithm.DAEAD_ENCRYPTION_ALGORITHM.value:
495
+ if not pseudonymization.encryption_key_reference:
496
+ pseudonymization.encryption_key_reference = (
497
+ DAEAD_ENCRYPTION_KEY_REFERENCE
498
+ )
499
+ pseudonymization.encryption_algorithm_parameters = (
500
+ _ensure_encryption_parameters(
501
+ pseudonymization.encryption_algorithm_parameters,
502
+ {
503
+ ENCRYPTION_PARAMETER_KEY_ID: DAEAD_ENCRYPTION_KEY_REFERENCE,
504
+ },
505
+ )
506
+ )
507
+ case _:
508
+ pass
@@ -49,6 +49,10 @@ from dapla_metadata.variable_definitions.exceptions import vardef_file_error_han
49
49
 
50
50
  logger = logging.getLogger(__name__)
51
51
 
52
+ IDENTICAL_PATCH_ERROR_MESSAGE = (
53
+ "No changes detected in supported fields. Not creating identical patch."
54
+ )
55
+
52
56
 
53
57
  class VariableDefinition(CompleteResponse):
54
58
  """A Variable Definition.
@@ -274,11 +278,14 @@ class VariableDefinition(CompleteResponse):
274
278
  Returns:
275
279
  VariableDefinition: Variable Definition with all details.
276
280
  """
281
+ new_patch = _read_file_to_model(
282
+ file_path or self.get_file_path(),
283
+ Patch,
284
+ )
285
+ if new_patch == Patch.from_dict(self.to_dict()):
286
+ raise ValueError(IDENTICAL_PATCH_ERROR_MESSAGE)
277
287
  return self.create_patch(
278
- patch=_read_file_to_model(
279
- file_path or self.get_file_path(),
280
- Patch,
281
- ),
288
+ patch=new_patch,
282
289
  valid_from=valid_from,
283
290
  )
284
291
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dapla-toolbelt-metadata
3
- Version: 0.9.2
3
+ Version: 0.9.3
4
4
  Summary: Dapla Toolbelt Metadata
5
5
  Project-URL: homepage, https://github.com/statisticsnorway/dapla-toolbelt-metadata
6
6
  Project-URL: repository, https://github.com/statisticsnorway/dapla-toolbelt-metadata
@@ -8,7 +8,7 @@ dapla_metadata/dapla/user_info.py,sha256=bENez-ICt9ySR8orYebO68Q3_2LkIW9QTL58DTc
8
8
  dapla_metadata/datasets/__init__.py,sha256=an-REJgi7N8-S1SCz-MYO_8as6fMe03WvhjRP_hWWkg,293
9
9
  dapla_metadata/datasets/_merge.py,sha256=Tk5wQz6xZGr8veUAHZb42O8HARU8ObBJ_E4afvVWdlo,12993
10
10
  dapla_metadata/datasets/code_list.py,sha256=JtCE-5Q8grAKvkn0KKjzeGhO-96O7yGsastbuoakreg,9057
11
- dapla_metadata/datasets/core.py,sha256=r_lWZPB4zETQGXyKLwr1cgDQkeIIK_id5jGMCPHHYqg,20209
11
+ dapla_metadata/datasets/core.py,sha256=p-2OJsAEWCUqBlzn0YIYkK-pAgtvMROdoxXvCyjfWYs,20434
12
12
  dapla_metadata/datasets/dapla_dataset_path_info.py,sha256=WPeV_mwKk2B9sXd14SaP-kTb1bOQ_8W2KtrqOG7sJIY,26867
13
13
  dapla_metadata/datasets/dataset_parser.py,sha256=3dtRXNy1C8SfG8zTYWdY26nV4l-dG25IC_0J5t2bYwI,8285
14
14
  dapla_metadata/datasets/model_validation.py,sha256=pGT-jqaQQY4z7jz-7UQd0BQoTWDxDWPYAnDoRC2vd_c,6818
@@ -21,9 +21,9 @@ dapla_metadata/datasets/compatibility/model_backwards_compatibility.py,sha256=W5
21
21
  dapla_metadata/datasets/external_sources/__init__.py,sha256=qvIdXwqyEmXNUCB94ZtZXRzifdW4hiXASFFPtC70f6E,83
22
22
  dapla_metadata/datasets/external_sources/external_sources.py,sha256=9eIcOIUbaodNX1w9Tj2wl4U4wUmr5kF1R0i01fKUzGs,2974
23
23
  dapla_metadata/datasets/utility/__init__.py,sha256=pp6tUcgUbo8iq9OPtFKQrTbLuI3uY7NHptwWSTpasOU,33
24
- dapla_metadata/datasets/utility/constants.py,sha256=Iq8_0GnBwR0Ua1tPstNPJHt5mtiSehDQsW8uLNtBv_4,2489
25
- dapla_metadata/datasets/utility/enums.py,sha256=SpV4xlmP1YMaJPbmX03hqRLHUOhXIk5gquTeJ8G_5OE,432
26
- dapla_metadata/datasets/utility/utils.py,sha256=f1vRy9TUbp4mtvJtXnk0Z7YTL8LZv9arcReXEm46gCg,14735
24
+ dapla_metadata/datasets/utility/constants.py,sha256=94nGISL96rHvAndjHyaQEaJXNBnPAiRJN1slUaB03gM,2933
25
+ dapla_metadata/datasets/utility/enums.py,sha256=i6dcxWya5k4LjLdGGIM_H37rRndizug3peaAgoE5UdM,652
26
+ dapla_metadata/datasets/utility/utils.py,sha256=85Ms6jEcUuQUm-RRosscDVpvA5W4TOqiOZo2LAnXjFA,18301
27
27
  dapla_metadata/standards/__init__.py,sha256=n8jnMrudLuScSdfQ4UMJorc-Ptg3Y1-ilT8zAaQnM70,179
28
28
  dapla_metadata/standards/name_validator.py,sha256=6-DQE_EKVd6UjL--EXpFcZDQtusVbSFaWaUY-CfOV2c,9184
29
29
  dapla_metadata/standards/standard_validators.py,sha256=tcCiCI76wUVtMzXA2oCgdauZc0uGgUi11FKu-t7KGwQ,3767
@@ -34,7 +34,7 @@ dapla_metadata/variable_definitions/exceptions.py,sha256=ImB81bne-h45kX9lE5hIh80
34
34
  dapla_metadata/variable_definitions/vardef.py,sha256=WUpiKfvgFGPhMdjYSFSmdlXQKAolmRgW4-t-EocddQs,13934
35
35
  dapla_metadata/variable_definitions/vardok_id.py,sha256=8T23BUHyVQr5hovTVc2E4HVY7f7e_jdi3YL1qzMQgFw,1268
36
36
  dapla_metadata/variable_definitions/vardok_vardef_id_pair.py,sha256=8MDdd2-9L30MXkoQrk7NDcueaoxdeYie-TJhgoskTzk,1389
37
- dapla_metadata/variable_definitions/variable_definition.py,sha256=uSWvSuVDh5zmSXGUb7vitiNd0VThU5DzFV3Rd6gumYE,14620
37
+ dapla_metadata/variable_definitions/variable_definition.py,sha256=tsTo_BObHgppSNf6lGbt_Vh88PypPF4pwA-vWDgFy9A,14869
38
38
  dapla_metadata/variable_definitions/_generated/.openapi-generator-ignore,sha256=x9lryVB5wtVEuKQ5GcZ94b10RgtkVXbtvWXOArO1XsM,169
39
39
  dapla_metadata/variable_definitions/_generated/README.md,sha256=Y4et1oAhZTCr7a-CZfLbIpyYnhKzpygNg-gj7qJ09Eg,7650
40
40
  dapla_metadata/variable_definitions/_generated/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -90,7 +90,7 @@ dapla_metadata/variable_definitions/_utils/constants.py,sha256=zr5FNVCEz6TM9PVEr
90
90
  dapla_metadata/variable_definitions/_utils/files.py,sha256=JbPgPNQ7iA38juMqGEdcg5OjZZUwCb6NQtPL0AEspD0,10933
91
91
  dapla_metadata/variable_definitions/_utils/template_files.py,sha256=7fcc7yEHOl5JUZ698kqj4IiikXPHBi3SrAVOk4wqQtw,3308
92
92
  dapla_metadata/variable_definitions/_utils/variable_definition_files.py,sha256=sGhcSpckR9NtYGNh2oVkiCd5SI3bbJEBhc1PA2uShs0,4701
93
- dapla_toolbelt_metadata-0.9.2.dist-info/METADATA,sha256=Rf7QxJjGzKIGtHWu9PDYwv3bKzC8YRFuEMuF-EPm0lk,4723
94
- dapla_toolbelt_metadata-0.9.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
95
- dapla_toolbelt_metadata-0.9.2.dist-info/licenses/LICENSE,sha256=np3IfD5m0ZUofn_kVzDZqliozuiO6wrktw3LRPjyEiI,1073
96
- dapla_toolbelt_metadata-0.9.2.dist-info/RECORD,,
93
+ dapla_toolbelt_metadata-0.9.3.dist-info/METADATA,sha256=aCJzU5NSK7_yY9lu5R9KnO-cE6P6o_-gc_32CS6qeKU,4723
94
+ dapla_toolbelt_metadata-0.9.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
95
+ dapla_toolbelt_metadata-0.9.3.dist-info/licenses/LICENSE,sha256=np3IfD5m0ZUofn_kVzDZqliozuiO6wrktw3LRPjyEiI,1073
96
+ dapla_toolbelt_metadata-0.9.3.dist-info/RECORD,,