dapla-toolbelt-metadata 0.9.1__tar.gz → 0.9.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dapla-toolbelt-metadata might be problematic. Click here for more details.

Files changed (239)
  1. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/PKG-INFO +1 -1
  2. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/pyproject.toml +1 -1
  3. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/__init__.py +1 -1
  4. dapla_toolbelt_metadata-0.9.3/src/dapla_metadata/datasets/_merge.py +333 -0
  5. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/core.py +33 -176
  6. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/utility/constants.py +19 -4
  7. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/utility/enums.py +7 -0
  8. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/utility/utils.py +79 -83
  9. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/variable_definition.py +11 -4
  10. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/conftest.py +1 -0
  11. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/compatibility/v6_0_0/person_data_v1__DOC.json +2 -2
  12. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/test_datadoc_metadata.py +1 -428
  13. dapla_toolbelt_metadata-0.9.3/tests/datasets/test_dataset_consistency.py +229 -0
  14. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/test_model_backwards_compatibility.py +23 -0
  15. dapla_toolbelt_metadata-0.9.3/tests/datasets/test_pseudonymization.py +534 -0
  16. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/test_utils.py +7 -0
  17. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/test_validators.py +2 -3
  18. dapla_toolbelt_metadata-0.9.3/tests/variable_definitions/resources/variable_definition_editing_files/variable_definition_var_test_wypvb3wd_2025-09-18T14-19-28.yaml +62 -0
  19. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/variable_definitions/test_variable_definition.py +22 -0
  20. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/uv.lock +335 -309
  21. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/.cookiecutter.json +0 -0
  22. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/.cruft.json +0 -0
  23. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/.editorconfig +0 -0
  24. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/.gitattributes +0 -0
  25. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  26. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  27. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/.github/dependabot.yml +0 -0
  28. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/.github/labels.yml +0 -0
  29. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/.github/release-drafter.yml +0 -0
  30. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/.github/workflows/constraints.txt +0 -0
  31. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/.github/workflows/docs.yml +0 -0
  32. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/.github/workflows/labeler.yml +0 -0
  33. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/.github/workflows/release.yml +0 -0
  34. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/.github/workflows/tests.yml +0 -0
  35. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/.gitignore +0 -0
  36. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/.pre-commit-config.yaml +0 -0
  37. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/.vscode/settings.json +0 -0
  38. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/CODEOWNERS +0 -0
  39. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/CODE_OF_CONDUCT.md +0 -0
  40. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/CONTRIBUTING.md +0 -0
  41. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/LICENSE +0 -0
  42. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/Makefile +0 -0
  43. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/README.md +0 -0
  44. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/SECURITY.md +0 -0
  45. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/backstage.yaml +0 -0
  46. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/bin/__init__.py +0 -0
  47. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/bin/openapi_generate_post_process.sh +0 -0
  48. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/bin/upgrade_metadata_file.py +0 -0
  49. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/demo/datasets/create_metadata_for_new_dataset.ipynb +0 -0
  50. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/demo/datasets/create_metadata_for_new_period.ipynb +0 -0
  51. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/demo/datasets/resources/sykefratot/klargjorte_data/person_testdata_p2021_v1__DOC.json +0 -0
  52. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/demo/standards/navnestandard.ipynb +0 -0
  53. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/demo/variable_definitions/les_variabeldefinisjon.ipynb +0 -0
  54. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/demo/variable_definitions/les_variabeldefinisjoner.ipynb +0 -0
  55. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/demo/variable_definitions/migrer_variabeldefinisjoner.ipynb +0 -0
  56. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/demo/variable_definitions/ny_gyldighetsperiode_variabeldefinisjon.ipynb +0 -0
  57. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/demo/variable_definitions/ny_variabeldefinisjon.ipynb +0 -0
  58. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/demo/variable_definitions/publiser_variabeldefinisjon_eksternt.ipynb +0 -0
  59. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/demo/variable_definitions/publiser_variabeldefinisjon_internt.ipynb +0 -0
  60. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/demo/variable_definitions/rediger_publisert_variabeldefinisjon.ipynb +0 -0
  61. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/demo/variable_definitions/rediger_utkast_variabeldefinisjon.ipynb +0 -0
  62. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/demo/variable_definitions/reserver_kortnavn.ipynb +0 -0
  63. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/demo/variable_definitions/sjekk_kortnavn_variabeldefinisjon.ipynb +0 -0
  64. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/demo/variable_definitions/sjekk_migrerte_variabeldefinisjoner.ipynb +0 -0
  65. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/demo/variable_definitions/slett_utkast_variabeldefinisjon.ipynb +0 -0
  66. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/docs/Makefile +0 -0
  67. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/docs/codeofconduct.md +0 -0
  68. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/docs/conf.py +0 -0
  69. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/docs/contributing.md +0 -0
  70. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/docs/index.md +0 -0
  71. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/docs/license.md +0 -0
  72. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/docs/make.bat +0 -0
  73. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/docs/reference.md +0 -0
  74. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/docs/templates/package.rst_t +0 -0
  75. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/docs/vardef_client.md +0 -0
  76. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/noxfile.py +0 -0
  77. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/schemas/jsonschema/complete-response-schema.json +0 -0
  78. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/sonar-project.properties +0 -0
  79. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/__init__.py +0 -0
  80. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/_shared/__init__.py +0 -0
  81. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/_shared/config.py +0 -0
  82. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/_shared/enums.py +0 -0
  83. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/_shared/py.typed +0 -0
  84. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/dapla/__init__.py +0 -0
  85. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/dapla/user_info.py +0 -0
  86. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/code_list.py +0 -0
  87. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/compatibility/__init__.py +0 -0
  88. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/compatibility/_handlers.py +0 -0
  89. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/compatibility/_utils.py +0 -0
  90. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/compatibility/model_backwards_compatibility.py +0 -0
  91. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/dapla_dataset_path_info.py +0 -0
  92. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/dataset_parser.py +0 -0
  93. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/external_sources/__init__.py +0 -0
  94. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/external_sources/external_sources.py +0 -0
  95. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/model_validation.py +0 -0
  96. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/py.typed +0 -0
  97. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/statistic_subject_mapping.py +0 -0
  98. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/utility/__init__.py +0 -0
  99. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/standards/__init__.py +0 -0
  100. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/standards/name_validator.py +0 -0
  101. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/standards/standard_validators.py +0 -0
  102. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/standards/utils/__init__.py +0 -0
  103. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/standards/utils/constants.py +0 -0
  104. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/__init__.py +0 -0
  105. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/.openapi-generator/FILES +0 -0
  106. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/.openapi-generator/VERSION +0 -0
  107. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/.openapi-generator-ignore +0 -0
  108. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/README.md +0 -0
  109. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/__init__.py +0 -0
  110. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/__init__.py +0 -0
  111. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/api/__init__.py +0 -0
  112. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/api/data_migration_api.py +0 -0
  113. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/api/draft_variable_definitions_api.py +0 -0
  114. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/api/patches_api.py +0 -0
  115. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/api/validity_periods_api.py +0 -0
  116. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/api/variable_definitions_api.py +0 -0
  117. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/api_client.py +0 -0
  118. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/api_response.py +0 -0
  119. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/configuration.py +0 -0
  120. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/CompleteResponse.md +0 -0
  121. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/Contact.md +0 -0
  122. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/DataMigrationApi.md +0 -0
  123. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/Draft.md +0 -0
  124. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/DraftVariableDefinitionsApi.md +0 -0
  125. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/LanguageStringType.md +0 -0
  126. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/Owner.md +0 -0
  127. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/Patch.md +0 -0
  128. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/PatchesApi.md +0 -0
  129. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/PublicApi.md +0 -0
  130. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/SupportedLanguages.md +0 -0
  131. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/UpdateDraft.md +0 -0
  132. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/ValidityPeriod.md +0 -0
  133. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/ValidityPeriodsApi.md +0 -0
  134. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/VariableDefinitionsApi.md +0 -0
  135. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/VariableStatus.md +0 -0
  136. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/exceptions.py +0 -0
  137. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/__init__.py +0 -0
  138. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/complete_response.py +0 -0
  139. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/contact.py +0 -0
  140. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/draft.py +0 -0
  141. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/get_vardok_vardef_mapping_by_id200_response.py +0 -0
  142. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/language_string_type.py +0 -0
  143. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/owner.py +0 -0
  144. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/patch.py +0 -0
  145. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/problem.py +0 -0
  146. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/update_draft.py +0 -0
  147. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/validity_period.py +0 -0
  148. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/vardok_id_response.py +0 -0
  149. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/vardok_vardef_id_pair_response.py +0 -0
  150. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/variable_status.py +0 -0
  151. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/py.typed +0 -0
  152. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/rest.py +0 -0
  153. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_utils/__init__.py +0 -0
  154. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_utils/_client.py +0 -0
  155. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_utils/config.py +0 -0
  156. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_utils/constants.py +0 -0
  157. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_utils/files.py +0 -0
  158. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_utils/template_files.py +0 -0
  159. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_utils/variable_definition_files.py +0 -0
  160. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/exceptions.py +0 -0
  161. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/vardef.py +0 -0
  162. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/vardok_id.py +0 -0
  163. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/vardok_vardef_id_pair.py +0 -0
  164. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/__init__.py +0 -0
  165. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/conftest.py +0 -0
  166. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/dapla/__init__.py +0 -0
  167. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/dapla/test_user_info.py +0 -0
  168. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/__init__.py +0 -0
  169. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/constants.py +0 -0
  170. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/code_list/code_list.csv +0 -0
  171. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/code_list/code_list_en.csv +0 -0
  172. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/code_list/code_list_nb.csv +0 -0
  173. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/code_list/code_list_nn.csv +0 -0
  174. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/code_list/empty.csv +0 -0
  175. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/code_list/no_code.csv +0 -0
  176. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/datasets/fewer_variables_p2021-12-31_p2021-12-31_v1.parquet +0 -0
  177. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/datasets/ifpn/klargjorte_data/person_testdata_p2021-12-31_p2021-12-31_v1.parquet +0 -0
  178. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/datasets/person_data.csv +0 -0
  179. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/datasets/person_data_v1.parquet +0 -0
  180. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/datasets/person_data_v1.parquet.gzip +0 -0
  181. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/datasets/person_testdata_p2021-12-31_p2021-12-31_v1.parquet +0 -0
  182. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/datasets/sasdata.sas7bdat +0 -0
  183. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/compatibility/README.md +0 -0
  184. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/compatibility/v0_1_1/person_data_v1__DOC.json +0 -0
  185. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/compatibility/v1_0_0/person_data_v1__DOC.json +0 -0
  186. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/compatibility/v2_1_0/person_data_v1__DOC.json +0 -0
  187. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/compatibility/v2_2_0/person_data_v1__DOC.json +0 -0
  188. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/compatibility/v3_1_0/person_data_v1__DOC.json +0 -0
  189. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/compatibility/v3_2_0/person_data_v1__DOC.json +0 -0
  190. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/compatibility/v3_3_0/person_data_v1__DOC.json +0 -0
  191. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/compatibility/v4_0_0/person_data_v1__DOC.json +0 -0
  192. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/compatibility/v5_0_1/person_data_v1__DOC.json +0 -0
  193. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/fewer_variables_p2020-12-31_p2020-12-31_v1__DOC.json +0 -0
  194. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/invalid_id_field/person_data_v1__DOC.json +0 -0
  195. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/person_data_v1__DOC.json +0 -0
  196. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/person_testdata_p2020-12-31_p2020-12-31_v1__DOC.json +0 -0
  197. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/pseudo/dataset_and_pseudo/no_pseudo_v4_0_0_person_data_v1__DOC.json +0 -0
  198. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/pseudo/dataset_and_pseudo/person_data_v1__DOC.json +0 -0
  199. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/pseudo/dataset_and_pseudo/v4_0_0_person_data_v1__DOC.json +0 -0
  200. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/pseudo/pseudo/person_data_v1__DOC.json +0 -0
  201. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/pseudo/pseudo_missing_variables/person_data_v1__DOC.json +0 -0
  202. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/valid_id_field/person_data_v1__DOC.json +0 -0
  203. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/valid_variable_id_field/person_data_v1__DOC.json +0 -0
  204. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/klargjorte_data/person_testdata_p2021-12-31_p2021-12-31_v1.parquet +0 -0
  205. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/klargjorte_data/person_testdata_p2021-12-31_p2021-12-31_v1__DOC.json +0 -0
  206. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/statistical_subject_structure/empty.xml +0 -0
  207. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/statistical_subject_structure/extract_secondary_subject.xml +0 -0
  208. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/statistical_subject_structure/missing_language.xml +0 -0
  209. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/statistical_subject_structure/simple.xml +0 -0
  210. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/test_code_list.py +0 -0
  211. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/test_dapla_dataset_path_info.py +0 -0
  212. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/test_dataset_parser.py +0 -0
  213. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/test_statistic_subject_mapping.py +0 -0
  214. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/standards/__init__.py +0 -0
  215. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/standards/conftest.py +0 -0
  216. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/standards/test_check_naming_standard.py +0 -0
  217. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/utils/__init__.py +0 -0
  218. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/utils/constants.py +0 -0
  219. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/utils/microcks_testcontainer.py +0 -0
  220. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/variable_definitions/__init__.py +0 -0
  221. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/variable_definitions/conftest.py +0 -0
  222. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/variable_definitions/constants.py +0 -0
  223. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/variable_definitions/resources/openapi/variable-definitions-internal.yml +0 -0
  224. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/variable_definitions/resources/variable_definition_editing_files/classification_reference.yaml +0 -0
  225. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/variable_definitions/resources/variable_definition_editing_files/date_and_definition.yaml +0 -0
  226. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/variable_definitions/resources/variable_definition_editing_files/variable_definition_landbak_wypvb3wd_2025-05-02T10-06-20.yaml +0 -0
  227. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/variable_definitions/test_config.py +0 -0
  228. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/variable_definitions/test_template_files.py +0 -0
  229. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/variable_definitions/test_vardef.py +0 -0
  230. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/variable_definitions/test_variable_definition_files.py +0 -0
  231. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/variable_definitions/vardef_client/__init__.py +0 -0
  232. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/variable_definitions/vardef_client/api/__init__.py +0 -0
  233. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/variable_definitions/vardef_client/api/test_happy_path.py +0 -0
  234. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/variable_definitions/vardef_client/models/__init__.py +0 -0
  235. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/variable_definitions/vardef_client/models/test_models_instantiation.py +0 -0
  236. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/variable_definitions/vardef_client/test_exceptions.py +0 -0
  237. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/vardef-maintenance/README.md +0 -0
  238. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/vardef-maintenance/vardef-maintenance.toml +0 -0
  239. {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/vardef-maintenance/vardef_client_init.sh +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dapla-toolbelt-metadata
3
- Version: 0.9.1
3
+ Version: 0.9.3
4
4
  Summary: Dapla Toolbelt Metadata
5
5
  Project-URL: homepage, https://github.com/statisticsnorway/dapla-toolbelt-metadata
6
6
  Project-URL: repository, https://github.com/statisticsnorway/dapla-toolbelt-metadata
@@ -2,7 +2,7 @@
2
2
  name = "dapla-toolbelt-metadata"
3
3
  description = "Dapla Toolbelt Metadata"
4
4
  license = "MIT"
5
- version = "0.9.1"
5
+ version = "0.9.3"
6
6
  classifiers = ["Development Status :: 4 - Beta"]
7
7
  readme = "README.md"
8
8
  authors = [{ name = "Statistics Norway", email = "metadata@ssb.no" }]
@@ -1,6 +1,6 @@
1
1
  """Document dataset."""
2
2
 
3
- from datadoc_model import model
3
+ from datadoc_model.all_optional import model
4
4
 
5
5
  from .core import Datadoc
6
6
  from .dapla_dataset_path_info import DaplaDatasetPathInfo
@@ -0,0 +1,333 @@
1
+ """Code relating to merging metadata from an existing metadata document and metadata extracted from a new dataset.
2
+
3
+ This is primarily convenience functionality for users whereby they can programmatically generate metadata without
4
+ having to manually enter it. It is most useful when data is sharded by time (i.e. each dataset applies to
5
+ a particular period like a month or a year). Assuming there aren't structural changes, the metadata may be reused
6
+ for all periods.
7
+
8
+ It is important to be able to detect changes in the structure of the data and warn users about this so that they can
9
+ make changes as appropriate.
10
+ """
11
+
12
+ import copy
13
+ import logging
14
+ import warnings
15
+ from collections.abc import Iterable
16
+ from dataclasses import dataclass
17
+ from dataclasses import field
18
+ from pathlib import Path
19
+ from typing import cast
20
+
21
+ import datadoc_model
22
+ import datadoc_model.all_optional.model as all_optional_model
23
+ import datadoc_model.required.model as required_model
24
+ from cloudpathlib import CloudPath
25
+
26
+ from dapla_metadata.datasets.dapla_dataset_path_info import DaplaDatasetPathInfo
27
+ from dapla_metadata.datasets.utility.constants import (
28
+ DATASET_FIELDS_FROM_EXISTING_METADATA,
29
+ )
30
+ from dapla_metadata.datasets.utility.constants import INCONSISTENCIES_MESSAGE
31
+ from dapla_metadata.datasets.utility.utils import OptionalDatadocMetadataType
32
+ from dapla_metadata.datasets.utility.utils import VariableListType
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+ BUCKET_NAME_MESSAGE = "Bucket name"
37
+ DATA_PRODUCT_NAME_MESSAGE = "Data product name"
38
+ DATASET_STATE_MESSAGE = "Dataset state"
39
+ DATASET_SHORT_NAME_MESSAGE = "Dataset short name"
40
+ VARIABLES_ADDITIONAL_MESSAGE = (
41
+ "Dataset has additional variables than defined in metadata"
42
+ )
43
+ VARIABLE_RENAME_MESSAGE = "Variables have been renamed in the dataset"
44
+ VARIABLE_ORDER_MESSAGE = "The order of variables in the dataset has changed"
45
+ VARIABLE_DATATYPES_MESSAGE = "Variable datatypes differ"
46
+ VARIABLES_FEWER_MESSAGE = "Dataset has fewer variables than defined in metadata"
47
+
48
+
49
+ class InconsistentDatasetsWarning(UserWarning):
50
+ """Existing and new datasets differ significantly from one another."""
51
+
52
+
53
+ class InconsistentDatasetsError(ValueError):
54
+ """Existing and new datasets differ significantly from one another."""
55
+
56
+
57
+ @dataclass
58
+ class DatasetConsistencyStatus:
59
+ """Store the status for different aspects of dataset consistency.
60
+
61
+ Attributes:
62
+ message: Communicates to the user what aspect is inconsistent.
63
+ success: False if inconsistency is detected.
64
+ variables: Optionally communicate which variables are affected.
65
+ """
66
+
67
+ message: str
68
+ success: bool
69
+ variables: Iterable[str] = field(default_factory=list)
70
+
71
+ def __str__(self) -> str:
72
+ """Format the user message."""
73
+ message = self.message
74
+ if self.variables:
75
+ message += f"\n\tVariables: {self.variables}"
76
+ return message
77
+
78
+
79
+ def check_dataset_consistency(
80
+ new_dataset_path: Path | CloudPath,
81
+ existing_dataset_path: Path,
82
+ ) -> list[DatasetConsistencyStatus]:
83
+ """Run consistency tests.
84
+
85
+ Args:
86
+ new_dataset_path: Path to the dataset to be documented.
87
+ existing_dataset_path: Path stored in the existing metadata.
88
+
89
+ Returns:
90
+ List of consistency check results.
91
+ """
92
+ new_dataset_path_info = DaplaDatasetPathInfo(new_dataset_path)
93
+ existing_dataset_path_info = DaplaDatasetPathInfo(existing_dataset_path)
94
+ return [
95
+ DatasetConsistencyStatus(
96
+ message=BUCKET_NAME_MESSAGE,
97
+ success=(
98
+ new_dataset_path_info.bucket_name
99
+ == existing_dataset_path_info.bucket_name
100
+ ),
101
+ ),
102
+ DatasetConsistencyStatus(
103
+ message=DATA_PRODUCT_NAME_MESSAGE,
104
+ success=(
105
+ new_dataset_path_info.statistic_short_name
106
+ == existing_dataset_path_info.statistic_short_name
107
+ ),
108
+ ),
109
+ DatasetConsistencyStatus(
110
+ message=DATASET_STATE_MESSAGE,
111
+ success=(
112
+ new_dataset_path_info.dataset_state
113
+ == existing_dataset_path_info.dataset_state
114
+ ),
115
+ ),
116
+ DatasetConsistencyStatus(
117
+ message=DATASET_SHORT_NAME_MESSAGE,
118
+ success=(
119
+ new_dataset_path_info.dataset_short_name
120
+ == existing_dataset_path_info.dataset_short_name
121
+ ),
122
+ ),
123
+ ]
124
+
125
+
126
+ def check_variables_consistency(
127
+ extracted_variables: VariableListType,
128
+ existing_variables: VariableListType,
129
+ ) -> list[DatasetConsistencyStatus]:
130
+ """Check for consistency in variables structure.
131
+
132
+ Compares the existing metadata and that extracted from the new dataset and provides
133
+ highly detailed feedback on what is different between them.
134
+
135
+ We don't return all the results because that could create conflicting messages and false positives.
136
+
137
+ Args:
138
+ extracted_variables (VariableListType): Variables extracted from the new dataset.
139
+ existing_variables (VariableListType): Variables already documented in existing metadata.
140
+
141
+ Returns:
142
+ list[DatasetConsistencyStatus]: The list of checks and whether they were successful.
143
+ """
144
+ extracted_names_set = {v.short_name or "" for v in extracted_variables}
145
+ existing_names_set = {v.short_name or "" for v in existing_variables}
146
+ same_length = len(extracted_variables) == len(existing_variables)
147
+ more_extracted_variables = extracted_names_set.difference(existing_names_set)
148
+ fewer_extracted_variables = existing_names_set.difference(extracted_names_set)
149
+ results = []
150
+ if same_length:
151
+ if more_extracted_variables:
152
+ results.append(
153
+ DatasetConsistencyStatus(
154
+ message=VARIABLE_RENAME_MESSAGE,
155
+ variables=more_extracted_variables,
156
+ success=not bool(more_extracted_variables),
157
+ )
158
+ )
159
+ else:
160
+ results.append(
161
+ DatasetConsistencyStatus(
162
+ message=VARIABLE_ORDER_MESSAGE,
163
+ success=[v.short_name or "" for v in extracted_variables]
164
+ == [v.short_name or "" for v in existing_variables],
165
+ )
166
+ )
167
+ results.append(
168
+ DatasetConsistencyStatus(
169
+ message=VARIABLE_DATATYPES_MESSAGE,
170
+ success=[v.data_type for v in extracted_variables]
171
+ == [v.data_type for v in existing_variables],
172
+ )
173
+ )
174
+ else:
175
+ results.extend(
176
+ [
177
+ DatasetConsistencyStatus(
178
+ message=VARIABLES_ADDITIONAL_MESSAGE,
179
+ variables=more_extracted_variables,
180
+ success=not bool(more_extracted_variables),
181
+ ),
182
+ DatasetConsistencyStatus(
183
+ message=VARIABLES_FEWER_MESSAGE,
184
+ variables=fewer_extracted_variables,
185
+ success=not bool(fewer_extracted_variables),
186
+ ),
187
+ ]
188
+ )
189
+ return results
190
+
191
+
192
+ def check_ready_to_merge(
193
+ results: list[DatasetConsistencyStatus], *, errors_as_warnings: bool
194
+ ) -> None:
195
+ """Check if the datasets are consistent enough to make a successful merge of metadata.
196
+
197
+ Args:
198
+ results: List of consistency check results, each with a message and a boolean success flag.
199
+ errors_as_warnings: True if failing checks should be raised as warnings, not errors.
200
+
201
+ Raises:
202
+ InconsistentDatasetsError: If inconsistencies are found and `errors_as_warnings == False`
203
+ """
204
+ if failures := [result for result in results if not result.success]:
205
+ messages_list = "\n - ".join(str(f) for f in failures)
206
+ msg = f"{INCONSISTENCIES_MESSAGE}\n - {messages_list}"
207
+ if errors_as_warnings:
208
+ warnings.warn(
209
+ message=msg,
210
+ category=InconsistentDatasetsWarning,
211
+ stacklevel=2,
212
+ )
213
+ else:
214
+ raise InconsistentDatasetsError(
215
+ msg,
216
+ )
217
+
218
+
219
+ def override_dataset_fields(
220
+ merged_metadata: all_optional_model.DatadocMetadata,
221
+ existing_metadata: all_optional_model.DatadocMetadata
222
+ | required_model.DatadocMetadata,
223
+ ) -> None:
224
+ """Overrides specific fields in the dataset of `merged_metadata` with values from the dataset of `existing_metadata`.
225
+
226
+ This function iterates over a predefined list of fields, `DATASET_FIELDS_FROM_EXISTING_METADATA`,
227
+ and sets the corresponding fields in the `merged_metadata.dataset` object to the values
228
+ from the `existing_metadata.dataset` object.
229
+
230
+ Args:
231
+ merged_metadata: An instance of `DatadocMetadata` containing the dataset to be updated.
232
+ existing_metadata: An instance of `DatadocMetadata` containing the dataset whose values are used to update `merged_metadata.dataset`.
233
+
234
+ Returns:
235
+ `None`.
236
+ """
237
+ if merged_metadata.dataset and existing_metadata.dataset:
238
+ # Override the fields as defined
239
+ for field in DATASET_FIELDS_FROM_EXISTING_METADATA:
240
+ setattr(
241
+ merged_metadata.dataset,
242
+ field,
243
+ getattr(existing_metadata.dataset, field),
244
+ )
245
+
246
+
247
+ def merge_variables(
248
+ existing_metadata: OptionalDatadocMetadataType,
249
+ extracted_metadata: all_optional_model.DatadocMetadata,
250
+ merged_metadata: all_optional_model.DatadocMetadata,
251
+ ) -> all_optional_model.DatadocMetadata:
252
+ """Merges variables from the extracted metadata into the existing metadata and updates the merged metadata.
253
+
254
+ This function compares the variables from `extracted_metadata` with those in `existing_metadata`.
255
+ For each variable in `extracted_metadata`, it checks if a variable with the same `short_name` exists
256
+ in `existing_metadata`. If a match is found, it updates the existing variable with information from
257
+ `extracted_metadata`. If no match is found, the variable from `extracted_metadata` is directly added to `merged_metadata`.
258
+
259
+ Args:
260
+ existing_metadata: The metadata object containing the current state of variables.
261
+ extracted_metadata: The metadata object containing new or updated variables to merge.
262
+ merged_metadata: The metadata object that will contain the result of the merge.
263
+
264
+ Returns:
265
+ all_optional_model.DatadocMetadata: The `merged_metadata` object containing variables from both `existing_metadata`
266
+ and `extracted_metadata`.
267
+ """
268
+ if (
269
+ existing_metadata is not None
270
+ and existing_metadata.variables is not None
271
+ and extracted_metadata is not None
272
+ and extracted_metadata.variables is not None
273
+ and merged_metadata.variables is not None
274
+ ):
275
+ for extracted in extracted_metadata.variables:
276
+ existing = next(
277
+ (
278
+ existing
279
+ for existing in existing_metadata.variables
280
+ if existing.short_name == extracted.short_name
281
+ ),
282
+ None,
283
+ )
284
+ if existing:
285
+ existing.id = (
286
+ None # Set to None so that it will be assigned a fresh ID later
287
+ )
288
+ existing.contains_data_from = (
289
+ extracted.contains_data_from or existing.contains_data_from
290
+ )
291
+ existing.contains_data_until = (
292
+ extracted.contains_data_until or existing.contains_data_until
293
+ )
294
+ merged_metadata.variables.append(
295
+ cast("datadoc_model.all_optional.model.Variable", existing)
296
+ )
297
+ else:
298
+ # If there is no existing metadata for this variable, we just use what we have extracted
299
+ merged_metadata.variables.append(extracted)
300
+ return merged_metadata
301
+
302
+
303
+ def merge_metadata(
304
+ extracted_metadata: all_optional_model.DatadocMetadata | None,
305
+ existing_metadata: OptionalDatadocMetadataType,
306
+ ) -> all_optional_model.DatadocMetadata:
307
+ if not existing_metadata:
308
+ logger.warning(
309
+ "No existing metadata found, no merge to perform. Continuing with extracted metadata.",
310
+ )
311
+ return extracted_metadata or all_optional_model.DatadocMetadata()
312
+
313
+ if not extracted_metadata:
314
+ return cast("all_optional_model.DatadocMetadata", existing_metadata)
315
+
316
+ # Use the extracted metadata as a base
317
+ merged_metadata = all_optional_model.DatadocMetadata(
318
+ dataset=copy.deepcopy(extracted_metadata.dataset),
319
+ variables=[],
320
+ )
321
+
322
+ override_dataset_fields(
323
+ merged_metadata=merged_metadata,
324
+ existing_metadata=existing_metadata,
325
+ )
326
+
327
+ # Merge variables.
328
+ # For each extracted variable, copy existing metadata into the merged metadata
329
+ return merge_variables(
330
+ existing_metadata=existing_metadata,
331
+ extracted_metadata=extracted_metadata,
332
+ merged_metadata=merged_metadata,
333
+ )
@@ -5,7 +5,6 @@ from __future__ import annotations
5
5
  import copy
6
6
  import json
7
7
  import logging
8
- import warnings
9
8
  from concurrent.futures import ThreadPoolExecutor
10
9
  from pathlib import Path
11
10
  from typing import TYPE_CHECKING
@@ -17,6 +16,11 @@ from datadoc_model.all_optional.model import DataSetStatus
17
16
 
18
17
  from dapla_metadata._shared import config
19
18
  from dapla_metadata.dapla import user_info
19
+ from dapla_metadata.datasets._merge import DatasetConsistencyStatus
20
+ from dapla_metadata.datasets._merge import check_dataset_consistency
21
+ from dapla_metadata.datasets._merge import check_ready_to_merge
22
+ from dapla_metadata.datasets._merge import check_variables_consistency
23
+ from dapla_metadata.datasets._merge import merge_metadata
20
24
  from dapla_metadata.datasets.compatibility import is_metadata_in_container_structure
21
25
  from dapla_metadata.datasets.compatibility import upgrade_metadata
22
26
  from dapla_metadata.datasets.dapla_dataset_path_info import DaplaDatasetPathInfo
@@ -26,7 +30,6 @@ from dapla_metadata.datasets.statistic_subject_mapping import StatisticSubjectMa
26
30
  from dapla_metadata.datasets.utility.constants import (
27
31
  DEFAULT_SPATIAL_COVERAGE_DESCRIPTION,
28
32
  )
29
- from dapla_metadata.datasets.utility.constants import INCONSISTENCIES_MESSAGE
30
33
  from dapla_metadata.datasets.utility.constants import METADATA_DOCUMENT_FILE_SUFFIX
31
34
  from dapla_metadata.datasets.utility.constants import NUM_OBLIGATORY_DATASET_FIELDS
32
35
  from dapla_metadata.datasets.utility.constants import NUM_OBLIGATORY_VARIABLES_FIELDS
@@ -34,7 +37,6 @@ from dapla_metadata.datasets.utility.utils import OptionalDatadocMetadataType
34
37
  from dapla_metadata.datasets.utility.utils import calculate_percentage
35
38
  from dapla_metadata.datasets.utility.utils import derive_assessment_from_state
36
39
  from dapla_metadata.datasets.utility.utils import get_timestamp_now
37
- from dapla_metadata.datasets.utility.utils import merge_variables
38
40
  from dapla_metadata.datasets.utility.utils import normalize_path
39
41
  from dapla_metadata.datasets.utility.utils import (
40
42
  num_obligatory_dataset_fields_completed,
@@ -42,9 +44,9 @@ from dapla_metadata.datasets.utility.utils import (
42
44
  from dapla_metadata.datasets.utility.utils import (
43
45
  num_obligatory_variables_fields_completed,
44
46
  )
45
- from dapla_metadata.datasets.utility.utils import override_dataset_fields
46
47
  from dapla_metadata.datasets.utility.utils import set_dataset_owner
47
48
  from dapla_metadata.datasets.utility.utils import set_default_values_dataset
49
+ from dapla_metadata.datasets.utility.utils import set_default_values_pseudonymization
48
50
  from dapla_metadata.datasets.utility.utils import set_default_values_variables
49
51
 
50
52
  if TYPE_CHECKING:
@@ -53,18 +55,9 @@ if TYPE_CHECKING:
53
55
 
54
56
  from cloudpathlib import CloudPath
55
57
 
56
-
57
58
  logger = logging.getLogger(__name__)
58
59
 
59
60
 
60
- class InconsistentDatasetsWarning(UserWarning):
61
- """Existing and new datasets differ significantly from one another."""
62
-
63
-
64
- class InconsistentDatasetsError(ValueError):
65
- """Existing and new datasets differ significantly from one another."""
66
-
67
-
68
61
  class Datadoc:
69
62
  """Handle reading, updating and writing of metadata.
70
63
 
@@ -118,7 +111,7 @@ class Datadoc:
118
111
  self.variables: list = []
119
112
  self.variables_lookup: dict[str, all_optional_model.Variable] = {}
120
113
  self.explicitly_defined_metadata_document = False
121
- self.dataset_consistency_status: list = []
114
+ self.dataset_consistency_status: list[DatasetConsistencyStatus] = []
122
115
  if metadata_document_path:
123
116
  self.metadata_document = normalize_path(metadata_document_path)
124
117
  self.explicitly_defined_metadata_document = True
@@ -169,20 +162,22 @@ class Datadoc:
169
162
  ):
170
163
  extracted_metadata = self._extract_metadata_from_dataset(self.dataset_path)
171
164
 
172
- if extracted_metadata is not None:
173
- existing_file_path = self._get_existing_file_path(extracted_metadata)
174
- if (
175
- self.dataset_path
176
- and existing_file_path is not None
177
- and extracted_metadata is not None
178
- and existing_metadata is not None
179
- ):
180
- self.dataset_consistency_status = self._check_dataset_consistency(
181
- self.dataset_path,
182
- Path(existing_file_path),
183
- extracted_metadata,
184
- existing_metadata,
165
+ if (
166
+ self.dataset_path
167
+ and self.metadata_document
168
+ and extracted_metadata
169
+ and existing_metadata
170
+ ):
171
+ self.dataset_consistency_status = check_dataset_consistency(
172
+ self.dataset_path,
173
+ Path(self.metadata_document),
174
+ )
175
+ self.dataset_consistency_status.extend(
176
+ check_variables_consistency(
177
+ extracted_metadata.variables or [],
178
+ existing_metadata.variables or [],
185
179
  )
180
+ )
186
181
 
187
182
  if (
188
183
  self.dataset_path
@@ -192,11 +187,11 @@ class Datadoc:
192
187
  and extracted_metadata is not None
193
188
  and existing_metadata is not None
194
189
  ):
195
- self._check_ready_to_merge(
190
+ check_ready_to_merge(
196
191
  self.dataset_consistency_status,
197
192
  errors_as_warnings=self.errors_as_warnings,
198
193
  )
199
- merged_metadata = self._merge_metadata(
194
+ merged_metadata = merge_metadata(
200
195
  extracted_metadata,
201
196
  existing_metadata,
202
197
  )
@@ -214,19 +209,6 @@ class Datadoc:
214
209
  set_dataset_owner(self.dataset)
215
210
  self._create_variables_lookup()
216
211
 
217
- def _get_existing_file_path(
218
- self,
219
- extracted_metadata: all_optional_model.DatadocMetadata | None,
220
- ) -> str:
221
- if (
222
- extracted_metadata is not None
223
- and extracted_metadata.dataset is not None
224
- and extracted_metadata.dataset.file_path is not None
225
- ):
226
- return extracted_metadata.dataset.file_path
227
- msg = "Could not access existing dataset file path"
228
- raise ValueError(msg)
229
-
230
212
  def _set_metadata(
231
213
  self,
232
214
  merged_metadata: OptionalDatadocMetadataType,
@@ -244,134 +226,6 @@ class Datadoc:
244
226
  v.short_name: v for v in self.variables if v.short_name
245
227
  }
246
228
 
247
- @staticmethod
248
- def _check_dataset_consistency(
249
- new_dataset_path: Path | CloudPath,
250
- existing_dataset_path: Path,
251
- extracted_metadata: all_optional_model.DatadocMetadata,
252
- existing_metadata: OptionalDatadocMetadataType,
253
- ) -> list[dict[str, object]]:
254
- """Run consistency tests.
255
-
256
- Args:
257
- new_dataset_path: Path to the dataset to be documented.
258
- existing_dataset_path: Path stored in the existing metadata.
259
- extracted_metadata: Metadata extracted from a physical dataset.
260
- existing_metadata: Metadata from a previously created metadata document.
261
-
262
- Returns:
263
- List if dict with property name and boolean success flag
264
- """
265
- new_dataset_path_info = DaplaDatasetPathInfo(new_dataset_path)
266
- existing_dataset_path_info = DaplaDatasetPathInfo(existing_dataset_path)
267
- return [
268
- {
269
- "name": "Bucket name",
270
- "success": (
271
- new_dataset_path_info.bucket_name
272
- == existing_dataset_path_info.bucket_name
273
- ),
274
- },
275
- {
276
- "name": "Data product name",
277
- "success": (
278
- new_dataset_path_info.statistic_short_name
279
- == existing_dataset_path_info.statistic_short_name
280
- ),
281
- },
282
- {
283
- "name": "Dataset state",
284
- "success": (
285
- new_dataset_path_info.dataset_state
286
- == existing_dataset_path_info.dataset_state
287
- ),
288
- },
289
- {
290
- "name": "Dataset short name",
291
- "success": (
292
- new_dataset_path_info.dataset_short_name
293
- == existing_dataset_path_info.dataset_short_name
294
- ),
295
- },
296
- {
297
- "name": "Variable names",
298
- "success": (
299
- existing_metadata is not None
300
- and {v.short_name for v in extracted_metadata.variables or []}
301
- == {v.short_name for v in existing_metadata.variables or []}
302
- ),
303
- },
304
- {
305
- "name": "Variable datatypes",
306
- "success": (
307
- existing_metadata is not None
308
- and [v.data_type for v in extracted_metadata.variables or []]
309
- == [v.data_type for v in existing_metadata.variables or []]
310
- ),
311
- },
312
- ]
313
-
314
- @staticmethod
315
- def _check_ready_to_merge(
316
- results: list[dict[str, object]], *, errors_as_warnings: bool
317
- ) -> None:
318
- """Check if the datasets are consistent enough to make a successful merge of metadata.
319
-
320
- Args:
321
- results: List if dict with property name and boolean success flag
322
- errors_as_warnings: True if failing checks should be raised as warnings, not errors.
323
-
324
- Raises:
325
- InconsistentDatasetsError: If inconsistencies are found and `errors_as_warnings == False`
326
- """
327
- if failures := [result for result in results if not result["success"]]:
328
- msg = f"{INCONSISTENCIES_MESSAGE} {', '.join(str(f['name']) for f in failures)}"
329
- if errors_as_warnings:
330
- warnings.warn(
331
- message=msg,
332
- category=InconsistentDatasetsWarning,
333
- stacklevel=2,
334
- )
335
- else:
336
- raise InconsistentDatasetsError(
337
- msg,
338
- )
339
-
340
- @staticmethod
341
- def _merge_metadata(
342
- extracted_metadata: all_optional_model.DatadocMetadata | None,
343
- existing_metadata: OptionalDatadocMetadataType,
344
- ) -> all_optional_model.DatadocMetadata:
345
- if not existing_metadata:
346
- logger.warning(
347
- "No existing metadata found, no merge to perform. Continuing with extracted metadata.",
348
- )
349
- return extracted_metadata or all_optional_model.DatadocMetadata()
350
-
351
- if not extracted_metadata:
352
- return cast("all_optional_model.DatadocMetadata", existing_metadata)
353
-
354
- # Use the extracted metadata as a base
355
- merged_metadata = all_optional_model.DatadocMetadata(
356
- dataset=copy.deepcopy(extracted_metadata.dataset),
357
- variables=[],
358
- )
359
-
360
- override_dataset_fields(
361
- merged_metadata=merged_metadata,
362
- existing_metadata=cast(
363
- "all_optional_model.DatadocMetadata", existing_metadata
364
- ),
365
- )
366
-
367
- # Merge variables.
368
- # For each extracted variable, copy existing metadata into the merged metadata
369
- return merge_variables(
370
- existing_metadata=existing_metadata,
371
- extracted_metadata=extracted_metadata,
372
- merged_metadata=merged_metadata,
373
- )
374
-
375
229
  def _extract_metadata_from_existing_document(
376
230
  self,
377
231
  document: pathlib.Path | CloudPath,
@@ -591,9 +445,12 @@ class Datadoc:
591
445
  variable_short_name: str,
592
446
  pseudonymization: all_optional_model.Pseudonymization | None = None,
593
447
  ) -> None:
594
- """Adds a new pseudo variable to the list of pseudonymized variables also sets is_personal_data to true.
448
+ """Adds a new pseudo variable to the list of pseudonymized variables.
595
449
 
596
- If there is no pseudonymization supplied an empty Pseudonymization structure will be added to the model.
450
+ If `pseudonymization` is not supplied, an empty Pseudonymization structure
451
+ will be created and assigned to the variable.
452
+ If an encryption algorithm is recognized (one of the standard Dapla algorithms), default values are filled
453
+ for any missing fields.
597
454
 
598
455
  Args:
599
456
  variable_short_name: The short name for the variable that one wants to update the pseudo for.
@@ -601,15 +458,15 @@ class Datadoc:
601
458
 
602
459
  """
603
460
  variable = self.variables_lookup[variable_short_name]
604
- variable.pseudonymization = (
605
- pseudonymization or all_optional_model.Pseudonymization()
606
- )
461
+ if pseudonymization:
462
+ set_default_values_pseudonymization(variable, pseudonymization)
463
+ else:
464
+ variable.pseudonymization = all_optional_model.Pseudonymization()
607
465
 
608
466
  def remove_pseudonymization(self, variable_short_name: str) -> None:
609
467
  """Removes a pseudo variable by using the shortname.
610
468
 
611
469
  Updates the pseudo variable lookup by creating a new one.
612
- Sets is_personal_data to non pseudonymized encrypted personal data.
613
470
 
614
471
  Args:
615
472
  variable_short_name: The short name for the variable that one wants to remove the pseudo for.