dapla-toolbelt-metadata 0.9.1__tar.gz → 0.9.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dapla-toolbelt-metadata might be problematic. Click here for more details.
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/PKG-INFO +1 -1
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/pyproject.toml +1 -1
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/__init__.py +1 -1
- dapla_toolbelt_metadata-0.9.3/src/dapla_metadata/datasets/_merge.py +333 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/core.py +33 -176
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/utility/constants.py +19 -4
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/utility/enums.py +7 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/utility/utils.py +79 -83
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/variable_definition.py +11 -4
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/conftest.py +1 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/compatibility/v6_0_0/person_data_v1__DOC.json +2 -2
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/test_datadoc_metadata.py +1 -428
- dapla_toolbelt_metadata-0.9.3/tests/datasets/test_dataset_consistency.py +229 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/test_model_backwards_compatibility.py +23 -0
- dapla_toolbelt_metadata-0.9.3/tests/datasets/test_pseudonymization.py +534 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/test_utils.py +7 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/test_validators.py +2 -3
- dapla_toolbelt_metadata-0.9.3/tests/variable_definitions/resources/variable_definition_editing_files/variable_definition_var_test_wypvb3wd_2025-09-18T14-19-28.yaml +62 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/variable_definitions/test_variable_definition.py +22 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/uv.lock +335 -309
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/.cookiecutter.json +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/.cruft.json +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/.editorconfig +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/.gitattributes +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/.github/dependabot.yml +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/.github/labels.yml +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/.github/release-drafter.yml +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/.github/workflows/constraints.txt +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/.github/workflows/docs.yml +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/.github/workflows/labeler.yml +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/.github/workflows/release.yml +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/.github/workflows/tests.yml +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/.gitignore +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/.pre-commit-config.yaml +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/.vscode/settings.json +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/CODEOWNERS +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/CODE_OF_CONDUCT.md +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/CONTRIBUTING.md +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/LICENSE +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/Makefile +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/README.md +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/SECURITY.md +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/backstage.yaml +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/bin/__init__.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/bin/openapi_generate_post_process.sh +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/bin/upgrade_metadata_file.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/demo/datasets/create_metadata_for_new_dataset.ipynb +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/demo/datasets/create_metadata_for_new_period.ipynb +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/demo/datasets/resources/sykefratot/klargjorte_data/person_testdata_p2021_v1__DOC.json +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/demo/standards/navnestandard.ipynb +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/demo/variable_definitions/les_variabeldefinisjon.ipynb +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/demo/variable_definitions/les_variabeldefinisjoner.ipynb +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/demo/variable_definitions/migrer_variabeldefinisjoner.ipynb +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/demo/variable_definitions/ny_gyldighetsperiode_variabeldefinisjon.ipynb +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/demo/variable_definitions/ny_variabeldefinisjon.ipynb +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/demo/variable_definitions/publiser_variabeldefinisjon_eksternt.ipynb +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/demo/variable_definitions/publiser_variabeldefinisjon_internt.ipynb +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/demo/variable_definitions/rediger_publisert_variabeldefinisjon.ipynb +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/demo/variable_definitions/rediger_utkast_variabeldefinisjon.ipynb +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/demo/variable_definitions/reserver_kortnavn.ipynb +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/demo/variable_definitions/sjekk_kortnavn_variabeldefinisjon.ipynb +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/demo/variable_definitions/sjekk_migrerte_variabeldefinisjoner.ipynb +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/demo/variable_definitions/slett_utkast_variabeldefinisjon.ipynb +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/docs/Makefile +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/docs/codeofconduct.md +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/docs/conf.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/docs/contributing.md +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/docs/index.md +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/docs/license.md +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/docs/make.bat +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/docs/reference.md +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/docs/templates/package.rst_t +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/docs/vardef_client.md +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/noxfile.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/schemas/jsonschema/complete-response-schema.json +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/sonar-project.properties +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/__init__.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/_shared/__init__.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/_shared/config.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/_shared/enums.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/_shared/py.typed +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/dapla/__init__.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/dapla/user_info.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/code_list.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/compatibility/__init__.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/compatibility/_handlers.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/compatibility/_utils.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/compatibility/model_backwards_compatibility.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/dapla_dataset_path_info.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/dataset_parser.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/external_sources/__init__.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/external_sources/external_sources.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/model_validation.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/py.typed +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/statistic_subject_mapping.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/utility/__init__.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/standards/__init__.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/standards/name_validator.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/standards/standard_validators.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/standards/utils/__init__.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/standards/utils/constants.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/__init__.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/.openapi-generator/FILES +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/.openapi-generator/VERSION +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/.openapi-generator-ignore +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/README.md +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/__init__.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/__init__.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/api/__init__.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/api/data_migration_api.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/api/draft_variable_definitions_api.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/api/patches_api.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/api/validity_periods_api.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/api/variable_definitions_api.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/api_client.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/api_response.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/configuration.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/CompleteResponse.md +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/Contact.md +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/DataMigrationApi.md +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/Draft.md +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/DraftVariableDefinitionsApi.md +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/LanguageStringType.md +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/Owner.md +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/Patch.md +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/PatchesApi.md +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/PublicApi.md +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/SupportedLanguages.md +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/UpdateDraft.md +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/ValidityPeriod.md +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/ValidityPeriodsApi.md +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/VariableDefinitionsApi.md +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/docs/VariableStatus.md +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/exceptions.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/__init__.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/complete_response.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/contact.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/draft.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/get_vardok_vardef_mapping_by_id200_response.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/language_string_type.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/owner.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/patch.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/problem.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/update_draft.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/validity_period.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/vardok_id_response.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/vardok_vardef_id_pair_response.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/variable_status.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/py.typed +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_generated/vardef_client/rest.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_utils/__init__.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_utils/_client.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_utils/config.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_utils/constants.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_utils/files.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_utils/template_files.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/_utils/variable_definition_files.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/exceptions.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/vardef.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/vardok_id.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/variable_definitions/vardok_vardef_id_pair.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/__init__.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/conftest.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/dapla/__init__.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/dapla/test_user_info.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/__init__.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/constants.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/code_list/code_list.csv +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/code_list/code_list_en.csv +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/code_list/code_list_nb.csv +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/code_list/code_list_nn.csv +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/code_list/empty.csv +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/code_list/no_code.csv +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/datasets/fewer_variables_p2021-12-31_p2021-12-31_v1.parquet +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/datasets/ifpn/klargjorte_data/person_testdata_p2021-12-31_p2021-12-31_v1.parquet +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/datasets/person_data.csv +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/datasets/person_data_v1.parquet +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/datasets/person_data_v1.parquet.gzip +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/datasets/person_testdata_p2021-12-31_p2021-12-31_v1.parquet +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/datasets/sasdata.sas7bdat +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/compatibility/README.md +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/compatibility/v0_1_1/person_data_v1__DOC.json +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/compatibility/v1_0_0/person_data_v1__DOC.json +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/compatibility/v2_1_0/person_data_v1__DOC.json +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/compatibility/v2_2_0/person_data_v1__DOC.json +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/compatibility/v3_1_0/person_data_v1__DOC.json +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/compatibility/v3_2_0/person_data_v1__DOC.json +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/compatibility/v3_3_0/person_data_v1__DOC.json +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/compatibility/v4_0_0/person_data_v1__DOC.json +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/compatibility/v5_0_1/person_data_v1__DOC.json +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/fewer_variables_p2020-12-31_p2020-12-31_v1__DOC.json +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/invalid_id_field/person_data_v1__DOC.json +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/person_data_v1__DOC.json +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/person_testdata_p2020-12-31_p2020-12-31_v1__DOC.json +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/pseudo/dataset_and_pseudo/no_pseudo_v4_0_0_person_data_v1__DOC.json +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/pseudo/dataset_and_pseudo/person_data_v1__DOC.json +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/pseudo/dataset_and_pseudo/v4_0_0_person_data_v1__DOC.json +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/pseudo/pseudo/person_data_v1__DOC.json +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/pseudo/pseudo_missing_variables/person_data_v1__DOC.json +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/valid_id_field/person_data_v1__DOC.json +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/existing_metadata_file/valid_variable_id_field/person_data_v1__DOC.json +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/klargjorte_data/person_testdata_p2021-12-31_p2021-12-31_v1.parquet +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/klargjorte_data/person_testdata_p2021-12-31_p2021-12-31_v1__DOC.json +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/statistical_subject_structure/empty.xml +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/statistical_subject_structure/extract_secondary_subject.xml +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/statistical_subject_structure/missing_language.xml +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/resources/statistical_subject_structure/simple.xml +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/test_code_list.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/test_dapla_dataset_path_info.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/test_dataset_parser.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/datasets/test_statistic_subject_mapping.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/standards/__init__.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/standards/conftest.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/standards/test_check_naming_standard.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/utils/__init__.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/utils/constants.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/utils/microcks_testcontainer.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/variable_definitions/__init__.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/variable_definitions/conftest.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/variable_definitions/constants.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/variable_definitions/resources/openapi/variable-definitions-internal.yml +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/variable_definitions/resources/variable_definition_editing_files/classification_reference.yaml +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/variable_definitions/resources/variable_definition_editing_files/date_and_definition.yaml +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/variable_definitions/resources/variable_definition_editing_files/variable_definition_landbak_wypvb3wd_2025-05-02T10-06-20.yaml +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/variable_definitions/test_config.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/variable_definitions/test_template_files.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/variable_definitions/test_vardef.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/variable_definitions/test_variable_definition_files.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/variable_definitions/vardef_client/__init__.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/variable_definitions/vardef_client/api/__init__.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/variable_definitions/vardef_client/api/test_happy_path.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/variable_definitions/vardef_client/models/__init__.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/variable_definitions/vardef_client/models/test_models_instantiation.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/tests/variable_definitions/vardef_client/test_exceptions.py +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/vardef-maintenance/README.md +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/vardef-maintenance/vardef-maintenance.toml +0 -0
- {dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/vardef-maintenance/vardef_client_init.sh +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dapla-toolbelt-metadata
|
|
3
|
-
Version: 0.9.1
|
|
3
|
+
Version: 0.9.3
|
|
4
4
|
Summary: Dapla Toolbelt Metadata
|
|
5
5
|
Project-URL: homepage, https://github.com/statisticsnorway/dapla-toolbelt-metadata
|
|
6
6
|
Project-URL: repository, https://github.com/statisticsnorway/dapla-toolbelt-metadata
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
name = "dapla-toolbelt-metadata"
|
|
3
3
|
description = "Dapla Toolbelt Metadata"
|
|
4
4
|
license = "MIT"
|
|
5
|
-
version = "0.9.1"
|
|
5
|
+
version = "0.9.3"
|
|
6
6
|
classifiers = ["Development Status :: 4 - Beta"]
|
|
7
7
|
readme = "README.md"
|
|
8
8
|
authors = [{ name = "Statistics Norway", email = "metadata@ssb.no" }]
|
|
@@ -0,0 +1,333 @@
|
|
|
1
|
+
"""Code relating to merging metadata from an existing metadata document and metadata extracted from a new dataset.
|
|
2
|
+
|
|
3
|
+
This is primarily convenience functionality for users whereby they can programmatically generate metadata without
|
|
4
|
+
having to manually enter it. This is primarily useful when data is sharded by time (i.e. each dataset applies for
|
|
5
|
+
a particular period like a month or a year). Assuming there aren't structural changes, the metadata may be reused
|
|
6
|
+
for all periods.
|
|
7
|
+
|
|
8
|
+
It is important to be able to detect changes in the structure of the data and warn users about this so that they can
|
|
9
|
+
make changes as appropriate.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import copy
|
|
13
|
+
import logging
|
|
14
|
+
import warnings
|
|
15
|
+
from collections.abc import Iterable
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
from dataclasses import field
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import cast
|
|
20
|
+
|
|
21
|
+
import datadoc_model
|
|
22
|
+
import datadoc_model.all_optional.model as all_optional_model
|
|
23
|
+
import datadoc_model.required.model as required_model
|
|
24
|
+
from cloudpathlib import CloudPath
|
|
25
|
+
|
|
26
|
+
from dapla_metadata.datasets.dapla_dataset_path_info import DaplaDatasetPathInfo
|
|
27
|
+
from dapla_metadata.datasets.utility.constants import (
|
|
28
|
+
DATASET_FIELDS_FROM_EXISTING_METADATA,
|
|
29
|
+
)
|
|
30
|
+
from dapla_metadata.datasets.utility.constants import INCONSISTENCIES_MESSAGE
|
|
31
|
+
from dapla_metadata.datasets.utility.utils import OptionalDatadocMetadataType
|
|
32
|
+
from dapla_metadata.datasets.utility.utils import VariableListType
|
|
33
|
+
|
|
34
|
+
logger = logging.getLogger(__name__)
|
|
35
|
+
|
|
36
|
+
BUCKET_NAME_MESSAGE = "Bucket name"
|
|
37
|
+
DATA_PRODUCT_NAME_MESSAGE = "Data product name"
|
|
38
|
+
DATASET_STATE_MESSAGE = "Dataset state"
|
|
39
|
+
DATASET_SHORT_NAME_MESSAGE = "Dataset short name"
|
|
40
|
+
VARIABLES_ADDITIONAL_MESSAGE = (
|
|
41
|
+
"Dataset has additional variables than defined in metadata"
|
|
42
|
+
)
|
|
43
|
+
VARIABLE_RENAME_MESSAGE = "Variables have been renamed in the dataset"
|
|
44
|
+
VARIABLE_ORDER_MESSAGE = "The order of variables in the dataset has changed"
|
|
45
|
+
VARIABLE_DATATYPES_MESSAGE = "Variable datatypes differ"
|
|
46
|
+
VARIABLES_FEWER_MESSAGE = "Dataset has fewer variables than defined in metadata"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class InconsistentDatasetsWarning(UserWarning):
|
|
50
|
+
"""Existing and new datasets differ significantly from one another."""
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class InconsistentDatasetsError(ValueError):
|
|
54
|
+
"""Existing and new datasets differ significantly from one another."""
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass
|
|
58
|
+
class DatasetConsistencyStatus:
|
|
59
|
+
"""Store the status for different aspects of dataset consistency.
|
|
60
|
+
|
|
61
|
+
Attributes:
|
|
62
|
+
message: Communicates to the user what aspect is inconsistent.
|
|
63
|
+
success: False if inconsistency is detected.
|
|
64
|
+
variables: Optionally communicate which variables are affected.
|
|
65
|
+
"""
|
|
66
|
+
|
|
67
|
+
message: str
|
|
68
|
+
success: bool
|
|
69
|
+
variables: Iterable[str] = field(default_factory=list)
|
|
70
|
+
|
|
71
|
+
def __str__(self) -> str:
|
|
72
|
+
"""Format the user message."""
|
|
73
|
+
message = self.message
|
|
74
|
+
if self.variables:
|
|
75
|
+
message += f"\n\tVariables: {self.variables}"
|
|
76
|
+
return message
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def check_dataset_consistency(
|
|
80
|
+
new_dataset_path: Path | CloudPath,
|
|
81
|
+
existing_dataset_path: Path,
|
|
82
|
+
) -> list[DatasetConsistencyStatus]:
|
|
83
|
+
"""Run consistency tests.
|
|
84
|
+
|
|
85
|
+
Args:
|
|
86
|
+
new_dataset_path: Path to the dataset to be documented.
|
|
87
|
+
existing_dataset_path: Path stored in the existing metadata.
|
|
88
|
+
|
|
89
|
+
Returns:
|
|
90
|
+
List of consistency check results.
|
|
91
|
+
"""
|
|
92
|
+
new_dataset_path_info = DaplaDatasetPathInfo(new_dataset_path)
|
|
93
|
+
existing_dataset_path_info = DaplaDatasetPathInfo(existing_dataset_path)
|
|
94
|
+
return [
|
|
95
|
+
DatasetConsistencyStatus(
|
|
96
|
+
message=BUCKET_NAME_MESSAGE,
|
|
97
|
+
success=(
|
|
98
|
+
new_dataset_path_info.bucket_name
|
|
99
|
+
== existing_dataset_path_info.bucket_name
|
|
100
|
+
),
|
|
101
|
+
),
|
|
102
|
+
DatasetConsistencyStatus(
|
|
103
|
+
message=DATA_PRODUCT_NAME_MESSAGE,
|
|
104
|
+
success=(
|
|
105
|
+
new_dataset_path_info.statistic_short_name
|
|
106
|
+
== existing_dataset_path_info.statistic_short_name
|
|
107
|
+
),
|
|
108
|
+
),
|
|
109
|
+
DatasetConsistencyStatus(
|
|
110
|
+
message=DATASET_STATE_MESSAGE,
|
|
111
|
+
success=(
|
|
112
|
+
new_dataset_path_info.dataset_state
|
|
113
|
+
== existing_dataset_path_info.dataset_state
|
|
114
|
+
),
|
|
115
|
+
),
|
|
116
|
+
DatasetConsistencyStatus(
|
|
117
|
+
message=DATASET_SHORT_NAME_MESSAGE,
|
|
118
|
+
success=(
|
|
119
|
+
new_dataset_path_info.dataset_short_name
|
|
120
|
+
== existing_dataset_path_info.dataset_short_name
|
|
121
|
+
),
|
|
122
|
+
),
|
|
123
|
+
]
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def check_variables_consistency(
|
|
127
|
+
extracted_variables: VariableListType,
|
|
128
|
+
existing_variables: VariableListType,
|
|
129
|
+
) -> list[DatasetConsistencyStatus]:
|
|
130
|
+
"""Check for consistency in variables structure.
|
|
131
|
+
|
|
132
|
+
Compares the existing metadata and that extracted from the new dataset and provides
|
|
133
|
+
highly detailed feedback on what is different between them.
|
|
134
|
+
|
|
135
|
+
We don't return all the results because that could create conflicting messages and false positives.
|
|
136
|
+
|
|
137
|
+
Args:
|
|
138
|
+
extracted_variables (VariableListType): Variables extracted from the new dataset.
|
|
139
|
+
existing_variables (VariableListType): Variables already documented in existing metadata
|
|
140
|
+
|
|
141
|
+
Returns:
|
|
142
|
+
list[DatasetConsistencyStatus]: The list of checks and whether they were successful.
|
|
143
|
+
"""
|
|
144
|
+
extracted_names_set = {v.short_name or "" for v in extracted_variables}
|
|
145
|
+
existing_names_set = {v.short_name or "" for v in existing_variables}
|
|
146
|
+
same_length = len(extracted_variables) == len(existing_variables)
|
|
147
|
+
more_extracted_variables = extracted_names_set.difference(existing_names_set)
|
|
148
|
+
fewer_extracted_variables = existing_names_set.difference(extracted_names_set)
|
|
149
|
+
results = []
|
|
150
|
+
if same_length:
|
|
151
|
+
if more_extracted_variables:
|
|
152
|
+
results.append(
|
|
153
|
+
DatasetConsistencyStatus(
|
|
154
|
+
message=VARIABLE_RENAME_MESSAGE,
|
|
155
|
+
variables=more_extracted_variables,
|
|
156
|
+
success=not bool(more_extracted_variables),
|
|
157
|
+
)
|
|
158
|
+
)
|
|
159
|
+
else:
|
|
160
|
+
results.append(
|
|
161
|
+
DatasetConsistencyStatus(
|
|
162
|
+
message=VARIABLE_ORDER_MESSAGE,
|
|
163
|
+
success=[v.short_name or "" for v in extracted_variables]
|
|
164
|
+
== [v.short_name or "" for v in existing_variables],
|
|
165
|
+
)
|
|
166
|
+
)
|
|
167
|
+
results.append(
|
|
168
|
+
DatasetConsistencyStatus(
|
|
169
|
+
message=VARIABLE_DATATYPES_MESSAGE,
|
|
170
|
+
success=[v.data_type for v in extracted_variables]
|
|
171
|
+
== [v.data_type for v in existing_variables],
|
|
172
|
+
)
|
|
173
|
+
)
|
|
174
|
+
else:
|
|
175
|
+
results.extend(
|
|
176
|
+
[
|
|
177
|
+
DatasetConsistencyStatus(
|
|
178
|
+
message=VARIABLES_ADDITIONAL_MESSAGE,
|
|
179
|
+
variables=more_extracted_variables,
|
|
180
|
+
success=not bool(more_extracted_variables),
|
|
181
|
+
),
|
|
182
|
+
DatasetConsistencyStatus(
|
|
183
|
+
message=VARIABLES_FEWER_MESSAGE,
|
|
184
|
+
variables=fewer_extracted_variables,
|
|
185
|
+
success=not bool(fewer_extracted_variables),
|
|
186
|
+
),
|
|
187
|
+
]
|
|
188
|
+
)
|
|
189
|
+
return results
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def check_ready_to_merge(
|
|
193
|
+
results: list[DatasetConsistencyStatus], *, errors_as_warnings: bool
|
|
194
|
+
) -> None:
|
|
195
|
+
"""Check if the datasets are consistent enough to make a successful merge of metadata.
|
|
196
|
+
|
|
197
|
+
Args:
|
|
198
|
+
results: List of consistency check results, each with a message and a success flag
|
|
199
|
+
errors_as_warnings: True if failing checks should be raised as warnings, not errors.
|
|
200
|
+
|
|
201
|
+
Raises:
|
|
202
|
+
InconsistentDatasetsError: If inconsistencies are found and `errors_as_warnings == False`
|
|
203
|
+
"""
|
|
204
|
+
if failures := [result for result in results if not result.success]:
|
|
205
|
+
messages_list = "\n - ".join(str(f) for f in failures)
|
|
206
|
+
msg = f"{INCONSISTENCIES_MESSAGE}\n - {messages_list}"
|
|
207
|
+
if errors_as_warnings:
|
|
208
|
+
warnings.warn(
|
|
209
|
+
message=msg,
|
|
210
|
+
category=InconsistentDatasetsWarning,
|
|
211
|
+
stacklevel=2,
|
|
212
|
+
)
|
|
213
|
+
else:
|
|
214
|
+
raise InconsistentDatasetsError(
|
|
215
|
+
msg,
|
|
216
|
+
)
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def override_dataset_fields(
|
|
220
|
+
merged_metadata: all_optional_model.DatadocMetadata,
|
|
221
|
+
existing_metadata: all_optional_model.DatadocMetadata
|
|
222
|
+
| required_model.DatadocMetadata,
|
|
223
|
+
) -> None:
|
|
224
|
+
"""Overrides specific fields in the dataset of `merged_metadata` with values from the dataset of `existing_metadata`.
|
|
225
|
+
|
|
226
|
+
This function iterates over a predefined list of fields, `DATASET_FIELDS_FROM_EXISTING_METADATA`,
|
|
227
|
+
and sets the corresponding fields in the `merged_metadata.dataset` object to the values
|
|
228
|
+
from the `existing_metadata.dataset` object.
|
|
229
|
+
|
|
230
|
+
Args:
|
|
231
|
+
merged_metadata: An instance of `DatadocMetadata` containing the dataset to be updated.
|
|
232
|
+
existing_metadata: An instance of `DatadocMetadata` containing the dataset whose values are used to update `merged_metadata.dataset`.
|
|
233
|
+
|
|
234
|
+
Returns:
|
|
235
|
+
`None`.
|
|
236
|
+
"""
|
|
237
|
+
if merged_metadata.dataset and existing_metadata.dataset:
|
|
238
|
+
# Override the fields as defined
|
|
239
|
+
for field in DATASET_FIELDS_FROM_EXISTING_METADATA:
|
|
240
|
+
setattr(
|
|
241
|
+
merged_metadata.dataset,
|
|
242
|
+
field,
|
|
243
|
+
getattr(existing_metadata.dataset, field),
|
|
244
|
+
)
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def merge_variables(
|
|
248
|
+
existing_metadata: OptionalDatadocMetadataType,
|
|
249
|
+
extracted_metadata: all_optional_model.DatadocMetadata,
|
|
250
|
+
merged_metadata: all_optional_model.DatadocMetadata,
|
|
251
|
+
) -> all_optional_model.DatadocMetadata:
|
|
252
|
+
"""Merges variables from the extracted metadata into the existing metadata and updates the merged metadata.
|
|
253
|
+
|
|
254
|
+
This function compares the variables from `extracted_metadata` with those in `existing_metadata`.
|
|
255
|
+
For each variable in `extracted_metadata`, it checks if a variable with the same `short_name` exists
|
|
256
|
+
in `existing_metadata`. If a match is found, it updates the existing variable with information from
|
|
257
|
+
`extracted_metadata`. If no match is found, the variable from `extracted_metadata` is directly added to `merged_metadata`.
|
|
258
|
+
|
|
259
|
+
Args:
|
|
260
|
+
existing_metadata: The metadata object containing the current state of variables.
|
|
261
|
+
extracted_metadata: The metadata object containing new or updated variables to merge.
|
|
262
|
+
merged_metadata: The metadata object that will contain the result of the merge.
|
|
263
|
+
|
|
264
|
+
Returns:
|
|
265
|
+
all_optional_model.DatadocMetadata: The `merged_metadata` object containing variables from both `existing_metadata`
|
|
266
|
+
and `extracted_metadata`.
|
|
267
|
+
"""
|
|
268
|
+
if (
|
|
269
|
+
existing_metadata is not None
|
|
270
|
+
and existing_metadata.variables is not None
|
|
271
|
+
and extracted_metadata is not None
|
|
272
|
+
and extracted_metadata.variables is not None
|
|
273
|
+
and merged_metadata.variables is not None
|
|
274
|
+
):
|
|
275
|
+
for extracted in extracted_metadata.variables:
|
|
276
|
+
existing = next(
|
|
277
|
+
(
|
|
278
|
+
existing
|
|
279
|
+
for existing in existing_metadata.variables
|
|
280
|
+
if existing.short_name == extracted.short_name
|
|
281
|
+
),
|
|
282
|
+
None,
|
|
283
|
+
)
|
|
284
|
+
if existing:
|
|
285
|
+
existing.id = (
|
|
286
|
+
None # Set to None so that it will be assigned a fresh ID later
|
|
287
|
+
)
|
|
288
|
+
existing.contains_data_from = (
|
|
289
|
+
extracted.contains_data_from or existing.contains_data_from
|
|
290
|
+
)
|
|
291
|
+
existing.contains_data_until = (
|
|
292
|
+
extracted.contains_data_until or existing.contains_data_until
|
|
293
|
+
)
|
|
294
|
+
merged_metadata.variables.append(
|
|
295
|
+
cast("datadoc_model.all_optional.model.Variable", existing)
|
|
296
|
+
)
|
|
297
|
+
else:
|
|
298
|
+
# If there is no existing metadata for this variable, we just use what we have extracted
|
|
299
|
+
merged_metadata.variables.append(extracted)
|
|
300
|
+
return merged_metadata
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def merge_metadata(
|
|
304
|
+
extracted_metadata: all_optional_model.DatadocMetadata | None,
|
|
305
|
+
existing_metadata: OptionalDatadocMetadataType,
|
|
306
|
+
) -> all_optional_model.DatadocMetadata:
|
|
307
|
+
if not existing_metadata:
|
|
308
|
+
logger.warning(
|
|
309
|
+
"No existing metadata found, no merge to perform. Continuing with extracted metadata.",
|
|
310
|
+
)
|
|
311
|
+
return extracted_metadata or all_optional_model.DatadocMetadata()
|
|
312
|
+
|
|
313
|
+
if not extracted_metadata:
|
|
314
|
+
return cast("all_optional_model.DatadocMetadata", existing_metadata)
|
|
315
|
+
|
|
316
|
+
# Use the extracted metadata as a base
|
|
317
|
+
merged_metadata = all_optional_model.DatadocMetadata(
|
|
318
|
+
dataset=copy.deepcopy(extracted_metadata.dataset),
|
|
319
|
+
variables=[],
|
|
320
|
+
)
|
|
321
|
+
|
|
322
|
+
override_dataset_fields(
|
|
323
|
+
merged_metadata=merged_metadata,
|
|
324
|
+
existing_metadata=existing_metadata,
|
|
325
|
+
)
|
|
326
|
+
|
|
327
|
+
# Merge variables.
|
|
328
|
+
# For each extracted variable, copy existing metadata into the merged metadata
|
|
329
|
+
return merge_variables(
|
|
330
|
+
existing_metadata=existing_metadata,
|
|
331
|
+
extracted_metadata=extracted_metadata,
|
|
332
|
+
merged_metadata=merged_metadata,
|
|
333
|
+
)
|
{dapla_toolbelt_metadata-0.9.1 → dapla_toolbelt_metadata-0.9.3}/src/dapla_metadata/datasets/core.py
RENAMED
|
@@ -5,7 +5,6 @@ from __future__ import annotations
|
|
|
5
5
|
import copy
|
|
6
6
|
import json
|
|
7
7
|
import logging
|
|
8
|
-
import warnings
|
|
9
8
|
from concurrent.futures import ThreadPoolExecutor
|
|
10
9
|
from pathlib import Path
|
|
11
10
|
from typing import TYPE_CHECKING
|
|
@@ -17,6 +16,11 @@ from datadoc_model.all_optional.model import DataSetStatus
|
|
|
17
16
|
|
|
18
17
|
from dapla_metadata._shared import config
|
|
19
18
|
from dapla_metadata.dapla import user_info
|
|
19
|
+
from dapla_metadata.datasets._merge import DatasetConsistencyStatus
|
|
20
|
+
from dapla_metadata.datasets._merge import check_dataset_consistency
|
|
21
|
+
from dapla_metadata.datasets._merge import check_ready_to_merge
|
|
22
|
+
from dapla_metadata.datasets._merge import check_variables_consistency
|
|
23
|
+
from dapla_metadata.datasets._merge import merge_metadata
|
|
20
24
|
from dapla_metadata.datasets.compatibility import is_metadata_in_container_structure
|
|
21
25
|
from dapla_metadata.datasets.compatibility import upgrade_metadata
|
|
22
26
|
from dapla_metadata.datasets.dapla_dataset_path_info import DaplaDatasetPathInfo
|
|
@@ -26,7 +30,6 @@ from dapla_metadata.datasets.statistic_subject_mapping import StatisticSubjectMa
|
|
|
26
30
|
from dapla_metadata.datasets.utility.constants import (
|
|
27
31
|
DEFAULT_SPATIAL_COVERAGE_DESCRIPTION,
|
|
28
32
|
)
|
|
29
|
-
from dapla_metadata.datasets.utility.constants import INCONSISTENCIES_MESSAGE
|
|
30
33
|
from dapla_metadata.datasets.utility.constants import METADATA_DOCUMENT_FILE_SUFFIX
|
|
31
34
|
from dapla_metadata.datasets.utility.constants import NUM_OBLIGATORY_DATASET_FIELDS
|
|
32
35
|
from dapla_metadata.datasets.utility.constants import NUM_OBLIGATORY_VARIABLES_FIELDS
|
|
@@ -34,7 +37,6 @@ from dapla_metadata.datasets.utility.utils import OptionalDatadocMetadataType
|
|
|
34
37
|
from dapla_metadata.datasets.utility.utils import calculate_percentage
|
|
35
38
|
from dapla_metadata.datasets.utility.utils import derive_assessment_from_state
|
|
36
39
|
from dapla_metadata.datasets.utility.utils import get_timestamp_now
|
|
37
|
-
from dapla_metadata.datasets.utility.utils import merge_variables
|
|
38
40
|
from dapla_metadata.datasets.utility.utils import normalize_path
|
|
39
41
|
from dapla_metadata.datasets.utility.utils import (
|
|
40
42
|
num_obligatory_dataset_fields_completed,
|
|
@@ -42,9 +44,9 @@ from dapla_metadata.datasets.utility.utils import (
|
|
|
42
44
|
from dapla_metadata.datasets.utility.utils import (
|
|
43
45
|
num_obligatory_variables_fields_completed,
|
|
44
46
|
)
|
|
45
|
-
from dapla_metadata.datasets.utility.utils import override_dataset_fields
|
|
46
47
|
from dapla_metadata.datasets.utility.utils import set_dataset_owner
|
|
47
48
|
from dapla_metadata.datasets.utility.utils import set_default_values_dataset
|
|
49
|
+
from dapla_metadata.datasets.utility.utils import set_default_values_pseudonymization
|
|
48
50
|
from dapla_metadata.datasets.utility.utils import set_default_values_variables
|
|
49
51
|
|
|
50
52
|
if TYPE_CHECKING:
|
|
@@ -53,18 +55,9 @@ if TYPE_CHECKING:
|
|
|
53
55
|
|
|
54
56
|
from cloudpathlib import CloudPath
|
|
55
57
|
|
|
56
|
-
|
|
57
58
|
logger = logging.getLogger(__name__)
|
|
58
59
|
|
|
59
60
|
|
|
60
|
-
class InconsistentDatasetsWarning(UserWarning):
|
|
61
|
-
"""Existing and new datasets differ significantly from one another."""
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
class InconsistentDatasetsError(ValueError):
|
|
65
|
-
"""Existing and new datasets differ significantly from one another."""
|
|
66
|
-
|
|
67
|
-
|
|
68
61
|
class Datadoc:
|
|
69
62
|
"""Handle reading, updating and writing of metadata.
|
|
70
63
|
|
|
@@ -118,7 +111,7 @@ class Datadoc:
|
|
|
118
111
|
self.variables: list = []
|
|
119
112
|
self.variables_lookup: dict[str, all_optional_model.Variable] = {}
|
|
120
113
|
self.explicitly_defined_metadata_document = False
|
|
121
|
-
self.dataset_consistency_status: list = []
|
|
114
|
+
self.dataset_consistency_status: list[DatasetConsistencyStatus] = []
|
|
122
115
|
if metadata_document_path:
|
|
123
116
|
self.metadata_document = normalize_path(metadata_document_path)
|
|
124
117
|
self.explicitly_defined_metadata_document = True
|
|
@@ -169,20 +162,22 @@ class Datadoc:
|
|
|
169
162
|
):
|
|
170
163
|
extracted_metadata = self._extract_metadata_from_dataset(self.dataset_path)
|
|
171
164
|
|
|
172
|
-
if
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
self.
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
165
|
+
if (
|
|
166
|
+
self.dataset_path
|
|
167
|
+
and self.metadata_document
|
|
168
|
+
and extracted_metadata
|
|
169
|
+
and existing_metadata
|
|
170
|
+
):
|
|
171
|
+
self.dataset_consistency_status = check_dataset_consistency(
|
|
172
|
+
self.dataset_path,
|
|
173
|
+
Path(self.metadata_document),
|
|
174
|
+
)
|
|
175
|
+
self.dataset_consistency_status.extend(
|
|
176
|
+
check_variables_consistency(
|
|
177
|
+
extracted_metadata.variables or [],
|
|
178
|
+
existing_metadata.variables or [],
|
|
185
179
|
)
|
|
180
|
+
)
|
|
186
181
|
|
|
187
182
|
if (
|
|
188
183
|
self.dataset_path
|
|
@@ -192,11 +187,11 @@ class Datadoc:
|
|
|
192
187
|
and extracted_metadata is not None
|
|
193
188
|
and existing_metadata is not None
|
|
194
189
|
):
|
|
195
|
-
|
|
190
|
+
check_ready_to_merge(
|
|
196
191
|
self.dataset_consistency_status,
|
|
197
192
|
errors_as_warnings=self.errors_as_warnings,
|
|
198
193
|
)
|
|
199
|
-
merged_metadata = self._merge_metadata(
|
|
194
|
+
merged_metadata = merge_metadata(
|
|
200
195
|
extracted_metadata,
|
|
201
196
|
existing_metadata,
|
|
202
197
|
)
|
|
@@ -214,19 +209,6 @@ class Datadoc:
|
|
|
214
209
|
set_dataset_owner(self.dataset)
|
|
215
210
|
self._create_variables_lookup()
|
|
216
211
|
|
|
217
|
-
def _get_existing_file_path(
|
|
218
|
-
self,
|
|
219
|
-
extracted_metadata: all_optional_model.DatadocMetadata | None,
|
|
220
|
-
) -> str:
|
|
221
|
-
if (
|
|
222
|
-
extracted_metadata is not None
|
|
223
|
-
and extracted_metadata.dataset is not None
|
|
224
|
-
and extracted_metadata.dataset.file_path is not None
|
|
225
|
-
):
|
|
226
|
-
return extracted_metadata.dataset.file_path
|
|
227
|
-
msg = "Could not access existing dataset file path"
|
|
228
|
-
raise ValueError(msg)
|
|
229
|
-
|
|
230
212
|
def _set_metadata(
|
|
231
213
|
self,
|
|
232
214
|
merged_metadata: OptionalDatadocMetadataType,
|
|
@@ -244,134 +226,6 @@ class Datadoc:
|
|
|
244
226
|
v.short_name: v for v in self.variables if v.short_name
|
|
245
227
|
}
|
|
246
228
|
|
|
247
|
-
@staticmethod
|
|
248
|
-
def _check_dataset_consistency(
|
|
249
|
-
new_dataset_path: Path | CloudPath,
|
|
250
|
-
existing_dataset_path: Path,
|
|
251
|
-
extracted_metadata: all_optional_model.DatadocMetadata,
|
|
252
|
-
existing_metadata: OptionalDatadocMetadataType,
|
|
253
|
-
) -> list[dict[str, object]]:
|
|
254
|
-
"""Run consistency tests.
|
|
255
|
-
|
|
256
|
-
Args:
|
|
257
|
-
new_dataset_path: Path to the dataset to be documented.
|
|
258
|
-
existing_dataset_path: Path stored in the existing metadata.
|
|
259
|
-
extracted_metadata: Metadata extracted from a physical dataset.
|
|
260
|
-
existing_metadata: Metadata from a previously created metadata document.
|
|
261
|
-
|
|
262
|
-
Returns:
|
|
263
|
-
List if dict with property name and boolean success flag
|
|
264
|
-
"""
|
|
265
|
-
new_dataset_path_info = DaplaDatasetPathInfo(new_dataset_path)
|
|
266
|
-
existing_dataset_path_info = DaplaDatasetPathInfo(existing_dataset_path)
|
|
267
|
-
return [
|
|
268
|
-
{
|
|
269
|
-
"name": "Bucket name",
|
|
270
|
-
"success": (
|
|
271
|
-
new_dataset_path_info.bucket_name
|
|
272
|
-
== existing_dataset_path_info.bucket_name
|
|
273
|
-
),
|
|
274
|
-
},
|
|
275
|
-
{
|
|
276
|
-
"name": "Data product name",
|
|
277
|
-
"success": (
|
|
278
|
-
new_dataset_path_info.statistic_short_name
|
|
279
|
-
== existing_dataset_path_info.statistic_short_name
|
|
280
|
-
),
|
|
281
|
-
},
|
|
282
|
-
{
|
|
283
|
-
"name": "Dataset state",
|
|
284
|
-
"success": (
|
|
285
|
-
new_dataset_path_info.dataset_state
|
|
286
|
-
== existing_dataset_path_info.dataset_state
|
|
287
|
-
),
|
|
288
|
-
},
|
|
289
|
-
{
|
|
290
|
-
"name": "Dataset short name",
|
|
291
|
-
"success": (
|
|
292
|
-
new_dataset_path_info.dataset_short_name
|
|
293
|
-
== existing_dataset_path_info.dataset_short_name
|
|
294
|
-
),
|
|
295
|
-
},
|
|
296
|
-
{
|
|
297
|
-
"name": "Variable names",
|
|
298
|
-
"success": (
|
|
299
|
-
existing_metadata is not None
|
|
300
|
-
and {v.short_name for v in extracted_metadata.variables or []}
|
|
301
|
-
== {v.short_name for v in existing_metadata.variables or []}
|
|
302
|
-
),
|
|
303
|
-
},
|
|
304
|
-
{
|
|
305
|
-
"name": "Variable datatypes",
|
|
306
|
-
"success": (
|
|
307
|
-
existing_metadata is not None
|
|
308
|
-
and [v.data_type for v in extracted_metadata.variables or []]
|
|
309
|
-
== [v.data_type for v in existing_metadata.variables or []]
|
|
310
|
-
),
|
|
311
|
-
},
|
|
312
|
-
]
|
|
313
|
-
|
|
314
|
-
@staticmethod
|
|
315
|
-
def _check_ready_to_merge(
|
|
316
|
-
results: list[dict[str, object]], *, errors_as_warnings: bool
|
|
317
|
-
) -> None:
|
|
318
|
-
"""Check if the datasets are consistent enough to make a successful merge of metadata.
|
|
319
|
-
|
|
320
|
-
Args:
|
|
321
|
-
results: List if dict with property name and boolean success flag
|
|
322
|
-
errors_as_warnings: True if failing checks should be raised as warnings, not errors.
|
|
323
|
-
|
|
324
|
-
Raises:
|
|
325
|
-
InconsistentDatasetsError: If inconsistencies are found and `errors_as_warnings == False`
|
|
326
|
-
"""
|
|
327
|
-
if failures := [result for result in results if not result["success"]]:
|
|
328
|
-
msg = f"{INCONSISTENCIES_MESSAGE} {', '.join(str(f['name']) for f in failures)}"
|
|
329
|
-
if errors_as_warnings:
|
|
330
|
-
warnings.warn(
|
|
331
|
-
message=msg,
|
|
332
|
-
category=InconsistentDatasetsWarning,
|
|
333
|
-
stacklevel=2,
|
|
334
|
-
)
|
|
335
|
-
else:
|
|
336
|
-
raise InconsistentDatasetsError(
|
|
337
|
-
msg,
|
|
338
|
-
)
|
|
339
|
-
|
|
340
|
-
@staticmethod
|
|
341
|
-
def _merge_metadata(
|
|
342
|
-
extracted_metadata: all_optional_model.DatadocMetadata | None,
|
|
343
|
-
existing_metadata: OptionalDatadocMetadataType,
|
|
344
|
-
) -> all_optional_model.DatadocMetadata:
|
|
345
|
-
if not existing_metadata:
|
|
346
|
-
logger.warning(
|
|
347
|
-
"No existing metadata found, no merge to perform. Continuing with extracted metadata.",
|
|
348
|
-
)
|
|
349
|
-
return extracted_metadata or all_optional_model.DatadocMetadata()
|
|
350
|
-
|
|
351
|
-
if not extracted_metadata:
|
|
352
|
-
return cast("all_optional_model.DatadocMetadata", existing_metadata)
|
|
353
|
-
|
|
354
|
-
# Use the extracted metadata as a base
|
|
355
|
-
merged_metadata = all_optional_model.DatadocMetadata(
|
|
356
|
-
dataset=copy.deepcopy(extracted_metadata.dataset),
|
|
357
|
-
variables=[],
|
|
358
|
-
)
|
|
359
|
-
|
|
360
|
-
override_dataset_fields(
|
|
361
|
-
merged_metadata=merged_metadata,
|
|
362
|
-
existing_metadata=cast(
|
|
363
|
-
"all_optional_model.DatadocMetadata", existing_metadata
|
|
364
|
-
),
|
|
365
|
-
)
|
|
366
|
-
|
|
367
|
-
# Merge variables.
|
|
368
|
-
# For each extracted variable, copy existing metadata into the merged metadata
|
|
369
|
-
return merge_variables(
|
|
370
|
-
existing_metadata=existing_metadata,
|
|
371
|
-
extracted_metadata=extracted_metadata,
|
|
372
|
-
merged_metadata=merged_metadata,
|
|
373
|
-
)
|
|
374
|
-
|
|
375
229
|
def _extract_metadata_from_existing_document(
|
|
376
230
|
self,
|
|
377
231
|
document: pathlib.Path | CloudPath,
|
|
@@ -591,9 +445,12 @@ class Datadoc:
|
|
|
591
445
|
variable_short_name: str,
|
|
592
446
|
pseudonymization: all_optional_model.Pseudonymization | None = None,
|
|
593
447
|
) -> None:
|
|
594
|
-
"""Adds a new pseudo variable to the list of pseudonymized variables
|
|
448
|
+
"""Adds a new pseudo variable to the list of pseudonymized variables.
|
|
595
449
|
|
|
596
|
-
If
|
|
450
|
+
If `pseudonymization` is not supplied, an empty Pseudonymization structure
|
|
451
|
+
will be created and assigned to the variable.
|
|
452
|
+
If an encryption algorithm is recognized (one of the standard Dapla algorithms), default values are filled
|
|
453
|
+
for any missing fields.
|
|
597
454
|
|
|
598
455
|
Args:
|
|
599
456
|
variable_short_name: The short name for the variable that one wants to update the pseudo for.
|
|
@@ -601,15 +458,15 @@ class Datadoc:
|
|
|
601
458
|
|
|
602
459
|
"""
|
|
603
460
|
variable = self.variables_lookup[variable_short_name]
|
|
604
|
-
|
|
605
|
-
pseudonymization
|
|
606
|
-
|
|
461
|
+
if pseudonymization:
|
|
462
|
+
set_default_values_pseudonymization(variable, pseudonymization)
|
|
463
|
+
else:
|
|
464
|
+
variable.pseudonymization = all_optional_model.Pseudonymization()
|
|
607
465
|
|
|
608
466
|
def remove_pseudonymization(self, variable_short_name: str) -> None:
|
|
609
467
|
"""Removes a pseudo variable by using the shortname.
|
|
610
468
|
|
|
611
469
|
Updates the pseudo variable lookup by creating a new one.
|
|
612
|
-
Sets is_personal_data to non pseudonymized encrypted personal data.
|
|
613
470
|
|
|
614
471
|
Args:
|
|
615
472
|
variable_short_name: The short name for the variable that one wants to remove the pseudo for.
|