dapla-toolbelt-metadata 0.4.2__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dapla-toolbelt-metadata might be problematic. Click here for more details.
- {dapla_toolbelt_metadata-0.4.2 → dapla_toolbelt_metadata-0.6.0}/PKG-INFO +8 -10
- {dapla_toolbelt_metadata-0.4.2 → dapla_toolbelt_metadata-0.6.0}/README.md +0 -2
- {dapla_toolbelt_metadata-0.4.2 → dapla_toolbelt_metadata-0.6.0}/pyproject.toml +29 -25
- dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/__init__.py +15 -0
- dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/dapla/__init__.py +4 -0
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/_shared → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/dapla}/user_info.py +66 -20
- {dapla_toolbelt_metadata-0.4.2 → dapla_toolbelt_metadata-0.6.0}/src/dapla_metadata/datasets/code_list.py +1 -1
- {dapla_toolbelt_metadata-0.4.2 → dapla_toolbelt_metadata-0.6.0}/src/dapla_metadata/datasets/core.py +1 -1
- {dapla_toolbelt_metadata-0.4.2 → dapla_toolbelt_metadata-0.6.0}/src/dapla_metadata/datasets/dapla_dataset_path_info.py +128 -14
- {dapla_toolbelt_metadata-0.4.2 → dapla_toolbelt_metadata-0.6.0}/src/dapla_metadata/datasets/dataset_parser.py +21 -15
- {dapla_toolbelt_metadata-0.4.2 → dapla_toolbelt_metadata-0.6.0}/src/dapla_metadata/datasets/model_backwards_compatibility.py +6 -6
- {dapla_toolbelt_metadata-0.4.2 → dapla_toolbelt_metadata-0.6.0}/src/dapla_metadata/datasets/model_validation.py +2 -2
- {dapla_toolbelt_metadata-0.4.2 → dapla_toolbelt_metadata-0.6.0}/src/dapla_metadata/datasets/utility/constants.py +1 -0
- {dapla_toolbelt_metadata-0.4.2 → dapla_toolbelt_metadata-0.6.0}/src/dapla_metadata/datasets/utility/enums.py +1 -1
- {dapla_toolbelt_metadata-0.4.2 → dapla_toolbelt_metadata-0.6.0}/src/dapla_metadata/datasets/utility/utils.py +8 -12
- dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/standards/__init__.py +4 -0
- dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/standards/name_validator.py +250 -0
- dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/standards/standard_validators.py +98 -0
- dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/standards/utils/__init__.py +1 -0
- dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/standards/utils/constants.py +49 -0
- dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/__init__.py +9 -0
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/.openapi-generator/FILES +0 -5
- dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated/.openapi-generator/VERSION +1 -0
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/__init__.py +0 -5
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/api/__init__.py +0 -1
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/api/data_migration_api.py +2 -2
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/api/draft_variable_definitions_api.py +14 -14
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/api/patches_api.py +15 -15
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/api/validity_periods_api.py +8 -281
- dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated/vardef_client/api/public_api.py → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated/vardef_client/api/variable_definitions_api.py +73 -358
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/models/__init__.py +2 -6
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/models/complete_response.py +8 -32
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/models/contact.py +2 -2
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/models/draft.py +8 -23
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/models/language_string_type.py +7 -6
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/models/owner.py +2 -2
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/models/patch.py +16 -61
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/models/problem.py +2 -2
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/models/update_draft.py +22 -55
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/models/validity_period.py +14 -48
- dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated/vardef_client/models/variable_status.py +33 -0
- dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_utils/__init__.py +1 -0
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_utils}/_client.py +5 -3
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_utils}/config.py +25 -1
- dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_utils/constants.py +41 -0
- dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_utils/descriptions.py +89 -0
- dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_utils/files.py +273 -0
- dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_utils/template_files.py +112 -0
- dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_utils/variable_definition_files.py +93 -0
- dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/exceptions.py +196 -0
- dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/resources/vardef_model_descriptions_nb.yaml +63 -0
- {dapla_toolbelt_metadata-0.4.2 → dapla_toolbelt_metadata-0.6.0}/src/dapla_metadata/variable_definitions/vardef.py +131 -10
- dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/variable_definition.py +420 -0
- dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/__init__.py +0 -6
- dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/__init__.py +0 -7
- dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/exceptions.py +0 -66
- dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated/.openapi-generator/VERSION +0 -1
- dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated/vardef_client/api/variable_definitions_api.py +0 -1205
- dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated/vardef_client/models/klass_reference.py +0 -99
- dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated/vardef_client/models/rendered_contact.py +0 -92
- dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated/vardef_client/models/rendered_variable_definition.py +0 -235
- dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated/vardef_client/models/supported_languages.py +0 -33
- dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated/vardef_client/models/variable_status.py +0 -33
- dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/variable_definition.py +0 -212
- {dapla_toolbelt_metadata-0.4.2 → dapla_toolbelt_metadata-0.6.0}/LICENSE +0 -0
- {dapla_toolbelt_metadata-0.4.2 → dapla_toolbelt_metadata-0.6.0}/src/dapla_metadata/_shared/__init__.py +0 -0
- {dapla_toolbelt_metadata-0.4.2 → dapla_toolbelt_metadata-0.6.0}/src/dapla_metadata/_shared/config.py +0 -0
- {dapla_toolbelt_metadata-0.4.2 → dapla_toolbelt_metadata-0.6.0}/src/dapla_metadata/_shared/enums.py +0 -0
- {dapla_toolbelt_metadata-0.4.2 → dapla_toolbelt_metadata-0.6.0}/src/dapla_metadata/_shared/py.typed +0 -0
- {dapla_toolbelt_metadata-0.4.2 → dapla_toolbelt_metadata-0.6.0}/src/dapla_metadata/datasets/__init__.py +0 -0
- {dapla_toolbelt_metadata-0.4.2 → dapla_toolbelt_metadata-0.6.0}/src/dapla_metadata/datasets/external_sources/__init__.py +0 -0
- {dapla_toolbelt_metadata-0.4.2 → dapla_toolbelt_metadata-0.6.0}/src/dapla_metadata/datasets/external_sources/external_sources.py +0 -0
- {dapla_toolbelt_metadata-0.4.2 → dapla_toolbelt_metadata-0.6.0}/src/dapla_metadata/datasets/py.typed +0 -0
- {dapla_toolbelt_metadata-0.4.2 → dapla_toolbelt_metadata-0.6.0}/src/dapla_metadata/datasets/statistic_subject_mapping.py +0 -0
- {dapla_toolbelt_metadata-0.4.2 → dapla_toolbelt_metadata-0.6.0}/src/dapla_metadata/datasets/utility/__init__.py +0 -0
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/.openapi-generator-ignore +0 -0
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/README.md +0 -0
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/__init__.py +0 -0
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/api_client.py +0 -0
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/api_response.py +0 -0
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/configuration.py +0 -0
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/docs/CompleteResponse.md +0 -0
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/docs/Contact.md +0 -0
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/docs/DataMigrationApi.md +0 -0
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/docs/Draft.md +0 -0
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/docs/DraftVariableDefinitionsApi.md +0 -0
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/docs/LanguageStringType.md +0 -0
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/docs/Owner.md +0 -0
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/docs/Patch.md +0 -0
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/docs/PatchesApi.md +0 -0
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/docs/PublicApi.md +0 -0
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/docs/SupportedLanguages.md +0 -0
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/docs/UpdateDraft.md +0 -0
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/docs/ValidityPeriod.md +0 -0
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/docs/ValidityPeriodsApi.md +0 -0
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/docs/VariableDefinitionsApi.md +0 -0
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/docs/VariableStatus.md +0 -0
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/exceptions.py +0 -0
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/py.typed +0 -0
- {dapla_toolbelt_metadata-0.4.2/src/dapla_metadata/variable_definitions/generated → dapla_toolbelt_metadata-0.6.0/src/dapla_metadata/variable_definitions/_generated}/vardef_client/rest.py +0 -0
|
@@ -1,34 +1,34 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
2
|
Name: dapla-toolbelt-metadata
|
|
3
|
-
Version: 0.4.2
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Summary: Dapla Toolbelt Metadata
|
|
5
|
-
Home-page: https://github.com/statisticsnorway/dapla-toolbelt-metadata
|
|
6
5
|
License: MIT
|
|
7
6
|
Author: Team Metadata
|
|
8
7
|
Author-email: metadata@ssb.no
|
|
9
|
-
Requires-Python: >=3.
|
|
8
|
+
Requires-Python: >=3.11,<4.0
|
|
10
9
|
Classifier: Development Status :: 4 - Beta
|
|
11
10
|
Classifier: License :: OSI Approved :: MIT License
|
|
12
11
|
Classifier: Programming Language :: Python :: 3
|
|
13
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
14
12
|
Classifier: Programming Language :: Python :: 3.11
|
|
15
13
|
Classifier: Programming Language :: Python :: 3.12
|
|
16
14
|
Classifier: Programming Language :: Python :: 3.13
|
|
17
15
|
Requires-Dist: arrow (>=1.3.0)
|
|
18
16
|
Requires-Dist: beautifulsoup4 (>=4.12.3)
|
|
19
17
|
Requires-Dist: cloudpathlib[gs] (>=0.17.0)
|
|
20
|
-
Requires-Dist:
|
|
21
|
-
Requires-Dist:
|
|
18
|
+
Requires-Dist: google-auth (>=2.38.0)
|
|
19
|
+
Requires-Dist: lxml (>=5.3.1)
|
|
22
20
|
Requires-Dist: pyarrow (>=8.0.0)
|
|
23
21
|
Requires-Dist: pydantic (>=2.5.2)
|
|
24
22
|
Requires-Dist: pyjwt (>=2.8.0)
|
|
25
23
|
Requires-Dist: python-dotenv (>=1.0.1)
|
|
26
24
|
Requires-Dist: requests (>=2.31.0)
|
|
25
|
+
Requires-Dist: ruamel-yaml (>=0.18.10)
|
|
27
26
|
Requires-Dist: ssb-datadoc-model (>=6.0.0,<7.0.0)
|
|
28
|
-
Requires-Dist: ssb-klass-python (>=
|
|
27
|
+
Requires-Dist: ssb-klass-python (>=1.0.1)
|
|
29
28
|
Requires-Dist: typing-extensions (>=4.12.2)
|
|
30
29
|
Project-URL: Changelog, https://github.com/statisticsnorway/dapla-toolbelt-metadata/releases
|
|
31
30
|
Project-URL: Documentation, https://statisticsnorway.github.io/dapla-toolbelt-metadata
|
|
31
|
+
Project-URL: Homepage, https://github.com/statisticsnorway/dapla-toolbelt-metadata
|
|
32
32
|
Project-URL: Repository, https://github.com/statisticsnorway/dapla-toolbelt-metadata
|
|
33
33
|
Description-Content-Type: text/markdown
|
|
34
34
|
|
|
@@ -45,7 +45,6 @@ Description-Content-Type: text/markdown
|
|
|
45
45
|
[][sonarquality]
|
|
46
46
|
|
|
47
47
|
[][pre-commit]
|
|
48
|
-
[][black]
|
|
49
48
|
[](https://github.com/astral-sh/ruff)
|
|
50
49
|
[][poetry]
|
|
51
50
|
|
|
@@ -56,7 +55,6 @@ Description-Content-Type: text/markdown
|
|
|
56
55
|
[sonarcov]: https://sonarcloud.io/summary/overall?id=statisticsnorway_dapla-toolbelt-metadata
|
|
57
56
|
[sonarquality]: https://sonarcloud.io/summary/overall?id=statisticsnorway_dapla-toolbelt-metadata
|
|
58
57
|
[pre-commit]: https://github.com/pre-commit/pre-commit
|
|
59
|
-
[black]: https://github.com/psf/black
|
|
60
58
|
[poetry]: https://python-poetry.org/
|
|
61
59
|
|
|
62
60
|
Tools and clients for working with the Dapla Metadata system.
|
|
@@ -11,7 +11,6 @@
|
|
|
11
11
|
[][sonarquality]
|
|
12
12
|
|
|
13
13
|
[][pre-commit]
|
|
14
|
-
[][black]
|
|
15
14
|
[](https://github.com/astral-sh/ruff)
|
|
16
15
|
[][poetry]
|
|
17
16
|
|
|
@@ -22,7 +21,6 @@
|
|
|
22
21
|
[sonarcov]: https://sonarcloud.io/summary/overall?id=statisticsnorway_dapla-toolbelt-metadata
|
|
23
22
|
[sonarquality]: https://sonarcloud.io/summary/overall?id=statisticsnorway_dapla-toolbelt-metadata
|
|
24
23
|
[pre-commit]: https://github.com/pre-commit/pre-commit
|
|
25
|
-
[black]: https://github.com/psf/black
|
|
26
24
|
[poetry]: https://python-poetry.org/
|
|
27
25
|
|
|
28
26
|
Tools and clients for working with the Dapla Metadata system.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "dapla-toolbelt-metadata"
|
|
3
|
-
version = "0.4.2"
|
|
3
|
+
version = "0.6.0"
|
|
4
4
|
description = "Dapla Toolbelt Metadata"
|
|
5
5
|
authors = ["Team Metadata <metadata@ssb.no>"]
|
|
6
6
|
license = "MIT"
|
|
@@ -16,23 +16,23 @@ packages = [{ include = "dapla_metadata", from = "src" }]
|
|
|
16
16
|
Changelog = "https://github.com/statisticsnorway/dapla-toolbelt-metadata/releases"
|
|
17
17
|
|
|
18
18
|
[tool.poetry.dependencies]
|
|
19
|
-
python = ">=3.
|
|
19
|
+
python = ">=3.11,<4.0"
|
|
20
20
|
pyarrow = ">=8.0.0"
|
|
21
21
|
pydantic = ">=2.5.2"
|
|
22
|
-
pandas = ">=1.4.2"
|
|
23
|
-
dapla-toolbelt = ">=3.0.1"
|
|
24
22
|
arrow = ">=1.3.0"
|
|
25
23
|
python-dotenv = ">=1.0.1"
|
|
26
24
|
requests = ">=2.31.0"
|
|
27
25
|
beautifulsoup4 = ">=4.12.3"
|
|
28
26
|
cloudpathlib = { extras = ["gs"], version = ">=0.17.0" }
|
|
29
27
|
pyjwt = ">=2.8.0"
|
|
30
|
-
ssb-klass-python = ">=
|
|
28
|
+
ssb-klass-python = ">=1.0.1"
|
|
31
29
|
ssb-datadoc-model = "^6.0.0"
|
|
32
30
|
typing-extensions = ">=4.12.2"
|
|
31
|
+
ruamel-yaml = ">=0.18.10"
|
|
32
|
+
google-auth = ">=2.38.0"
|
|
33
|
+
lxml = ">=5.3.1"
|
|
33
34
|
|
|
34
35
|
[tool.poetry.group.dev.dependencies]
|
|
35
|
-
black = ">=24.8.0"
|
|
36
36
|
pygments = ">=2.18.0"
|
|
37
37
|
coverage = { extras = ["toml"], version = ">=6.2" }
|
|
38
38
|
faker = ">=26.1.0"
|
|
@@ -48,8 +48,8 @@ typeguard = ">=2.13.3"
|
|
|
48
48
|
xdoctest = { extras = ["colors"], version = ">=0.15.10" }
|
|
49
49
|
myst-parser = { version = ">=0.16.1" }
|
|
50
50
|
mypy = ">=0.950"
|
|
51
|
+
pandas = ">=1.4.2"
|
|
51
52
|
pytest-cov = ">=3.0.0"
|
|
52
|
-
nbstripout = "^0.8.1"
|
|
53
53
|
python-kacl = "*"
|
|
54
54
|
pytest-mock = ">=3.14.0"
|
|
55
55
|
deptry = ">=0.12.0"
|
|
@@ -59,11 +59,13 @@ requests-mock = ">=1.12.1"
|
|
|
59
59
|
types-Pygments = "*"
|
|
60
60
|
types-colorama = "*"
|
|
61
61
|
types-setuptools = "*"
|
|
62
|
+
types-requests = "*"
|
|
62
63
|
types-beautifulsoup4 = ">=4.12.0.20240511"
|
|
63
64
|
ipykernel = "^6.29.5"
|
|
64
65
|
rich = "^13.9.4"
|
|
65
66
|
bpython = "^0.24"
|
|
66
67
|
testcontainers = { version = "^4.8.2", extras = ["generic"] }
|
|
68
|
+
pytest-asyncio = "^0.26.0"
|
|
67
69
|
|
|
68
70
|
[tool.pytest.ini_options]
|
|
69
71
|
pythonpath = ["src"]
|
|
@@ -78,19 +80,14 @@ source = ["dapla_metadata"]
|
|
|
78
80
|
omit = [
|
|
79
81
|
"tests/*",
|
|
80
82
|
"__init__.py",
|
|
81
|
-
"*/dapla_metadata/variable_definitions/
|
|
83
|
+
"*/dapla_metadata/variable_definitions/_generated/*",
|
|
82
84
|
]
|
|
83
85
|
relative_files = true
|
|
84
86
|
|
|
85
87
|
[tool.coverage.report]
|
|
86
88
|
show_missing = true
|
|
87
89
|
fail_under = 80
|
|
88
|
-
omit = ["*/dapla_metadata/variable_definitions/
|
|
89
|
-
|
|
90
|
-
[tool.black]
|
|
91
|
-
# We don't want to format generated files because it creates large diffs
|
|
92
|
-
# when generated code is updated.
|
|
93
|
-
extend-exclude = "/generated/"
|
|
90
|
+
omit = ["*/dapla_metadata/variable_definitions/_generated/*"]
|
|
94
91
|
|
|
95
92
|
[tool.mypy]
|
|
96
93
|
plugins = ["pydantic.mypy"]
|
|
@@ -100,10 +97,10 @@ pretty = true
|
|
|
100
97
|
show_column_numbers = true
|
|
101
98
|
show_error_context = true
|
|
102
99
|
# Generated code is not high quality, just ignore the problems there
|
|
103
|
-
exclude = "
|
|
100
|
+
exclude = "_generated/vardef_client/.*"
|
|
104
101
|
|
|
105
102
|
[[tool.mypy.overrides]]
|
|
106
|
-
module = ["dapla_metadata.variable_definitions.
|
|
103
|
+
module = ["dapla_metadata.variable_definitions._generated.vardef_client.*"]
|
|
107
104
|
warn_unreachable = false
|
|
108
105
|
ignore_errors = true
|
|
109
106
|
disable_error_code = ["unreachable"]
|
|
@@ -121,8 +118,8 @@ module = [
|
|
|
121
118
|
"datadoc_model.model",
|
|
122
119
|
"pytest_mock",
|
|
123
120
|
"testcontainers.*",
|
|
124
|
-
"yaml",
|
|
125
121
|
"httpx",
|
|
122
|
+
"ruamel.*",
|
|
126
123
|
]
|
|
127
124
|
ignore_missing_imports = true
|
|
128
125
|
|
|
@@ -141,7 +138,7 @@ disable_error_code = [
|
|
|
141
138
|
force-exclude = true # Apply excludes to pre-commit
|
|
142
139
|
show-fixes = true
|
|
143
140
|
src = ["src", "tests"]
|
|
144
|
-
target-version = "
|
|
141
|
+
target-version = "py311" # Minimum Python version supported
|
|
145
142
|
include = ["*.py", "*.pyi", "**/pyproject.toml", "*.ipynb"]
|
|
146
143
|
extend-exclude = [
|
|
147
144
|
"__pycache__",
|
|
@@ -149,19 +146,23 @@ extend-exclude = [
|
|
|
149
146
|
".ipynb_checkpoints",
|
|
150
147
|
"noxfile.py",
|
|
151
148
|
"docs/conf.py",
|
|
149
|
+
"_generated/",
|
|
152
150
|
]
|
|
153
151
|
|
|
154
152
|
# Ruff rules may be customized as desired: https://docs.astral.sh/ruff/rules/
|
|
155
153
|
[tool.ruff.lint]
|
|
156
154
|
select = ["ALL"]
|
|
157
155
|
ignore = [
|
|
158
|
-
"
|
|
159
|
-
"
|
|
160
|
-
"
|
|
161
|
-
"
|
|
162
|
-
"
|
|
163
|
-
"
|
|
164
|
-
"
|
|
156
|
+
"ANN202", # Don't requiere return type annotation for private functions.
|
|
157
|
+
"ANN401", # Allow type annotation with type Any.
|
|
158
|
+
"COM812", # Suggested to ignore when using ruff format
|
|
159
|
+
"D100", # Supress undocumented-public-module. Only doc of public api required.
|
|
160
|
+
"FBT001", # Allow boolean positional arguments in a function.
|
|
161
|
+
"FBT002", # Allow boolean default positional arguments in a function.
|
|
162
|
+
"FIX002", # Don't fail on TODO as long as it's documented correctly.
|
|
163
|
+
"E402", # Supress module-import-not-at-top-of-file, needed in jupyter notebooks.
|
|
164
|
+
"E501", # Supress line-too-long warnings: trust black's judgement on this one.
|
|
165
|
+
"PLR2004", # Allow to compare with unnamed numerical constants.
|
|
165
166
|
]
|
|
166
167
|
|
|
167
168
|
[tool.ruff.lint.isort]
|
|
@@ -176,6 +177,9 @@ convention = "google" # You can also use "numpy".
|
|
|
176
177
|
[tool.ruff.lint.flake8-pytest-style]
|
|
177
178
|
fixture-parentheses = false
|
|
178
179
|
|
|
180
|
+
[tool.ruff.lint.pylint]
|
|
181
|
+
max-args = 8
|
|
182
|
+
|
|
179
183
|
[tool.ruff.lint.pep8-naming]
|
|
180
184
|
classmethod-decorators = [
|
|
181
185
|
"classmethod",
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Tools and clients for working with the Dapla Metadata system."""
|
|
2
|
+
|
|
3
|
+
import warnings
|
|
4
|
+
|
|
5
|
+
warnings.filterwarnings(
|
|
6
|
+
"ignore",
|
|
7
|
+
message="As the c extension couldn't be imported, `google-crc32c` is using a pure python implementation that is significantly slower.",
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
import datadoc_model.model as datadoc_model
|
|
11
|
+
|
|
12
|
+
from . import dapla
|
|
13
|
+
from . import datasets
|
|
14
|
+
from . import standards
|
|
15
|
+
from . import variable_definitions
|
|
@@ -13,9 +13,6 @@ from dapla_metadata._shared.enums import DaplaService
|
|
|
13
13
|
logger = logging.getLogger(__name__)
|
|
14
14
|
|
|
15
15
|
|
|
16
|
-
PLACEHOLDER_EMAIL_ADDRESS = "default_user@ssb.no"
|
|
17
|
-
|
|
18
|
-
|
|
19
16
|
class UserInfo(Protocol):
|
|
20
17
|
"""Information about the current user.
|
|
21
18
|
|
|
@@ -27,6 +24,16 @@ class UserInfo(Protocol):
|
|
|
27
24
|
"""Get the short email address."""
|
|
28
25
|
...
|
|
29
26
|
|
|
27
|
+
@property
|
|
28
|
+
def current_group(self) -> str:
|
|
29
|
+
"""Get the group which the user is currently representing."""
|
|
30
|
+
...
|
|
31
|
+
|
|
32
|
+
@property
|
|
33
|
+
def current_team(self) -> str:
|
|
34
|
+
"""Get the team which the user is currently representing."""
|
|
35
|
+
...
|
|
36
|
+
|
|
30
37
|
|
|
31
38
|
class UnknownUserInfo:
|
|
32
39
|
"""Fallback when no implementation is found."""
|
|
@@ -36,14 +43,38 @@ class UnknownUserInfo:
|
|
|
36
43
|
"""Unknown email address."""
|
|
37
44
|
return None
|
|
38
45
|
|
|
46
|
+
@property
|
|
47
|
+
def current_group(self) -> str:
|
|
48
|
+
"""Get the group which the user is currently representing."""
|
|
49
|
+
return ""
|
|
50
|
+
|
|
51
|
+
@property
|
|
52
|
+
def current_team(self) -> str:
|
|
53
|
+
"""Get the team which the user is currently representing."""
|
|
54
|
+
return ""
|
|
55
|
+
|
|
39
56
|
|
|
40
57
|
class TestUserInfo:
|
|
41
58
|
"""Information about the current user for local development and testing."""
|
|
42
59
|
|
|
60
|
+
PLACEHOLDER_EMAIL_ADDRESS = "default_user@ssb.no"
|
|
61
|
+
PLACEHOLDER_GROUP = "default-team-developers"
|
|
62
|
+
PLACEHOLDER_TEAM = "default-team"
|
|
63
|
+
|
|
43
64
|
@property
|
|
44
65
|
def short_email(self) -> str | None:
|
|
45
66
|
"""Get the short email address."""
|
|
46
|
-
return PLACEHOLDER_EMAIL_ADDRESS
|
|
67
|
+
return TestUserInfo.PLACEHOLDER_EMAIL_ADDRESS
|
|
68
|
+
|
|
69
|
+
@property
|
|
70
|
+
def current_group(self) -> str | None:
|
|
71
|
+
"""Get the group which the user is currently representing."""
|
|
72
|
+
return TestUserInfo.PLACEHOLDER_GROUP
|
|
73
|
+
|
|
74
|
+
@property
|
|
75
|
+
def current_team(self) -> str | None:
|
|
76
|
+
"""Get the team which the user is currently representing."""
|
|
77
|
+
return TestUserInfo.PLACEHOLDER_TEAM
|
|
47
78
|
|
|
48
79
|
|
|
49
80
|
class DaplaLabUserInfo:
|
|
@@ -65,6 +96,19 @@ class DaplaLabUserInfo:
|
|
|
65
96
|
)
|
|
66
97
|
return None
|
|
67
98
|
|
|
99
|
+
@property
|
|
100
|
+
def current_group(self) -> str:
|
|
101
|
+
"""Get the group which the user is currently representing."""
|
|
102
|
+
if group := config.get_dapla_group_context():
|
|
103
|
+
return group
|
|
104
|
+
msg = "DAPLA_GROUP_CONTEXT environment variable not found"
|
|
105
|
+
raise OSError(msg)
|
|
106
|
+
|
|
107
|
+
@property
|
|
108
|
+
def current_team(self) -> str:
|
|
109
|
+
"""Get the team which the user is currently representing."""
|
|
110
|
+
return parse_team_name(self.current_group)
|
|
111
|
+
|
|
68
112
|
|
|
69
113
|
class JupyterHubUserInfo:
|
|
70
114
|
"""Information about the current user when running on JupyterHub."""
|
|
@@ -74,6 +118,16 @@ class JupyterHubUserInfo:
|
|
|
74
118
|
"""Get the short email address."""
|
|
75
119
|
return config.get_jupyterhub_user()
|
|
76
120
|
|
|
121
|
+
@property
|
|
122
|
+
def current_group(self) -> str:
|
|
123
|
+
"""Get the group which the user is currently representing."""
|
|
124
|
+
raise NotImplementedError
|
|
125
|
+
|
|
126
|
+
@property
|
|
127
|
+
def current_team(self) -> str:
|
|
128
|
+
"""Get the team which the user is currently representing."""
|
|
129
|
+
raise NotImplementedError
|
|
130
|
+
|
|
77
131
|
|
|
78
132
|
def get_user_info_for_current_platform() -> UserInfo:
|
|
79
133
|
"""Return the correct implementation of UserInfo for the current platform."""
|
|
@@ -88,28 +142,20 @@ def get_user_info_for_current_platform() -> UserInfo:
|
|
|
88
142
|
return UnknownUserInfo()
|
|
89
143
|
|
|
90
144
|
|
|
91
|
-
def get_owner() -> str:
|
|
92
|
-
"""Returns the owner read from the GROUP_CONTEXT environment variable."""
|
|
93
|
-
if group := config.get_dapla_group_context():
|
|
94
|
-
return parse_team_name(group)
|
|
95
|
-
msg = "DAPLA_GROUP_CONTEXT environment variable not found"
|
|
96
|
-
raise OSError(msg)
|
|
97
|
-
|
|
98
|
-
|
|
99
145
|
def parse_team_name(group: str) -> str:
|
|
100
146
|
"""Parses the group to get the current team.
|
|
101
147
|
|
|
102
|
-
>>> parse_team_name(dapla-metadata-developers)
|
|
103
|
-
|
|
148
|
+
>>> parse_team_name("dapla-metadata-developers")
|
|
149
|
+
'dapla-metadata'
|
|
104
150
|
|
|
105
|
-
>>> parse_team_name(dapla-metadata-data-admins)
|
|
106
|
-
|
|
151
|
+
>>> parse_team_name("dapla-metadata-data-admins")
|
|
152
|
+
'dapla-metadata'
|
|
107
153
|
|
|
108
|
-
>>> parse_team_name(dapla-metadata)
|
|
109
|
-
|
|
154
|
+
>>> parse_team_name("dapla-metadata")
|
|
155
|
+
'dapla'
|
|
110
156
|
|
|
111
|
-
>>> parse_team_name(dapla-metadata-not-real-name)
|
|
112
|
-
|
|
157
|
+
>>> parse_team_name("dapla-metadata-not-real-name")
|
|
158
|
+
'dapla-metadata-not-real'
|
|
113
159
|
"""
|
|
114
160
|
parts = group.split("-")
|
|
115
161
|
return "-".join(parts[:-2] if group.endswith("data-admins") else parts[:-1])
|
{dapla_toolbelt_metadata-0.4.2 → dapla_toolbelt_metadata-0.6.0}/src/dapla_metadata/datasets/core.py
RENAMED
|
@@ -14,7 +14,7 @@ from datadoc_model import model
|
|
|
14
14
|
from datadoc_model.model import DataSetStatus
|
|
15
15
|
|
|
16
16
|
from dapla_metadata._shared import config
|
|
17
|
-
from dapla_metadata._shared import user_info
|
|
17
|
+
from dapla_metadata.dapla import user_info
|
|
18
18
|
from dapla_metadata.datasets.dapla_dataset_path_info import DaplaDatasetPathInfo
|
|
19
19
|
from dapla_metadata.datasets.dataset_parser import DatasetParser
|
|
20
20
|
from dapla_metadata.datasets.model_backwards_compatibility import (
|
|
@@ -478,7 +478,7 @@ class DaplaDatasetPathInfo:
|
|
|
478
478
|
"""Extract the bucket name from the dataset path.
|
|
479
479
|
|
|
480
480
|
Returns:
|
|
481
|
-
The bucket name or None if the dataset path is not a GCS path.
|
|
481
|
+
The bucket name or None if the dataset path is not a GCS path nor ssb bucketeer path.
|
|
482
482
|
|
|
483
483
|
Examples:
|
|
484
484
|
>>> DaplaDatasetPathInfo('gs://ssb-staging-dapla-felles-data-delt/datadoc/utdata/person_data_p2021_v2.parquet').bucket_name
|
|
@@ -492,17 +492,35 @@ class DaplaDatasetPathInfo:
|
|
|
492
492
|
|
|
493
493
|
>>> DaplaDatasetPathInfo('ssb-staging-dapla-felles-data-delt/datadoc/utdata/person_data_p2021_v2.parquet').bucket_name
|
|
494
494
|
None
|
|
495
|
+
|
|
496
|
+
>>> DaplaDatasetPathInfo('ssb-staging-dapla-felles-data-delt/datadoc/utdata/person_data_p2021_v2.parquet').bucket_name
|
|
497
|
+
None
|
|
498
|
+
|
|
499
|
+
>>> DaplaDatasetPathInfo('buckets/ssb-staging-dapla-felles-data-delt/stat/utdata/person_data_p2021_v2.parquet').bucket_name
|
|
500
|
+
ssb-staging-dapla-felles-data-delt
|
|
501
|
+
|
|
502
|
+
>>> DaplaDatasetPathInfo('buckets/ssb-staging-dapla-felles-data-delt/person_data_p2021_v2.parquet').bucket_name
|
|
503
|
+
ssb-staging-dapla-felles-data-delt
|
|
504
|
+
|
|
505
|
+
>>> DaplaDatasetPathInfo('home/work/buckets/ssb-staging-dapla-felles-produkt/stat/utdata/person_data_p2021_v2.parquet').bucket_name
|
|
506
|
+
ssb-staging-dapla-felles-produkt
|
|
495
507
|
"""
|
|
496
508
|
prefix: str | None = None
|
|
497
|
-
|
|
509
|
+
dataset_string = str(self.dataset_string)
|
|
510
|
+
if GSPath.cloud_prefix in self.dataset_string:
|
|
498
511
|
prefix = GSPath.cloud_prefix
|
|
499
|
-
|
|
512
|
+
_, bucket_and_rest = dataset_string.split(prefix, 1)
|
|
513
|
+
elif GS_PREFIX_FROM_PATHLIB in self.dataset_string:
|
|
500
514
|
prefix = GS_PREFIX_FROM_PATHLIB
|
|
515
|
+
_, bucket_and_rest = self.dataset_string.split(prefix, 1)
|
|
516
|
+
elif "buckets/" in self.dataset_string:
|
|
517
|
+
prefix = "buckets/"
|
|
518
|
+
_, bucket_and_rest = self.dataset_string.split(prefix, 1)
|
|
501
519
|
else:
|
|
502
520
|
return None
|
|
503
521
|
|
|
504
522
|
return pathlib.Path(
|
|
505
|
-
|
|
523
|
+
bucket_and_rest,
|
|
506
524
|
).parts[0]
|
|
507
525
|
|
|
508
526
|
@property
|
|
@@ -528,6 +546,15 @@ class DaplaDatasetPathInfo:
|
|
|
528
546
|
|
|
529
547
|
>>> DaplaDatasetPathInfo('my_data/simple_dataset_name.parquet').dataset_short_name
|
|
530
548
|
simple_dataset_name
|
|
549
|
+
|
|
550
|
+
>>> DaplaDatasetPathInfo('gs:/ssb-staging-dapla-felles-data-delt/datadoc/utdata/person_data_p2021_v2.parquet').dataset_short_name
|
|
551
|
+
person_data
|
|
552
|
+
|
|
553
|
+
>>> DaplaDatasetPathInfo('buckets/ssb-staging-dapla-felles-data-delt/stat/utdata/folk_data_p2021_v2.parquet').dataset_short_name
|
|
554
|
+
folk_data
|
|
555
|
+
|
|
556
|
+
>>> DaplaDatasetPathInfo('buckets/ssb-staging-dapla-felles-data-delt/stat/utdata/dapla/bus_p2021_v2.parquet').dataset_short_name
|
|
557
|
+
bus
|
|
531
558
|
"""
|
|
532
559
|
if self.contains_data_from or self.contains_data_until:
|
|
533
560
|
short_name_sections = self.dataset_name_sections[
|
|
@@ -601,6 +628,9 @@ class DaplaDatasetPathInfo:
|
|
|
601
628
|
>>> DaplaDatasetPathInfo('utdata/min_statistikk/person_data_v1.parquet').dataset_state
|
|
602
629
|
<DataSetState.OUTPUT_DATA: 'OUTPUT_DATA'>
|
|
603
630
|
|
|
631
|
+
>>> DaplaDatasetPathInfo('buckets/bucket_name/stat_name/inndata/min_statistikk/person_data_v1.parquet').dataset_state
|
|
632
|
+
<DataSetState.INPUT_DATA: 'INPUT_DATA'>
|
|
633
|
+
|
|
604
634
|
>>> DaplaDatasetPathInfo('my_special_data/person_data_v1.parquet').dataset_state
|
|
605
635
|
None
|
|
606
636
|
"""
|
|
@@ -632,6 +662,12 @@ class DaplaDatasetPathInfo:
|
|
|
632
662
|
|
|
633
663
|
>>> DaplaDatasetPathInfo('person_data.parquet').dataset_version
|
|
634
664
|
None
|
|
665
|
+
|
|
666
|
+
>>> DaplaDatasetPathInfo('buckets/bucket_name/stat_name/inndata/min_statistikk/person_data_v1.parquet').dataset_version
|
|
667
|
+
'1'
|
|
668
|
+
|
|
669
|
+
>>> DaplaDatasetPathInfo('buckets/bucket_name/stat_name/inndata/min_statistikk/person_data.parquet').dataset_version
|
|
670
|
+
None
|
|
635
671
|
"""
|
|
636
672
|
minimum_elements_in_file_name: Final[int] = 2
|
|
637
673
|
minimum_characters_in_version_string: Final[int] = 2
|
|
@@ -645,13 +681,37 @@ class DaplaDatasetPathInfo:
|
|
|
645
681
|
return last_filename_element[1:]
|
|
646
682
|
return None
|
|
647
683
|
|
|
684
|
+
def _get_left_parts(
    self,
    dataset_path_parts: list[str],
    state_index: int,
) -> list[str]:
    """Retrieve the path parts before the dataset state, considering bucket prefixes.

    Args:
        dataset_path_parts: The individual parts of the dataset path.
        state_index: Index in ``dataset_path_parts`` of the dataset state part.

    Returns:
        The parts to the left of the dataset state, starting at the first
        bucket prefix (``"gs:"`` or ``"buckets"``) when one is present.
        An empty list is returned when nothing is left of the state, when only
        the root ``"/"`` is left, or when a bucket prefix is followed by at
        most one further part.
    """
    bucket_prefixes = ("gs:", "buckets")
    left_parts = dataset_path_parts[:state_index]

    # Stop checking beyond the bucket prefix: drop everything before its
    # first occurrence.
    first_prefix = next(
        (
            position
            for position, part in enumerate(left_parts)
            if part in bucket_prefixes
        ),
        None,
    )
    if first_prefix is not None:
        left_parts = left_parts[first_prefix:]

    # Nothing (or only the filesystem root) precedes the dataset state.
    # The emptiness check also avoids indexing into an empty list.
    if not left_parts or left_parts == ["/"]:
        return []

    # A prefix followed by at most one part leaves no room for anything
    # between the prefix section and the dataset state.
    if first_prefix is not None and len(left_parts) <= 2:
        return []

    return left_parts
|
|
707
|
+
|
|
648
708
|
@property
|
|
649
709
|
def statistic_short_name(
|
|
650
710
|
self,
|
|
651
711
|
) -> str | None:
|
|
652
712
|
"""Extract the statistical short name from the filepath.
|
|
653
713
|
|
|
654
|
-
Extract the statistical short name from the filepath right before the
|
|
714
|
+
Extract the statistical short name from the filepath either after bucket name or right before the
|
|
655
715
|
dataset state based on the Dapla filepath naming convention.
|
|
656
716
|
|
|
657
717
|
Returns:
|
|
@@ -662,21 +722,75 @@ class DaplaDatasetPathInfo:
|
|
|
662
722
|
>>> DaplaDatasetPathInfo('prosjekt/befolkning/klargjorte_data/person_data_v1.parquet').statistic_short_name
|
|
663
723
|
befolkning
|
|
664
724
|
|
|
725
|
+
>>> DaplaDatasetPathInfo('buckets/prosjekt/befolkning/person_data_v1.parquet').statistic_short_name
|
|
726
|
+
befolkning
|
|
727
|
+
|
|
665
728
|
>>> DaplaDatasetPathInfo('befolkning/inndata/person_data_v1.parquet').statistic_short_name
|
|
666
729
|
befolkning
|
|
667
730
|
|
|
731
|
+
>>> DaplaDatasetPathInfo('buckets/bucket_name/stat_name/inndata/min_statistikk/person_data.parquet').statistic_short_name
|
|
732
|
+
stat_name
|
|
733
|
+
|
|
734
|
+
>>> DaplaDatasetPathInfo('buckets/stat_name/utdata/person_data.parquet').statistic_short_name
|
|
735
|
+
None
|
|
736
|
+
|
|
668
737
|
>>> DaplaDatasetPathInfo('befolkning/person_data.parquet').statistic_short_name
|
|
669
738
|
None
|
|
739
|
+
|
|
740
|
+
>>> DaplaDatasetPathInfo('buckets/produkt/befolkning/utdata/person_data.parquet').statistic_short_name
|
|
741
|
+
befolkning
|
|
742
|
+
|
|
743
|
+
>>> DaplaDatasetPathInfo('resources/buckets/produkt/befolkning/utdata/person_data.parquet').statistic_short_name
|
|
744
|
+
befolkning
|
|
745
|
+
|
|
746
|
+
>>> DaplaDatasetPathInfo('gs://statistikk/produkt/klargjorte-data/persondata_p1990-Q1_p2023-Q4_v1/aar=2019/data.parquet').statistic_short_name
|
|
747
|
+
produkt
|
|
748
|
+
|
|
749
|
+
>>> DaplaDatasetPathInfo('gs://statistikk/produkt/persondata_p1990-Q1_p2023-Q4_v1/aar=2019/data.parquet').statistic_short_name
|
|
750
|
+
None
|
|
751
|
+
|
|
752
|
+
>>> DaplaDatasetPathInfo('buckets/ssb-staging-dapla-felles-data-delt/person_data_p2021_v2.parquet').statistic_short_name
|
|
753
|
+
None
|
|
670
754
|
"""
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
755
|
+
if not self.dataset_state:
|
|
756
|
+
if self.bucket_name:
|
|
757
|
+
parts = self.dataset_path.parent.parts
|
|
758
|
+
|
|
759
|
+
if self.bucket_name not in parts:
|
|
760
|
+
return None
|
|
761
|
+
|
|
762
|
+
# Find the index of bucket_name in the path
|
|
763
|
+
bucket_name_index = self.dataset_path.parent.parts.index(
|
|
764
|
+
self.bucket_name,
|
|
765
|
+
)
|
|
766
|
+
|
|
767
|
+
# If there are parts after bucket_name, return the part immediately after it
|
|
768
|
+
if len(self.dataset_path.parent.parts) > bucket_name_index + 1:
|
|
769
|
+
return self.dataset_path.parent.parts[bucket_name_index + 1]
|
|
770
|
+
|
|
771
|
+
return None
|
|
772
|
+
|
|
773
|
+
dataset_state_names = self._extract_norwegian_dataset_state_path_part(
|
|
774
|
+
self.dataset_state,
|
|
775
|
+
)
|
|
776
|
+
dataset_path_parts = list(self.dataset_path.parts)
|
|
777
|
+
|
|
778
|
+
for state in dataset_state_names:
|
|
779
|
+
if state not in dataset_path_parts:
|
|
780
|
+
continue
|
|
781
|
+
|
|
782
|
+
index = dataset_path_parts.index(state)
|
|
783
|
+
|
|
784
|
+
if index == 0:
|
|
785
|
+
continue
|
|
786
|
+
|
|
787
|
+
left_parts = self._get_left_parts(dataset_path_parts, index)
|
|
788
|
+
|
|
789
|
+
if not left_parts:
|
|
790
|
+
return None
|
|
791
|
+
|
|
792
|
+
return dataset_path_parts[index - 1]
|
|
793
|
+
|
|
680
794
|
return None
|
|
681
795
|
|
|
682
796
|
def path_complies_with_naming_standard(self) -> bool:
|