dapla-toolbelt-metadata 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dapla-toolbelt-metadata might be problematic. Click here for more details.
- dapla_toolbelt_metadata-0.1.1/LICENSE +21 -0
- dapla_toolbelt_metadata-0.1.1/PKG-INFO +125 -0
- dapla_toolbelt_metadata-0.1.1/README.md +78 -0
- dapla_toolbelt_metadata-0.1.1/pyproject.toml +189 -0
- dapla_toolbelt_metadata-0.1.1/src/dataset/__init__.py +11 -0
- dapla_toolbelt_metadata-0.1.1/src/dataset/code_list.py +244 -0
- dapla_toolbelt_metadata-0.1.1/src/dataset/config.py +151 -0
- dapla_toolbelt_metadata-0.1.1/src/dataset/core.py +543 -0
- dapla_toolbelt_metadata-0.1.1/src/dataset/dapla_dataset_path_info.py +685 -0
- dapla_toolbelt_metadata-0.1.1/src/dataset/dataset_parser.py +241 -0
- dapla_toolbelt_metadata-0.1.1/src/dataset/external_sources/__init__.py +1 -0
- dapla_toolbelt_metadata-0.1.1/src/dataset/external_sources/external_sources.py +87 -0
- dapla_toolbelt_metadata-0.1.1/src/dataset/model_backwards_compatibility.py +520 -0
- dapla_toolbelt_metadata-0.1.1/src/dataset/model_validation.py +188 -0
- dapla_toolbelt_metadata-0.1.1/src/dataset/py.typed +0 -0
- dapla_toolbelt_metadata-0.1.1/src/dataset/statistic_subject_mapping.py +182 -0
- dapla_toolbelt_metadata-0.1.1/src/dataset/user_info.py +88 -0
- dapla_toolbelt_metadata-0.1.1/src/dataset/utility/__init__.py +1 -0
- dapla_toolbelt_metadata-0.1.1/src/dataset/utility/constants.py +92 -0
- dapla_toolbelt_metadata-0.1.1/src/dataset/utility/enums.py +35 -0
- dapla_toolbelt_metadata-0.1.1/src/dataset/utility/utils.py +405 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright © 2024 Statistics Norway
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: dapla-toolbelt-metadata
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Dapla Toolbelt Metadata
|
|
5
|
+
Home-page: https://github.com/statisticsnorway/dapla-toolbelt-metadata
|
|
6
|
+
License: MIT
|
|
7
|
+
Author: Team Metadata
|
|
8
|
+
Author-email: metadata@ssb.no
|
|
9
|
+
Requires-Python: >=3.10,<4.0
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Requires-Dist: arrow (>=1.3.0)
|
|
17
|
+
Requires-Dist: beautifulsoup4 (>=4.12.3)
|
|
18
|
+
Requires-Dist: black (>=24.8.0,<25.0.0)
|
|
19
|
+
Requires-Dist: bs4 (>=0.0.2,<0.0.3)
|
|
20
|
+
Requires-Dist: click (>=8.0.1)
|
|
21
|
+
Requires-Dist: cloudpathlib[gs] (>=0.17.0)
|
|
22
|
+
Requires-Dist: coverage (>=7.6.1,<8.0.0)
|
|
23
|
+
Requires-Dist: dapla-toolbelt (>=1.3.3)
|
|
24
|
+
Requires-Dist: faker (>=26.1.0,<27.0.0)
|
|
25
|
+
Requires-Dist: furo (>=2024.7.18,<2025.0.0)
|
|
26
|
+
Requires-Dist: gunicorn (>=21.2.0)
|
|
27
|
+
Requires-Dist: pandas (>=1.4.2)
|
|
28
|
+
Requires-Dist: pre-commit (>=3.8.0,<4.0.0)
|
|
29
|
+
Requires-Dist: pyarrow (>=8.0.0)
|
|
30
|
+
Requires-Dist: pydantic (>=2.5.2)
|
|
31
|
+
Requires-Dist: pygments (>=2.18.0,<3.0.0)
|
|
32
|
+
Requires-Dist: pyjwt (>=2.8.0)
|
|
33
|
+
Requires-Dist: pytest (>=8.3.2,<9.0.0)
|
|
34
|
+
Requires-Dist: pytest-mock (>=3.14.0,<4.0.0)
|
|
35
|
+
Requires-Dist: python-dotenv (>=1.0.1)
|
|
36
|
+
Requires-Dist: requests (>=2.31.0)
|
|
37
|
+
Requires-Dist: requests-mock (>=1.12.1,<2.0.0)
|
|
38
|
+
Requires-Dist: ruff (>=0.5.6,<0.6.0)
|
|
39
|
+
Requires-Dist: ssb-datadoc-model (>=6.0.0,<7.0.0)
|
|
40
|
+
Requires-Dist: ssb-klass-python (>=0.0.9)
|
|
41
|
+
Requires-Dist: types-beautifulsoup4 (>=4.12.0.20240511,<5.0.0.0)
|
|
42
|
+
Project-URL: Changelog, https://github.com/statisticsnorway/dapla-toolbelt-metadata/releases
|
|
43
|
+
Project-URL: Documentation, https://statisticsnorway.github.io/dapla-toolbelt-metadata
|
|
44
|
+
Project-URL: Repository, https://github.com/statisticsnorway/dapla-toolbelt-metadata
|
|
45
|
+
Description-Content-Type: text/markdown
|
|
46
|
+
|
|
47
|
+
# Dapla Toolbelt Metadata
|
|
48
|
+
|
|
49
|
+
[][pypi status]
|
|
50
|
+
[][pypi status]
|
|
51
|
+
[][pypi status]
|
|
52
|
+
[][license]
|
|
53
|
+
|
|
54
|
+
[][documentation]
|
|
55
|
+
[][tests]
|
|
56
|
+
[][sonarcov]
|
|
57
|
+
[][sonarquality]
|
|
58
|
+
|
|
59
|
+
[][pre-commit]
|
|
60
|
+
[][black]
|
|
61
|
+
[](https://github.com/astral-sh/ruff)
|
|
62
|
+
[][poetry]
|
|
63
|
+
|
|
64
|
+
[pypi status]: https://pypi.org/project/dapla-toolbelt-metadata/
|
|
65
|
+
[documentation]: https://statisticsnorway.github.io/dapla-toolbelt-metadata
|
|
66
|
+
[tests]: https://github.com/statisticsnorway/dapla-toolbelt-metadata/actions?workflow=Tests
|
|
67
|
+
|
|
68
|
+
[sonarcov]: https://sonarcloud.io/summary/overall?id=statisticsnorway_dapla-toolbelt-metadata
|
|
69
|
+
[sonarquality]: https://sonarcloud.io/summary/overall?id=statisticsnorway_dapla-toolbelt-metadata
|
|
70
|
+
[pre-commit]: https://github.com/pre-commit/pre-commit
|
|
71
|
+
[black]: https://github.com/psf/black
|
|
72
|
+
[poetry]: https://python-poetry.org/
|
|
73
|
+
|
|
74
|
+
## Features
|
|
75
|
+
|
|
76
|
+
- TODO
|
|
77
|
+
|
|
78
|
+
## Requirements
|
|
79
|
+
|
|
80
|
+
- TODO
|
|
81
|
+
- Python
|
|
82
|
+
|
|
83
|
+
## Installation
|
|
84
|
+
|
|
85
|
+
You can install _Dapla Toolbelt Metadata_ via [pip] from [PyPI]:
|
|
86
|
+
|
|
87
|
+
```console
|
|
88
|
+
pip install dapla-toolbelt-metadata
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## Usage
|
|
92
|
+
|
|
93
|
+
Please see the [Reference Guide] for details.
|
|
94
|
+
|
|
95
|
+
## Contributing
|
|
96
|
+
|
|
97
|
+
Contributions are very welcome.
|
|
98
|
+
To learn more, see the [Contributor Guide].
|
|
99
|
+
|
|
100
|
+
## License
|
|
101
|
+
|
|
102
|
+
Distributed under the terms of the [MIT license][license],
|
|
103
|
+
_Dapla Toolbelt Metadata_ is free and open source software.
|
|
104
|
+
|
|
105
|
+
## Issues
|
|
106
|
+
|
|
107
|
+
If you encounter any problems,
|
|
108
|
+
please [file an issue] along with a detailed description.
|
|
109
|
+
|
|
110
|
+
## Credits
|
|
111
|
+
|
|
112
|
+
This project was generated from [Statistics Norway]'s [SSB PyPI Template].
|
|
113
|
+
|
|
114
|
+
[statistics norway]: https://www.ssb.no/en
|
|
115
|
+
[pypi]: https://pypi.org/
|
|
116
|
+
[ssb pypi template]: https://github.com/statisticsnorway/ssb-pypitemplate
|
|
117
|
+
[file an issue]: https://github.com/statisticsnorway/dapla-toolbelt-metadata/issues
|
|
118
|
+
[pip]: https://pip.pypa.io/
|
|
119
|
+
|
|
120
|
+
<!-- github-only -->
|
|
121
|
+
|
|
122
|
+
[license]: https://github.com/statisticsnorway/dapla-toolbelt-metadata/blob/main/LICENSE
|
|
123
|
+
[contributor guide]: https://github.com/statisticsnorway/dapla-toolbelt-metadata/blob/main/CONTRIBUTING.md
|
|
124
|
+
[reference guide]: https://statisticsnorway.github.io/dapla-toolbelt-metadata/reference.html
|
|
125
|
+
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# Dapla Toolbelt Metadata
|
|
2
|
+
|
|
3
|
+
[][pypi status]
|
|
4
|
+
[][pypi status]
|
|
5
|
+
[][pypi status]
|
|
6
|
+
[][license]
|
|
7
|
+
|
|
8
|
+
[][documentation]
|
|
9
|
+
[][tests]
|
|
10
|
+
[][sonarcov]
|
|
11
|
+
[][sonarquality]
|
|
12
|
+
|
|
13
|
+
[][pre-commit]
|
|
14
|
+
[][black]
|
|
15
|
+
[](https://github.com/astral-sh/ruff)
|
|
16
|
+
[][poetry]
|
|
17
|
+
|
|
18
|
+
[pypi status]: https://pypi.org/project/dapla-toolbelt-metadata/
|
|
19
|
+
[documentation]: https://statisticsnorway.github.io/dapla-toolbelt-metadata
|
|
20
|
+
[tests]: https://github.com/statisticsnorway/dapla-toolbelt-metadata/actions?workflow=Tests
|
|
21
|
+
|
|
22
|
+
[sonarcov]: https://sonarcloud.io/summary/overall?id=statisticsnorway_dapla-toolbelt-metadata
|
|
23
|
+
[sonarquality]: https://sonarcloud.io/summary/overall?id=statisticsnorway_dapla-toolbelt-metadata
|
|
24
|
+
[pre-commit]: https://github.com/pre-commit/pre-commit
|
|
25
|
+
[black]: https://github.com/psf/black
|
|
26
|
+
[poetry]: https://python-poetry.org/
|
|
27
|
+
|
|
28
|
+
## Features
|
|
29
|
+
|
|
30
|
+
- TODO
|
|
31
|
+
|
|
32
|
+
## Requirements
|
|
33
|
+
|
|
34
|
+
- TODO
|
|
35
|
+
- Python
|
|
36
|
+
|
|
37
|
+
## Installation
|
|
38
|
+
|
|
39
|
+
You can install _Dapla Toolbelt Metadata_ via [pip] from [PyPI]:
|
|
40
|
+
|
|
41
|
+
```console
|
|
42
|
+
pip install dapla-toolbelt-metadata
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Usage
|
|
46
|
+
|
|
47
|
+
Please see the [Reference Guide] for details.
|
|
48
|
+
|
|
49
|
+
## Contributing
|
|
50
|
+
|
|
51
|
+
Contributions are very welcome.
|
|
52
|
+
To learn more, see the [Contributor Guide].
|
|
53
|
+
|
|
54
|
+
## License
|
|
55
|
+
|
|
56
|
+
Distributed under the terms of the [MIT license][license],
|
|
57
|
+
_Dapla Toolbelt Metadata_ is free and open source software.
|
|
58
|
+
|
|
59
|
+
## Issues
|
|
60
|
+
|
|
61
|
+
If you encounter any problems,
|
|
62
|
+
please [file an issue] along with a detailed description.
|
|
63
|
+
|
|
64
|
+
## Credits
|
|
65
|
+
|
|
66
|
+
This project was generated from [Statistics Norway]'s [SSB PyPI Template].
|
|
67
|
+
|
|
68
|
+
[statistics norway]: https://www.ssb.no/en
|
|
69
|
+
[pypi]: https://pypi.org/
|
|
70
|
+
[ssb pypi template]: https://github.com/statisticsnorway/ssb-pypitemplate
|
|
71
|
+
[file an issue]: https://github.com/statisticsnorway/dapla-toolbelt-metadata/issues
|
|
72
|
+
[pip]: https://pip.pypa.io/
|
|
73
|
+
|
|
74
|
+
<!-- github-only -->
|
|
75
|
+
|
|
76
|
+
[license]: https://github.com/statisticsnorway/dapla-toolbelt-metadata/blob/main/LICENSE
|
|
77
|
+
[contributor guide]: https://github.com/statisticsnorway/dapla-toolbelt-metadata/blob/main/CONTRIBUTING.md
|
|
78
|
+
[reference guide]: https://statisticsnorway.github.io/dapla-toolbelt-metadata/reference.html
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
[tool.poetry]
|
|
2
|
+
name = "dapla-toolbelt-metadata"
|
|
3
|
+
version = "0.1.1"
|
|
4
|
+
description = "Dapla Toolbelt Metadata"
|
|
5
|
+
authors = ["Team Metadata <metadata@ssb.no>"]
|
|
6
|
+
license = "MIT"
|
|
7
|
+
readme = "README.md"
|
|
8
|
+
homepage = "https://github.com/statisticsnorway/dapla-toolbelt-metadata"
|
|
9
|
+
repository = "https://github.com/statisticsnorway/dapla-toolbelt-metadata"
|
|
10
|
+
documentation = "https://statisticsnorway.github.io/dapla-toolbelt-metadata"
|
|
11
|
+
classifiers = ["Development Status :: 4 - Beta"]
|
|
12
|
+
|
|
13
|
+
packages = [{ include = "dataset", from = "src" }]
|
|
14
|
+
|
|
15
|
+
[tool.poetry.urls]
|
|
16
|
+
Changelog = "https://github.com/statisticsnorway/dapla-toolbelt-metadata/releases"
|
|
17
|
+
|
|
18
|
+
[tool.poetry.dependencies]
|
|
19
|
+
click = ">=8.0.1"
|
|
20
|
+
python = ">=3.10,<4.0"
|
|
21
|
+
pyarrow = ">=8.0.0"
|
|
22
|
+
pydantic = ">=2.5.2"
|
|
23
|
+
pandas = ">=1.4.2"
|
|
24
|
+
dapla-toolbelt = ">=1.3.3"
|
|
25
|
+
gunicorn = ">=21.2.0"
|
|
26
|
+
arrow = ">=1.3.0"
|
|
27
|
+
python-dotenv = ">=1.0.1"
|
|
28
|
+
requests = ">=2.31.0"
|
|
29
|
+
beautifulsoup4 = ">=4.12.3"
|
|
30
|
+
cloudpathlib = { extras = ["gs"], version = ">=0.17.0" }
|
|
31
|
+
pyjwt = ">=2.8.0"
|
|
32
|
+
ssb-klass-python = ">=0.0.9"
|
|
33
|
+
ssb-datadoc-model = "^6.0.0"
|
|
34
|
+
faker = "^26.1.0"
|
|
35
|
+
pygments = "^2.18.0"
|
|
36
|
+
black = "^24.8.0"
|
|
37
|
+
coverage = "^7.6.1"
|
|
38
|
+
furo = "^2024.7.18"
|
|
39
|
+
pre-commit = "^3.8.0"
|
|
40
|
+
ruff = "^0.5.6"
|
|
41
|
+
pytest = "^8.3.2"
|
|
42
|
+
pytest-mock = "^3.14.0"
|
|
43
|
+
requests-mock = "^1.12.1"
|
|
44
|
+
bs4 = "^0.0.2"
|
|
45
|
+
types-beautifulsoup4 = "^4.12.0.20240511"
|
|
46
|
+
|
|
47
|
+
[tool.poetry.group.dev.dependencies]
|
|
48
|
+
pygments = ">=2.10.0"
|
|
49
|
+
black = ">=22.3.0"
|
|
50
|
+
coverage = { extras = ["toml"], version = ">=6.2" }
|
|
51
|
+
furo = ">=2021.11.12"
|
|
52
|
+
pre-commit = "*"
|
|
53
|
+
ruff = ">=0.0.284"
|
|
54
|
+
pytest = ">=7.1.2"
|
|
55
|
+
sphinx = ">=6.2.1"
|
|
56
|
+
sphinx-autobuild = ">=2021.3.14"
|
|
57
|
+
sphinx-autodoc-typehints = ">=1.24.0"
|
|
58
|
+
sphinx-click = ">=3.0.2"
|
|
59
|
+
typeguard = ">=2.13.3"
|
|
60
|
+
xdoctest = { extras = ["colors"], version = ">=0.15.10" }
|
|
61
|
+
myst-parser = { version = ">=0.16.1" }
|
|
62
|
+
mypy = ">=0.950"
|
|
63
|
+
pytest-cov = ">=3.0.0"
|
|
64
|
+
nbstripout = ">=0.5.0"
|
|
65
|
+
python-kacl = "*"
|
|
66
|
+
pytest-mock = "*"
|
|
67
|
+
deptry = ">=0.12.0"
|
|
68
|
+
pandas-stubs = "*"
|
|
69
|
+
pyarrow-stubs = ">=10.0.1.9"
|
|
70
|
+
types-Pygments = "*"
|
|
71
|
+
types-colorama = "*"
|
|
72
|
+
types-setuptools = "*"
|
|
73
|
+
types-beautifulsoup4 = "*"
|
|
74
|
+
requests-mock = "*"
|
|
75
|
+
faker = "*"
|
|
76
|
+
|
|
77
|
+
[tool.pytest.ini_options]
|
|
78
|
+
pythonpath = ["src"]
|
|
79
|
+
|
|
80
|
+
[tool.coverage.paths]
|
|
81
|
+
source = ["src", "*/site-packages"]
|
|
82
|
+
tests = ["tests", "*/tests"]
|
|
83
|
+
|
|
84
|
+
[tool.coverage.run]
|
|
85
|
+
branch = true
|
|
86
|
+
source = ["dataset"]
|
|
87
|
+
omit = [
|
|
88
|
+
"tests/*",
|
|
89
|
+
"__init__.py",
|
|
90
|
+
]
|
|
91
|
+
relative_files = true
|
|
92
|
+
|
|
93
|
+
[tool.coverage.report]
|
|
94
|
+
show_missing = true
|
|
95
|
+
fail_under = 80
|
|
96
|
+
|
|
97
|
+
[tool.mypy]
|
|
98
|
+
plugins = [
|
|
99
|
+
"pydantic.mypy"
|
|
100
|
+
]
|
|
101
|
+
strict = false
|
|
102
|
+
warn_unreachable = true
|
|
103
|
+
pretty = true
|
|
104
|
+
show_column_numbers = true
|
|
105
|
+
show_error_context = true
|
|
106
|
+
|
|
107
|
+
[[tool.mypy.overrides]]
|
|
108
|
+
# Allow missing type hints in third-party libraries without type information.
|
|
109
|
+
module = [
|
|
110
|
+
"dapla",
|
|
111
|
+
"gcsfs",
|
|
112
|
+
"pyarrow",
|
|
113
|
+
"pyarrow.parquet",
|
|
114
|
+
"datadoc_model",
|
|
115
|
+
"datadoc_model.model",
|
|
116
|
+
"pytest_mock",
|
|
117
|
+
]
|
|
118
|
+
ignore_missing_imports = true
|
|
119
|
+
|
|
120
|
+
# Disable specific error codes in the 'tests' package
|
|
121
|
+
# Also don't require type annotations
|
|
122
|
+
[[tool.mypy.overrides]]
|
|
123
|
+
module = ["tests.*"]
|
|
124
|
+
disable_error_code = [
|
|
125
|
+
"var-annotated",
|
|
126
|
+
"has-type",
|
|
127
|
+
"no-any-return",
|
|
128
|
+
"no-untyped-def",
|
|
129
|
+
]
|
|
130
|
+
|
|
131
|
+
[tool.ruff]
|
|
132
|
+
force-exclude = true # Apply excludes to pre-commit
|
|
133
|
+
show-fixes = true
|
|
134
|
+
src = ["src", "tests"]
|
|
135
|
+
target-version = "py310" # Minimum Python version supported
|
|
136
|
+
include = ["*.py", "*.pyi", "**/pyproject.toml", "*.ipynb"]
|
|
137
|
+
extend-exclude = [
|
|
138
|
+
"__pycache__",
|
|
139
|
+
"old",
|
|
140
|
+
".ipynb_checkpoints",
|
|
141
|
+
"noxfile.py",
|
|
142
|
+
"docs/conf.py",
|
|
143
|
+
]
|
|
144
|
+
|
|
145
|
+
# Ruff rules may be customized as desired: https://docs.astral.sh/ruff/rules/
|
|
146
|
+
[tool.ruff.lint]
|
|
147
|
+
select = ["ALL"]
|
|
148
|
+
ignore = [
|
|
149
|
+
"ANN101", # Supress missing-type-self.
|
|
150
|
+
"ANN102", # Supress missing-type-cls.
|
|
151
|
+
"ANN202", # Don't require return type annotation for private functions.
|
|
152
|
+
"ANN401", # Allow type annotation with type Any.
|
|
153
|
+
"D100", # Suppress undocumented-public-module. Only doc of public api required.
|
|
154
|
+
"E402", # Suppress module-import-not-at-top-of-file, needed in jupyter notebooks.
|
|
155
|
+
"E501", # Suppress line-too-long warnings: trust black's judgement on this one.
|
|
156
|
+
]
|
|
157
|
+
|
|
158
|
+
[tool.ruff.lint.isort]
|
|
159
|
+
force-single-line = true
|
|
160
|
+
|
|
161
|
+
[tool.ruff.lint.mccabe]
|
|
162
|
+
max-complexity = 15
|
|
163
|
+
|
|
164
|
+
[tool.ruff.lint.pydocstyle]
|
|
165
|
+
convention = "google" # You can also use "numpy".
|
|
166
|
+
|
|
167
|
+
[tool.ruff.lint.pep8-naming]
|
|
168
|
+
classmethod-decorators = ["classmethod", "validator", "root_validator", "pydantic.validator"]
|
|
169
|
+
|
|
170
|
+
[tool.ruff.lint.per-file-ignores]
|
|
171
|
+
"*/__init__.py" = ["F401"]
|
|
172
|
+
"**/tests/*" = [
|
|
173
|
+
"ANN001", # type annotations don't add value for test functions
|
|
174
|
+
"ANN002", # type annotations don't add value for test functions
|
|
175
|
+
"ANN003", # type annotations don't add value for test functions
|
|
176
|
+
"ANN201", # type annotations don't add value for test functions
|
|
177
|
+
"ANN204", # type annotations don't add value for test functions
|
|
178
|
+
"ANN205", # type annotations don't add value for test functions
|
|
179
|
+
"ANN206", # type annotations don't add value for test functions
|
|
180
|
+
"D100", # docstrings are overkill for test functions
|
|
181
|
+
"D101",
|
|
182
|
+
"D102",
|
|
183
|
+
"D103",
|
|
184
|
+
"S101", # asserts are encouraged in pytest
|
|
185
|
+
]
|
|
186
|
+
|
|
187
|
+
[build-system]
|
|
188
|
+
requires = ["poetry-core>=1.0.0"]
|
|
189
|
+
build-backend = "poetry.core.masonry.api"
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""Document dataset."""
|
|
2
|
+
|
|
3
|
+
from datadoc_model import model
|
|
4
|
+
|
|
5
|
+
from .core import Datadoc
|
|
6
|
+
from .dapla_dataset_path_info import DaplaDatasetPathInfo
|
|
7
|
+
from .model_validation import ObligatoryDatasetWarning
|
|
8
|
+
from .model_validation import ObligatoryVariableWarning
|
|
9
|
+
from .utility.enums import DaplaRegion
|
|
10
|
+
from .utility.enums import DaplaService
|
|
11
|
+
from .utility.enums import SupportedLanguages
|
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
from dataset.external_sources.external_sources import GetExternalSource
|
|
8
|
+
from dataset.utility.enums import SupportedLanguages
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
12
|
+
|
|
13
|
+
import pandas as pd
|
|
14
|
+
from klass.classes.classification import KlassClassification
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class CodeListItem:
    """Data structure for a code list item.

    Attributes:
        titles: A dictionary mapping language codes to titles.
        code: The code associated with the item.
    """

    titles: dict[SupportedLanguages, str]
    code: str

    def get_title(self, language: SupportedLanguages) -> str:
        """Return the title in the specified language.

        Falls back to Norwegian Bokmål ("nb") when the requested language is a
        Norwegian variant (Bokmål or Nynorsk), and to English ("en") otherwise.
        When neither the requested nor the fallback title exists, an empty
        string is returned and the failure is logged.

        Args:
            language: The language code for which to get the title.

        Returns:
            The title in the requested language, the fallback language's title
            if the requested one is missing, or "" when no title was found.
        """
        try:
            return self.titles[language]
        except KeyError:
            # Requested language is missing: pick the conventional fallback.
            norwegian_variants = (
                SupportedLanguages.NORSK_BOKMÅL,
                SupportedLanguages.NORSK_NYNORSK,
            )
            fallback_language = (
                SupportedLanguages.NORSK_BOKMÅL
                if language in norwegian_variants
                else SupportedLanguages.ENGLISH
            )
            try:
                return self.titles[fallback_language]
            except KeyError:
                # logger.exception records the active KeyError traceback.
                logger.exception(
                    "Could not find title for subject %s and language: %s",
                    self,
                    language.name,
                )
                return ""
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class CodeList(GetExternalSource):
    """Class for retrieving classifications from Klass.

    This class fetches a classification given a classification ID
    and supports multiple languages.

    Attributes:
        supported_languages: A list of supported language codes.
        _classifications: A list to store classification items.
        classification_id: The ID of the classification to retrieve.
        classifications_dataframes: A dictionary to store dataframes of
            classifications.
    """

    def __init__(
        self,
        executor: ThreadPoolExecutor,
        classification_id: int | None,
    ) -> None:
        """Initialize the CodeList with the given classification ID and executor.

        Args:
            executor: An instance of ThreadPoolExecutor to manage the asynchronous
                execution of data fetching.
            classification_id: The ID of the classification to retrieve.
        """
        # Only Bokmål and English are fetched; Nynorsk requests are served via
        # the Bokmål fallback in CodeListItem.get_title.
        self.supported_languages = [
            SupportedLanguages.NORSK_BOKMÅL,
            SupportedLanguages.ENGLISH,
        ]
        self._classifications: list[CodeListItem] = []
        self.classification_id = classification_id
        self.classifications_dataframes: (
            dict[SupportedLanguages, pd.DataFrame] | None
        ) = None
        # NOTE(review): presumably GetExternalSource.__init__ submits
        # _fetch_data_from_external_source to the executor — confirm there.
        super().__init__(executor)

    def _fetch_data_from_external_source(
        self,
    ) -> dict[SupportedLanguages, pd.DataFrame] | None:
        """Fetch the classifications from Klass by classification ID.

        This method retrieves classification data for each supported language and
        stores it in a dictionary where the keys are language codes and the values
        are pandas DataFrames containing the classification data.

        Returns:
            A dictionary mapping language codes to pandas DataFrames containing the
            classification data for the given classification ID.
            If an exception occurs during the fetching process, logs the exception
            and returns None.
        """
        classifications_dataframes: dict[SupportedLanguages, pd.DataFrame] = {}
        for i in self.supported_languages:
            try:
                classifications_dataframes[i] = (
                    KlassClassification(
                        str(self.classification_id),
                        i,
                    )
                    .get_codes()
                    .data
                )
            except Exception:  # noqa: PERF203
                # Any failure (network, unknown ID, ...) aborts the whole fetch.
                logger.exception(
                    "Exception while getting classifications from Klass",
                )
                return None
        else:
            # for-else: runs when the loop completed without break, i.e. every
            # language was fetched successfully.
            return classifications_dataframes
        # NOTE(review): unreachable — the loop has no break, so the for-else
        # always returns above; kept for defensive completeness.
        return None

    def _extract_titles(
        self,
        dataframes: dict[SupportedLanguages, pd.DataFrame],
    ) -> list[dict[SupportedLanguages, str | None]]:
        """Extract titles from the dataframes for each supported language.

        This method processes the provided dataframes and extracts the title from
        each row for all supported languages, creating a list of dictionaries where
        each dictionary maps language codes to titles.

        Args:
            dataframes: A dictionary mapping language codes to pandas DataFrames
                containing classification data.

        Returns:
            A list of dictionaries, each mapping language codes to titles.
            If a title is not available in a dataframe, the corresponding dictionary
            value will be None.
        """
        list_of_titles = []
        languages = list(dataframes)
        # Row count is taken from the Bokmål dataframe; assumes every language
        # dataframe has the same number of rows — TODO confirm against Klass.
        for i in range(len(dataframes[SupportedLanguages.NORSK_BOKMÅL])):
            titles = {}
            for j in languages:
                if "name" in dataframes[j]:
                    titles[j] = dataframes[j].loc[:, "name"][i]
                else:
                    # No "name" column for this language: record a missing title.
                    titles[j] = None
            list_of_titles.append(titles)
        return list_of_titles

    def _create_code_list_from_dataframe(
        self,
        classifications_dataframes: dict[SupportedLanguages, pd.DataFrame],
    ) -> list[CodeListItem]:
        """Create a list of CodeListItem objects from the classification dataframes.

        This method extracts titles from the provided dataframes and pairs them
        with their corresponding classification codes to create a list of
        CodeListItem objects.

        Args:
            classifications_dataframes: A dictionary mapping language codes to
                pandas DataFrames containing classification data.

        Returns:
            A list of CodeListItem objects containing classification titles
            and codes.
        """
        classification_names = self._extract_titles(classifications_dataframes)
        classification_codes: list
        if "code" in classifications_dataframes[SupportedLanguages.NORSK_BOKMÅL]:
            # Codes are language-independent, so the Bokmål dataframe suffices.
            classification_codes = (
                classifications_dataframes[SupportedLanguages.NORSK_BOKMÅL]
                .loc[:, "code"]
                .to_list()
            )
        else:
            # No "code" column: pad with None so zip still pairs every title.
            classification_codes = [None] * len(classification_names)
        classification_items = []
        for a, b in zip(classification_names, classification_codes, strict=False):
            classification_items.append(
                CodeListItem(a, b),
            )

        return classification_items

    def _get_classification_dataframe_if_loaded(self) -> bool:
        """Check if the classification data from Klass is loaded.

        This method verifies whether the classification data has been loaded.
        If not, it retrieves the data from an external source and populates the
        classifications. It logs the process and returns a boolean indicating the
        success of the operation.

        Returns:
            True if the data is loaded and classifications are successfully extracted,
            False otherwise.
        """
        if not self._classifications:
            # retrieve_external_data is inherited from GetExternalSource;
            # returns None while the background fetch is unfinished or failed.
            self.classifications_dataframes = self.retrieve_external_data()
            if self.classifications_dataframes is not None:
                self._classifications = self._create_code_list_from_dataframe(
                    self.classifications_dataframes,
                )
                logger.debug(
                    "Thread finished. found %s classifications",
                    len(self._classifications),
                )
                return True
            logger.warning(
                "Thread is not done. Cannot get classifications from the dataframe.",
            )
            return False
        # Already populated from an earlier call.
        return True

    @property
    def classifications(self) -> list[CodeListItem]:
        """Get the list of classifications.

        Returns:
            A list of CodeListItem objects.
        """
        # Lazily triggers the load; may return an empty list if the background
        # fetch has not completed yet.
        self._get_classification_dataframe_if_loaded()

        logger.debug("Got %s classifications subjects", len(self._classifications))
        return self._classifications
|