dapla-toolbelt-metadata 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dapla-toolbelt-metadata might be problematic. Click here for more details.

@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright © 2024 Statistics Norway
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,125 @@
1
+ Metadata-Version: 2.1
2
+ Name: dapla-toolbelt-metadata
3
+ Version: 0.1.1
4
+ Summary: Dapla Toolbelt Metadata
5
+ Home-page: https://github.com/statisticsnorway/dapla-toolbelt-metadata
6
+ License: MIT
7
+ Author: Team Metadata
8
+ Author-email: metadata@ssb.no
9
+ Requires-Python: >=3.10,<4.0
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Requires-Dist: arrow (>=1.3.0)
17
+ Requires-Dist: beautifulsoup4 (>=4.12.3)
18
+ Requires-Dist: black (>=24.8.0,<25.0.0)
19
+ Requires-Dist: bs4 (>=0.0.2,<0.0.3)
20
+ Requires-Dist: click (>=8.0.1)
21
+ Requires-Dist: cloudpathlib[gs] (>=0.17.0)
22
+ Requires-Dist: coverage (>=7.6.1,<8.0.0)
23
+ Requires-Dist: dapla-toolbelt (>=1.3.3)
24
+ Requires-Dist: faker (>=26.1.0,<27.0.0)
25
+ Requires-Dist: furo (>=2024.7.18,<2025.0.0)
26
+ Requires-Dist: gunicorn (>=21.2.0)
27
+ Requires-Dist: pandas (>=1.4.2)
28
+ Requires-Dist: pre-commit (>=3.8.0,<4.0.0)
29
+ Requires-Dist: pyarrow (>=8.0.0)
30
+ Requires-Dist: pydantic (>=2.5.2)
31
+ Requires-Dist: pygments (>=2.18.0,<3.0.0)
32
+ Requires-Dist: pyjwt (>=2.8.0)
33
+ Requires-Dist: pytest (>=8.3.2,<9.0.0)
34
+ Requires-Dist: pytest-mock (>=3.14.0,<4.0.0)
35
+ Requires-Dist: python-dotenv (>=1.0.1)
36
+ Requires-Dist: requests (>=2.31.0)
37
+ Requires-Dist: requests-mock (>=1.12.1,<2.0.0)
38
+ Requires-Dist: ruff (>=0.5.6,<0.6.0)
39
+ Requires-Dist: ssb-datadoc-model (>=6.0.0,<7.0.0)
40
+ Requires-Dist: ssb-klass-python (>=0.0.9)
41
+ Requires-Dist: types-beautifulsoup4 (>=4.12.0.20240511,<5.0.0.0)
42
+ Project-URL: Changelog, https://github.com/statisticsnorway/dapla-toolbelt-metadata/releases
43
+ Project-URL: Documentation, https://statisticsnorway.github.io/dapla-toolbelt-metadata
44
+ Project-URL: Repository, https://github.com/statisticsnorway/dapla-toolbelt-metadata
45
+ Description-Content-Type: text/markdown
46
+
47
+ # Dapla Toolbelt Metadata
48
+
49
+ [![PyPI](https://img.shields.io/pypi/v/dapla-toolbelt-metadata.svg)][pypi status]
50
+ [![Status](https://img.shields.io/pypi/status/dapla-toolbelt-metadata.svg)][pypi status]
51
+ [![Python Version](https://img.shields.io/pypi/pyversions/dapla-toolbelt-metadata)][pypi status]
52
+ [![License](https://img.shields.io/pypi/l/dapla-toolbelt-metadata)][license]
53
+
54
+ [![Documentation](https://github.com/statisticsnorway/dapla-toolbelt-metadata/actions/workflows/docs.yml/badge.svg)][documentation]
55
+ [![Tests](https://github.com/statisticsnorway/dapla-toolbelt-metadata/actions/workflows/tests.yml/badge.svg)][tests]
56
+ [![Coverage](https://sonarcloud.io/api/project_badges/measure?project=statisticsnorway_dapla-toolbelt-metadata&metric=coverage&token=ee0a4d273db2bf811222fca5dffccd4592e3d536)][sonarcov]
57
+ [![Quality Gate Status](https://sonarcloud.io/api/project_badges/measure?project=statisticsnorway_dapla-toolbelt-metadata&metric=alert_status&token=ee0a4d273db2bf811222fca5dffccd4592e3d536)][sonarquality]
58
+
59
+ [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)][pre-commit]
60
+ [![Black](https://img.shields.io/badge/code%20style-black-000000.svg)][black]
61
+ [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
62
+ [![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)][poetry]
63
+
64
+ [pypi status]: https://pypi.org/project/dapla-toolbelt-metadata/
65
+ [documentation]: https://statisticsnorway.github.io/dapla-toolbelt-metadata
66
+ [tests]: https://github.com/statisticsnorway/dapla-toolbelt-metadata/actions?workflow=Tests
67
+
68
+ [sonarcov]: https://sonarcloud.io/summary/overall?id=statisticsnorway_dapla-toolbelt-metadata
69
+ [sonarquality]: https://sonarcloud.io/summary/overall?id=statisticsnorway_dapla-toolbelt-metadata
70
+ [pre-commit]: https://github.com/pre-commit/pre-commit
71
+ [black]: https://github.com/psf/black
72
+ [poetry]: https://python-poetry.org/
73
+
74
+ ## Features
75
+
76
+ - TODO
77
+
78
+ ## Requirements
79
+
80
+ - TODO
81
+ - Python
82
+
83
+ ## Installation
84
+
85
+ You can install _Dapla Toolbelt Metadata_ via [pip] from [PyPI]:
86
+
87
+ ```console
88
+ pip install dapla-toolbelt-metadata
89
+ ```
90
+
91
+ ## Usage
92
+
93
+ Please see the [Reference Guide] for details.
94
+
95
+ ## Contributing
96
+
97
+ Contributions are very welcome.
98
+ To learn more, see the [Contributor Guide].
99
+
100
+ ## License
101
+
102
+ Distributed under the terms of the [MIT license][license],
103
+ _Dapla Toolbelt Metadata_ is free and open source software.
104
+
105
+ ## Issues
106
+
107
+ If you encounter any problems,
108
+ please [file an issue] along with a detailed description.
109
+
110
+ ## Credits
111
+
112
+ This project was generated from [Statistics Norway]'s [SSB PyPI Template].
113
+
114
+ [statistics norway]: https://www.ssb.no/en
115
+ [pypi]: https://pypi.org/
116
+ [ssb pypi template]: https://github.com/statisticsnorway/ssb-pypitemplate
117
+ [file an issue]: https://github.com/statisticsnorway/dapla-toolbelt-metadata/issues
118
+ [pip]: https://pip.pypa.io/
119
+
120
+ <!-- github-only -->
121
+
122
+ [license]: https://github.com/statisticsnorway/dapla-toolbelt-metadata/blob/main/LICENSE
123
+ [contributor guide]: https://github.com/statisticsnorway/dapla-toolbelt-metadata/blob/main/CONTRIBUTING.md
124
+ [reference guide]: https://statisticsnorway.github.io/dapla-toolbelt-metadata/reference.html
125
+
@@ -0,0 +1,78 @@
1
+ # Dapla Toolbelt Metadata
2
+
3
+ [![PyPI](https://img.shields.io/pypi/v/dapla-toolbelt-metadata.svg)][pypi status]
4
+ [![Status](https://img.shields.io/pypi/status/dapla-toolbelt-metadata.svg)][pypi status]
5
+ [![Python Version](https://img.shields.io/pypi/pyversions/dapla-toolbelt-metadata)][pypi status]
6
+ [![License](https://img.shields.io/pypi/l/dapla-toolbelt-metadata)][license]
7
+
8
+ [![Documentation](https://github.com/statisticsnorway/dapla-toolbelt-metadata/actions/workflows/docs.yml/badge.svg)][documentation]
9
+ [![Tests](https://github.com/statisticsnorway/dapla-toolbelt-metadata/actions/workflows/tests.yml/badge.svg)][tests]
10
+ [![Coverage](https://sonarcloud.io/api/project_badges/measure?project=statisticsnorway_dapla-toolbelt-metadata&metric=coverage&token=ee0a4d273db2bf811222fca5dffccd4592e3d536)][sonarcov]
11
+ [![Quality Gate Status](https://sonarcloud.io/api/project_badges/measure?project=statisticsnorway_dapla-toolbelt-metadata&metric=alert_status&token=ee0a4d273db2bf811222fca5dffccd4592e3d536)][sonarquality]
12
+
13
+ [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)][pre-commit]
14
+ [![Black](https://img.shields.io/badge/code%20style-black-000000.svg)][black]
15
+ [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
16
+ [![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)][poetry]
17
+
18
+ [pypi status]: https://pypi.org/project/dapla-toolbelt-metadata/
19
+ [documentation]: https://statisticsnorway.github.io/dapla-toolbelt-metadata
20
+ [tests]: https://github.com/statisticsnorway/dapla-toolbelt-metadata/actions?workflow=Tests
21
+
22
+ [sonarcov]: https://sonarcloud.io/summary/overall?id=statisticsnorway_dapla-toolbelt-metadata
23
+ [sonarquality]: https://sonarcloud.io/summary/overall?id=statisticsnorway_dapla-toolbelt-metadata
24
+ [pre-commit]: https://github.com/pre-commit/pre-commit
25
+ [black]: https://github.com/psf/black
26
+ [poetry]: https://python-poetry.org/
27
+
28
+ ## Features
29
+
30
+ - TODO
31
+
32
+ ## Requirements
33
+
34
+ - TODO
35
+ - Python
36
+
37
+ ## Installation
38
+
39
+ You can install _Dapla Toolbelt Metadata_ via [pip] from [PyPI]:
40
+
41
+ ```console
42
+ pip install dapla-toolbelt-metadata
43
+ ```
44
+
45
+ ## Usage
46
+
47
+ Please see the [Reference Guide] for details.
48
+
49
+ ## Contributing
50
+
51
+ Contributions are very welcome.
52
+ To learn more, see the [Contributor Guide].
53
+
54
+ ## License
55
+
56
+ Distributed under the terms of the [MIT license][license],
57
+ _Dapla Toolbelt Metadata_ is free and open source software.
58
+
59
+ ## Issues
60
+
61
+ If you encounter any problems,
62
+ please [file an issue] along with a detailed description.
63
+
64
+ ## Credits
65
+
66
+ This project was generated from [Statistics Norway]'s [SSB PyPI Template].
67
+
68
+ [statistics norway]: https://www.ssb.no/en
69
+ [pypi]: https://pypi.org/
70
+ [ssb pypi template]: https://github.com/statisticsnorway/ssb-pypitemplate
71
+ [file an issue]: https://github.com/statisticsnorway/dapla-toolbelt-metadata/issues
72
+ [pip]: https://pip.pypa.io/
73
+
74
+ <!-- github-only -->
75
+
76
+ [license]: https://github.com/statisticsnorway/dapla-toolbelt-metadata/blob/main/LICENSE
77
+ [contributor guide]: https://github.com/statisticsnorway/dapla-toolbelt-metadata/blob/main/CONTRIBUTING.md
78
+ [reference guide]: https://statisticsnorway.github.io/dapla-toolbelt-metadata/reference.html
@@ -0,0 +1,189 @@
1
+ [tool.poetry]
2
+ name = "dapla-toolbelt-metadata"
3
+ version = "0.1.1"
4
+ description = "Dapla Toolbelt Metadata"
5
+ authors = ["Team Metadata <metadata@ssb.no>"]
6
+ license = "MIT"
7
+ readme = "README.md"
8
+ homepage = "https://github.com/statisticsnorway/dapla-toolbelt-metadata"
9
+ repository = "https://github.com/statisticsnorway/dapla-toolbelt-metadata"
10
+ documentation = "https://statisticsnorway.github.io/dapla-toolbelt-metadata"
11
+ classifiers = ["Development Status :: 4 - Beta"]
12
+
13
+ packages = [{ include = "dataset", from = "src" }]
14
+
15
+ [tool.poetry.urls]
16
+ Changelog = "https://github.com/statisticsnorway/dapla-toolbelt-metadata/releases"
17
+
18
+ [tool.poetry.dependencies]
19
+ click = ">=8.0.1"
20
+ python = ">=3.10,<4.0"
21
+ pyarrow = ">=8.0.0"
22
+ pydantic = ">=2.5.2"
23
+ pandas = ">=1.4.2"
24
+ dapla-toolbelt = ">=1.3.3"
25
+ gunicorn = ">=21.2.0"
26
+ arrow = ">=1.3.0"
27
+ python-dotenv = ">=1.0.1"
28
+ requests = ">=2.31.0"
29
+ beautifulsoup4 = ">=4.12.3"
30
+ cloudpathlib = { extras = ["gs"], version = ">=0.17.0" }
31
+ pyjwt = ">=2.8.0"
32
+ ssb-klass-python = ">=0.0.9"
33
+ ssb-datadoc-model = "^6.0.0"
34
+ faker = "^26.1.0"
35
+ pygments = "^2.18.0"
36
+ black = "^24.8.0"
37
+ coverage = "^7.6.1"
38
+ furo = "^2024.7.18"
39
+ pre-commit = "^3.8.0"
40
+ ruff = "^0.5.6"
41
+ pytest = "^8.3.2"
42
+ pytest-mock = "^3.14.0"
43
+ requests-mock = "^1.12.1"
44
+ bs4 = "^0.0.2"
45
+ types-beautifulsoup4 = "^4.12.0.20240511"
46
+
47
+ [tool.poetry.group.dev.dependencies]
48
+ pygments = ">=2.10.0"
49
+ black = ">=22.3.0"
50
+ coverage = { extras = ["toml"], version = ">=6.2" }
51
+ furo = ">=2021.11.12"
52
+ pre-commit = "*"
53
+ ruff = ">=0.0.284"
54
+ pytest = ">=7.1.2"
55
+ sphinx = ">=6.2.1"
56
+ sphinx-autobuild = ">=2021.3.14"
57
+ sphinx-autodoc-typehints = ">=1.24.0"
58
+ sphinx-click = ">=3.0.2"
59
+ typeguard = ">=2.13.3"
60
+ xdoctest = { extras = ["colors"], version = ">=0.15.10" }
61
+ myst-parser = { version = ">=0.16.1" }
62
+ mypy = ">=0.950"
63
+ pytest-cov = ">=3.0.0"
64
+ nbstripout = ">=0.5.0"
65
+ python-kacl = "*"
66
+ pytest-mock = "*"
67
+ deptry = ">=0.12.0"
68
+ pandas-stubs = "*"
69
+ pyarrow-stubs = ">=10.0.1.9"
70
+ types-Pygments = "*"
71
+ types-colorama = "*"
72
+ types-setuptools = "*"
73
+ types-beautifulsoup4 = "*"
74
+ requests-mock = "*"
75
+ faker = "*"
76
+
77
+ [tool.pytest.ini_options]
78
+ pythonpath = ["src"]
79
+
80
+ [tool.coverage.paths]
81
+ source = ["src", "*/site-packages"]
82
+ tests = ["tests", "*/tests"]
83
+
84
+ [tool.coverage.run]
85
+ branch = true
86
+ source = ["dataset"]
87
+ omit = [
88
+ "tests/*",
89
+ "__init__.py",
90
+ ]
91
+ relative_files = true
92
+
93
+ [tool.coverage.report]
94
+ show_missing = true
95
+ fail_under = 80
96
+
97
+ [tool.mypy]
98
+ plugins = [
99
+ "pydantic.mypy"
100
+ ]
101
+ strict = false
102
+ warn_unreachable = true
103
+ pretty = true
104
+ show_column_numbers = true
105
+ show_error_context = true
106
+
107
+ [[tool.mypy.overrides]]
108
+ # Allow missing type hints in third-party libraries without type information.
109
+ module = [
110
+ "dapla",
111
+ "gcsfs",
112
+ "pyarrow",
113
+ "pyarrow.parquet",
114
+ "datadoc_model",
115
+ "datadoc_model.model",
116
+ "pytest_mock",
117
+ ]
118
+ ignore_missing_imports = true
119
+
120
+ # Disable specific error codes in the 'tests' package
121
+ # Also don't require type annotations
122
+ [[tool.mypy.overrides]]
123
+ module = ["tests.*"]
124
+ disable_error_code = [
125
+ "var-annotated",
126
+ "has-type",
127
+ "no-any-return",
128
+ "no-untyped-def",
129
+ ]
130
+
131
+ [tool.ruff]
132
+ force-exclude = true # Apply excludes to pre-commit
133
+ show-fixes = true
134
+ src = ["src", "tests"]
135
+ target-version = "py310" # Minimum Python version supported
136
+ include = ["*.py", "*.pyi", "**/pyproject.toml", "*.ipynb"]
137
+ extend-exclude = [
138
+ "__pycache__",
139
+ "old",
140
+ ".ipynb_checkpoints",
141
+ "noxfile.py",
142
+ "docs/conf.py",
143
+ ]
144
+
145
+ # Ruff rules may be customized as desired: https://docs.astral.sh/ruff/rules/
146
+ [tool.ruff.lint]
147
+ select = ["ALL"]
148
+ ignore = [
149
+ "ANN101", # Suppress missing-type-self.
150
+ "ANN102", # Suppress missing-type-cls.
151
+ "ANN202", # Don't require return type annotation for private functions.
152
+ "ANN401", # Allow type annotation with type Any.
153
+ "D100", # Suppress undocumented-public-module. Only doc of public api required.
154
+ "E402", # Suppress module-import-not-at-top-of-file, needed in jupyter notebooks.
155
+ "E501", # Suppress line-too-long warnings: trust black's judgement on this one.
156
+ ]
157
+
158
+ [tool.ruff.lint.isort]
159
+ force-single-line = true
160
+
161
+ [tool.ruff.lint.mccabe]
162
+ max-complexity = 15
163
+
164
+ [tool.ruff.lint.pydocstyle]
165
+ convention = "google" # You can also use "numpy".
166
+
167
+ [tool.ruff.lint.pep8-naming]
168
+ classmethod-decorators = ["classmethod", "validator", "root_validator", "pydantic.validator"]
169
+
170
+ [tool.ruff.lint.per-file-ignores]
171
+ "*/__init__.py" = ["F401"]
172
+ "**/tests/*" = [
173
+ "ANN001", # type annotations don't add value for test functions
174
+ "ANN002", # type annotations don't add value for test functions
175
+ "ANN003", # type annotations don't add value for test functions
176
+ "ANN201", # type annotations don't add value for test functions
177
+ "ANN204", # type annotations don't add value for test functions
178
+ "ANN205", # type annotations don't add value for test functions
179
+ "ANN206", # type annotations don't add value for test functions
180
+ "D100", # docstrings are overkill for test functions
181
+ "D101",
182
+ "D102",
183
+ "D103",
184
+ "S101", # asserts are encouraged in pytest
185
+ ]
186
+
187
+ [build-system]
188
+ requires = ["poetry-core>=1.0.0"]
189
+ build-backend = "poetry.core.masonry.api"
@@ -0,0 +1,11 @@
1
+ """Document dataset."""
2
+
3
+ from datadoc_model import model
4
+
5
+ from .core import Datadoc
6
+ from .dapla_dataset_path_info import DaplaDatasetPathInfo
7
+ from .model_validation import ObligatoryDatasetWarning
8
+ from .model_validation import ObligatoryVariableWarning
9
+ from .utility.enums import DaplaRegion
10
+ from .utility.enums import DaplaService
11
+ from .utility.enums import SupportedLanguages
@@ -0,0 +1,244 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from dataclasses import dataclass
5
+ from typing import TYPE_CHECKING
6
+
7
+ from dataset.external_sources.external_sources import GetExternalSource
8
+ from dataset.utility.enums import SupportedLanguages
9
+
10
+ if TYPE_CHECKING:
11
+ from concurrent.futures import ThreadPoolExecutor
12
+
13
+ import pandas as pd
14
+ from klass.classes.classification import KlassClassification
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
+ @dataclass
20
+ class CodeListItem:
21
+ """Data structure for a code list item.
22
+
23
+ Attributes:
24
+ titles: A dictionary mapping language codes to titles.
25
+ code: The code associated with the item.
26
+ """
27
+
28
+ titles: dict[SupportedLanguages, str]
29
+ code: str
30
+
31
+ def get_title(self, language: SupportedLanguages) -> str:
32
+ """Return the title in the specified language.
33
+
34
+ Args:
35
+ language: The language code for which to get the title.
36
+
37
+ Returns:
38
+ The title in the specified language. It returns the title in Norwegian
39
+ Bokmål ("nb") if the language is either Norwegian Bokmål or Norwegian
40
+ Nynorsk, otherwise it returns the title in English ("en"). If none of
41
+ these are available, it returns an empty string and logs an exception.
42
+ """
43
+ try:
44
+ return self.titles[language]
45
+ except KeyError:
46
+ try:
47
+ return self.titles[
48
+ (
49
+ SupportedLanguages.NORSK_BOKMÅL
50
+ if language
51
+ in [
52
+ SupportedLanguages.NORSK_BOKMÅL,
53
+ SupportedLanguages.NORSK_NYNORSK,
54
+ ]
55
+ else SupportedLanguages.ENGLISH
56
+ )
57
+ ]
58
+ except KeyError:
59
+ logger.exception(
60
+ "Could not find title for subject %s and language: %s",
61
+ self,
62
+ language.name,
63
+ )
64
+ return ""
65
+
66
+
67
+ class CodeList(GetExternalSource):
68
+ """Class for retrieving classifications from Klass.
69
+
70
+ This class fetches a classification given a classification ID
71
+ and supports multiple languages.
72
+
73
+ Attributes:
74
+ supported_languages: A list of supported language codes.
75
+ _classifications: A list to store classification items.
76
+ classification_id: The ID of the classification to retrieve.
77
+ classifications_dataframes: A dictionary to store dataframes of
78
+ classifications.
79
+ """
80
+
81
+ def __init__(
82
+ self,
83
+ executor: ThreadPoolExecutor,
84
+ classification_id: int | None,
85
+ ) -> None:
86
+ """Initialize the CodeList with the given classification ID and executor.
87
+
88
+ Args:
89
+ executor: An instance of ThreadPoolExecutor to manage the asynchronous
90
+ execution of data fetching.
91
+ classification_id: The ID of the classification to retrieve.
92
+ """
93
+ self.supported_languages = [
94
+ SupportedLanguages.NORSK_BOKMÅL,
95
+ SupportedLanguages.ENGLISH,
96
+ ]
97
+ self._classifications: list[CodeListItem] = []
98
+ self.classification_id = classification_id
99
+ self.classifications_dataframes: (
100
+ dict[SupportedLanguages, pd.DataFrame] | None
101
+ ) = None
102
+ super().__init__(executor)
103
+
104
+ def _fetch_data_from_external_source(
105
+ self,
106
+ ) -> dict[SupportedLanguages, pd.DataFrame] | None:
107
+ """Fetch the classifications from Klass by classification ID.
108
+
109
+ This method retrieves classification data for each supported language and
110
+ stores it in a dictionary where the keys are language codes and the values
111
+ are pandas DataFrames containing the classification data.
112
+
113
+ Returns:
114
+ A dictionary mapping language codes to pandas DataFrames containing the
115
+ classification data for the given classification ID.
116
+ If an exception occurs during the fetching process, logs the exception
117
+ and returns None.
118
+ """
119
+ classifications_dataframes: dict[SupportedLanguages, pd.DataFrame] = {}
120
+ for i in self.supported_languages:
121
+ try:
122
+ classifications_dataframes[i] = (
123
+ KlassClassification(
124
+ str(self.classification_id),
125
+ i,
126
+ )
127
+ .get_codes()
128
+ .data
129
+ )
130
+ except Exception: # noqa: PERF203
131
+ logger.exception(
132
+ "Exception while getting classifications from Klass",
133
+ )
134
+ return None
135
+ else:
136
+ return classifications_dataframes
137
+ return None
138
+
139
+ def _extract_titles(
140
+ self,
141
+ dataframes: dict[SupportedLanguages, pd.DataFrame],
142
+ ) -> list[dict[SupportedLanguages, str]]:
143
+ """Extract titles from the dataframes for each supported language.
144
+
145
+ This method processes the provided dataframes and extracts the title from
146
+ each row for all supported languages, creating a list of dictionaries where
147
+ each dictionary maps language codes to titles.
148
+
149
+ Args:
150
+ dataframes: A dictionary mapping language codes to pandas DataFrames
151
+ containing classification data.
152
+
153
+ Returns:
154
+ A list of dictionaries, each mapping language codes to titles.
155
+ If a title is not available in a dataframe, the corresponding dictionary
156
+ value will be None.
157
+ """
158
+ list_of_titles = []
159
+ languages = list(dataframes)
160
+ for i in range(len(dataframes[SupportedLanguages.NORSK_BOKMÅL])):
161
+ titles = {}
162
+ for j in languages:
163
+ if "name" in dataframes[j]:
164
+ titles[j] = dataframes[j].loc[:, "name"][i]
165
+ else:
166
+ titles[j] = None
167
+ list_of_titles.append(titles)
168
+ return list_of_titles
169
+
170
+ def _create_code_list_from_dataframe(
171
+ self,
172
+ classifications_dataframes: dict[SupportedLanguages, pd.DataFrame],
173
+ ) -> list[CodeListItem]:
174
+ """Create a list of CodeListItem objects from the classification dataframes.
175
+
176
+ This method extracts titles from the provided dataframes and pairs them
177
+ with their corresponding classification codes to create a list of
178
+ CodeListItem objects.
179
+
180
+ Args:
181
+ classifications_dataframes: A dictionary mapping language codes to
182
+ pandas DataFrames containing classification data.
183
+
184
+ Returns:
185
+ A list of CodeListItem objects containing classification titles
186
+ and codes.
187
+ """
188
+ classification_names = self._extract_titles(classifications_dataframes)
189
+ classification_codes: list
190
+ if "code" in classifications_dataframes[SupportedLanguages.NORSK_BOKMÅL]:
191
+ classification_codes = (
192
+ classifications_dataframes[SupportedLanguages.NORSK_BOKMÅL]
193
+ .loc[:, "code"]
194
+ .to_list()
195
+ )
196
+ else:
197
+ classification_codes = [None] * len(classification_names)
198
+ classification_items = []
199
+ for a, b in zip(classification_names, classification_codes, strict=False):
200
+ classification_items.append(
201
+ CodeListItem(a, b),
202
+ )
203
+
204
+ return classification_items
205
+
206
+ def _get_classification_dataframe_if_loaded(self) -> bool:
207
+ """Check if the classification data from Klass is loaded.
208
+
209
+ This method verifies whether the classification data has been loaded.
210
+ If not, it retrieves the data from an external source and populates the
211
+ classifications. It logs the process and returns a boolean indicating the
212
+ success of the operation.
213
+
214
+ Returns:
215
+ True if the data is loaded and classifications are successfully extracted,
216
+ False otherwise.
217
+ """
218
+ if not self._classifications:
219
+ self.classifications_dataframes = self.retrieve_external_data()
220
+ if self.classifications_dataframes is not None:
221
+ self._classifications = self._create_code_list_from_dataframe(
222
+ self.classifications_dataframes,
223
+ )
224
+ logger.debug(
225
+ "Thread finished. found %s classifications",
226
+ len(self._classifications),
227
+ )
228
+ return True
229
+ logger.warning(
230
+ "Thread is not done. Cannot get classifications from the dataframe.",
231
+ )
232
+ return False
233
+
234
+ @property
235
+ def classifications(self) -> list[CodeListItem]:
236
+ """Get the list of classifications.
237
+
238
+ Returns:
239
+ A list of CodeListItem objects.
240
+ """
241
+ self._get_classification_dataframe_if_loaded()
242
+
243
+ logger.debug("Got %s classifications subjects", len(self._classifications))
244
+ return self._classifications