dapla-toolbelt-metadata 0.1.2__tar.gz → 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: the registry flags this release of dapla-toolbelt-metadata as potentially problematic.
- {dapla_toolbelt_metadata-0.1.2 → dapla_toolbelt_metadata-0.2.1}/PKG-INFO +9 -20
- {dapla_toolbelt_metadata-0.1.2 → dapla_toolbelt_metadata-0.2.1}/README.md +7 -5
- {dapla_toolbelt_metadata-0.1.2 → dapla_toolbelt_metadata-0.2.1}/pyproject.toml +36 -47
- dapla_toolbelt_metadata-0.2.1/src/dapla_metadata/__init__.py +5 -0
- {dapla_toolbelt_metadata-0.1.2/src/dataset → dapla_toolbelt_metadata-0.2.1/src/dapla_metadata/datasets}/code_list.py +2 -2
- {dapla_toolbelt_metadata-0.1.2/src/dataset → dapla_toolbelt_metadata-0.2.1/src/dapla_metadata/datasets}/config.py +9 -5
- {dapla_toolbelt_metadata-0.1.2/src/dataset → dapla_toolbelt_metadata-0.2.1/src/dapla_metadata/datasets}/core.py +75 -91
- {dapla_toolbelt_metadata-0.1.2/src/dataset → dapla_toolbelt_metadata-0.2.1/src/dapla_metadata/datasets}/dataset_parser.py +1 -1
- {dapla_toolbelt_metadata-0.1.2/src/dataset → dapla_toolbelt_metadata-0.2.1/src/dapla_metadata/datasets}/model_validation.py +17 -11
- {dapla_toolbelt_metadata-0.1.2/src/dataset → dapla_toolbelt_metadata-0.2.1/src/dapla_metadata/datasets}/statistic_subject_mapping.py +9 -8
- {dapla_toolbelt_metadata-0.1.2/src/dataset → dapla_toolbelt_metadata-0.2.1/src/dapla_metadata/datasets}/user_info.py +3 -3
- {dapla_toolbelt_metadata-0.1.2/src/dataset → dapla_toolbelt_metadata-0.2.1/src/dapla_metadata/datasets}/utility/constants.py +4 -0
- {dapla_toolbelt_metadata-0.1.2/src/dataset → dapla_toolbelt_metadata-0.2.1/src/dapla_metadata/datasets}/utility/utils.py +92 -5
- {dapla_toolbelt_metadata-0.1.2 → dapla_toolbelt_metadata-0.2.1}/LICENSE +0 -0
- {dapla_toolbelt_metadata-0.1.2/src/dataset → dapla_toolbelt_metadata-0.2.1/src/dapla_metadata/datasets}/__init__.py +0 -0
- {dapla_toolbelt_metadata-0.1.2/src/dataset → dapla_toolbelt_metadata-0.2.1/src/dapla_metadata/datasets}/dapla_dataset_path_info.py +0 -0
- {dapla_toolbelt_metadata-0.1.2/src/dataset → dapla_toolbelt_metadata-0.2.1/src/dapla_metadata/datasets}/external_sources/__init__.py +0 -0
- {dapla_toolbelt_metadata-0.1.2/src/dataset → dapla_toolbelt_metadata-0.2.1/src/dapla_metadata/datasets}/external_sources/external_sources.py +0 -0
- {dapla_toolbelt_metadata-0.1.2/src/dataset → dapla_toolbelt_metadata-0.2.1/src/dapla_metadata/datasets}/model_backwards_compatibility.py +0 -0
- {dapla_toolbelt_metadata-0.1.2/src/dataset → dapla_toolbelt_metadata-0.2.1/src/dapla_metadata/datasets}/py.typed +0 -0
- {dapla_toolbelt_metadata-0.1.2/src/dataset → dapla_toolbelt_metadata-0.2.1/src/dapla_metadata/datasets}/utility/__init__.py +0 -0
- {dapla_toolbelt_metadata-0.1.2/src/dataset → dapla_toolbelt_metadata-0.2.1/src/dapla_metadata/datasets}/utility/enums.py +0 -0
{dapla_toolbelt_metadata-0.1.2 → dapla_toolbelt_metadata-0.2.1}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dapla-toolbelt-metadata
-Version: 0.1.2
+Version: 0.2.1
 Summary: Dapla Toolbelt Metadata
 Home-page: https://github.com/statisticsnorway/dapla-toolbelt-metadata
 License: MIT
@@ -15,30 +15,17 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Requires-Dist: arrow (>=1.3.0)
 Requires-Dist: beautifulsoup4 (>=4.12.3)
-Requires-Dist: black (>=24.8.0,<25.0.0)
-Requires-Dist: bs4 (>=0.0.2,<0.0.3)
-Requires-Dist: click (>=8.0.1)
 Requires-Dist: cloudpathlib[gs] (>=0.17.0)
-Requires-Dist: coverage (>=7.6.1,<8.0.0)
 Requires-Dist: dapla-toolbelt (>=1.3.3)
-Requires-Dist: faker (>=26.1.0,<27.0.0)
-Requires-Dist: furo (>=2024.7.18,<2025.0.0)
-Requires-Dist: gunicorn (>=21.2.0)
 Requires-Dist: pandas (>=1.4.2)
-Requires-Dist: pre-commit (>=3.8.0,<4.0.0)
 Requires-Dist: pyarrow (>=8.0.0)
 Requires-Dist: pydantic (>=2.5.2)
-Requires-Dist: pygments (>=2.18.0,<3.0.0)
 Requires-Dist: pyjwt (>=2.8.0)
-Requires-Dist: pytest (>=8.3.2,<9.0.0)
-Requires-Dist: pytest-mock (>=3.14.0,<4.0.0)
 Requires-Dist: python-dotenv (>=1.0.1)
 Requires-Dist: requests (>=2.31.0)
-Requires-Dist: requests-mock (>=1.12.1,<2.0.0)
-Requires-Dist: ruff (>=0.5.6,<0.6.0)
 Requires-Dist: ssb-datadoc-model (>=6.0.0,<7.0.0)
 Requires-Dist: ssb-klass-python (>=0.0.9)
-Requires-Dist:
+Requires-Dist: typing-extensions (>=4.12.2)
 Project-URL: Changelog, https://github.com/statisticsnorway/dapla-toolbelt-metadata/releases
 Project-URL: Documentation, https://statisticsnorway.github.io/dapla-toolbelt-metadata
 Project-URL: Repository, https://github.com/statisticsnorway/dapla-toolbelt-metadata
@@ -71,14 +58,16 @@ Description-Content-Type: text/markdown
 [black]: https://github.com/psf/black
 [poetry]: https://python-poetry.org/
 
+Tools and clients for working with the Dapla Metadata system.
+
 ## Features
 
--
+- Create and update metadata for datasets (Datadoc).
 
-
+### Coming
 
--
--
+- Read, create and update variable definitions.
+- Publish dataset metadata to Statistics Norway's data catalogue.
 
 ## Installation
 
@@ -90,7 +79,7 @@ pip install dapla-toolbelt-metadata
 
 ## Usage
 
-Please see the [Reference Guide] for
+Instructions and examples may be found in the [Dapla Manual](https://manual.dapla.ssb.no/statistikkere/). Please see the [Reference Guide] for API documentation.
 
 ## Contributing
 
{dapla_toolbelt_metadata-0.1.2 → dapla_toolbelt_metadata-0.2.1}/README.md

@@ -25,14 +25,16 @@
 [black]: https://github.com/psf/black
 [poetry]: https://python-poetry.org/
 
+Tools and clients for working with the Dapla Metadata system.
+
 ## Features
 
--
+- Create and update metadata for datasets (Datadoc).
 
-
+### Coming
 
--
--
+- Read, create and update variable definitions.
+- Publish dataset metadata to Statistics Norway's data catalogue.
 
 ## Installation
 
@@ -44,7 +46,7 @@ pip install dapla-toolbelt-metadata
 
 ## Usage
 
-Please see the [Reference Guide] for
+Instructions and examples may be found in the [Dapla Manual](https://manual.dapla.ssb.no/statistikkere/). Please see the [Reference Guide] for API documentation.
 
 ## Contributing
 
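The README now points users to the Dapla Manual for worked examples and to the Reference Guide for API documentation. For orientation only, a minimal Datadoc session under the 0.2.1 package layout might look like the sketch below; the constructor keyword `dataset_path` and the example file path are assumptions based on attribute names visible in core.py further down, not on documented API.

```python
# Hypothetical sketch only: the constructor signature and the example path are
# assumptions; consult the Reference Guide / Dapla Manual for the real API.
from dapla_metadata.datasets.core import Datadoc  # package layout introduced in 0.2.1

# Point Datadoc at a dataset file. Per core.py below, it extracts metadata from the
# file and, if a metadata document already exists, merges the two sets of metadata.
meta = Datadoc(dataset_path="befolkning_p2021_v1.parquet")

# Extracted dataset- and variable-level metadata are exposed as pydantic models.
print(meta.dataset)
print([variable.short_name for variable in meta.variables])
```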
{dapla_toolbelt_metadata-0.1.2 → dapla_toolbelt_metadata-0.2.1}/pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "dapla-toolbelt-metadata"
-version = "0.1.2"
+version = "0.2.1"
 description = "Dapla Toolbelt Metadata"
 authors = ["Team Metadata <metadata@ssb.no>"]
 license = "MIT"
@@ -10,19 +10,17 @@ repository = "https://github.com/statisticsnorway/dapla-toolbelt-metadata"
 documentation = "https://statisticsnorway.github.io/dapla-toolbelt-metadata"
 classifiers = ["Development Status :: 4 - Beta"]
 
-packages = [{ include = "
+packages = [{ include = "dapla_metadata", from = "src" }]
 
 [tool.poetry.urls]
 Changelog = "https://github.com/statisticsnorway/dapla-toolbelt-metadata/releases"
 
 [tool.poetry.dependencies]
-click = ">=8.0.1"
 python = ">=3.10,<4.0"
 pyarrow = ">=8.0.0"
 pydantic = ">=2.5.2"
 pandas = ">=1.4.2"
 dapla-toolbelt = ">=1.3.3"
-gunicorn = ">=21.2.0"
 arrow = ">=1.3.0"
 python-dotenv = ">=1.0.1"
 requests = ">=2.31.0"
@@ -31,27 +29,17 @@ cloudpathlib = { extras = ["gs"], version = ">=0.17.0" }
 pyjwt = ">=2.8.0"
 ssb-klass-python = ">=0.0.9"
 ssb-datadoc-model = "^6.0.0"
-
-pygments = "^2.18.0"
-black = "^24.8.0"
-coverage = "^7.6.1"
-furo = "^2024.7.18"
-pre-commit = "^3.8.0"
-ruff = "^0.5.6"
-pytest = "^8.3.2"
-pytest-mock = "^3.14.0"
-requests-mock = "^1.12.1"
-bs4 = "^0.0.2"
-types-beautifulsoup4 = "^4.12.0.20240511"
+typing-extensions = ">=4.12.2"
 
 [tool.poetry.group.dev.dependencies]
-
-
+black = ">=24.8.0"
+pygments = ">=2.18.0"
 coverage = { extras = ["toml"], version = ">=6.2" }
-
-
-
-
+faker = ">=26.1.0"
+furo = ">=2024.7.18"
+pre-commit = ">=3.8.0"
+ruff = ">=0.5.6"
+pytest = ">=8.3.2"
 sphinx = ">=6.2.1"
 sphinx-autobuild = ">=2021.3.14"
 sphinx-autodoc-typehints = ">=1.24.0"
@@ -63,31 +51,29 @@ mypy = ">=0.950"
 pytest-cov = ">=3.0.0"
 nbstripout = ">=0.5.0"
 python-kacl = "*"
-pytest-mock = "
+pytest-mock = ">=3.14.0"
 deptry = ">=0.12.0"
 pandas-stubs = "*"
 pyarrow-stubs = ">=10.0.1.9"
+requests-mock = ">=1.12.1"
 types-Pygments = "*"
 types-colorama = "*"
 types-setuptools = "*"
-types-beautifulsoup4 = "
-
-faker = "*"
+types-beautifulsoup4 = ">=4.12.0.20240511"
+ipykernel = "^6.29.5"
 
 [tool.pytest.ini_options]
 pythonpath = ["src"]
 
+
 [tool.coverage.paths]
 source = ["src", "*/site-packages"]
 tests = ["tests", "*/tests"]
 
 [tool.coverage.run]
 branch = true
-source = ["
-omit = [
-    "tests/*",
-    "__init__.py",
-]
+source = ["dapla_metadata"]
+omit = ["tests/*", "__init__.py"]
 relative_files = true
 
 [tool.coverage.report]
@@ -95,9 +81,7 @@ show_missing = true
 fail_under = 80
 
 [tool.mypy]
-plugins = [
-    "pydantic.mypy"
-]
+plugins = ["pydantic.mypy"]
 strict = false
 warn_unreachable = true
 pretty = true
@@ -129,10 +113,10 @@ disable_error_code = [
 ]
 
 [tool.ruff]
-force-exclude = true
+force-exclude = true # Apply excludes to pre-commit
 show-fixes = true
 src = ["src", "tests"]
-target-version = "py310"
+target-version = "py310" # Minimum Python version supported
 include = ["*.py", "*.pyi", "**/pyproject.toml", "*.ipynb"]
 extend-exclude = [
     "__pycache__",
@@ -162,26 +146,31 @@ force-single-line = true
 max-complexity = 15
 
 [tool.ruff.lint.pydocstyle]
-convention = "google"
+convention = "google" # You can also use "numpy".
 
 [tool.ruff.lint.pep8-naming]
-classmethod-decorators = [
+classmethod-decorators = [
+    "classmethod",
+    "validator",
+    "root_validator",
+    "pydantic.validator",
+]
 
 [tool.ruff.lint.per-file-ignores]
 "*/__init__.py" = ["F401"]
 "**/tests/*" = [
-    "ANN001",
-    "ANN002",
-    "ANN003",
-    "ANN201",
-    "ANN204",
-    "ANN205",
-    "ANN206",
-    "D100",
+    "ANN001", # type annotations don't add value for test functions
+    "ANN002", # type annotations don't add value for test functions
+    "ANN003", # type annotations don't add value for test functions
+    "ANN201", # type annotations don't add value for test functions
+    "ANN204", # type annotations don't add value for test functions
+    "ANN205", # type annotations don't add value for test functions
+    "ANN206", # type annotations don't add value for test functions
+    "D100", # docstrings are overkill for test functions
    "D101",
    "D102",
    "D103",
-    "S101",
+    "S101", # asserts are encouraged in pytest
 ]
 
 [build-system]
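The `packages` change above is the heart of the release: the code moves from a top-level `dataset` package to a `dapla_metadata.datasets` subpackage, and only `dapla_metadata` is shipped from `src`. Code that imported the old module path needs to switch to the new one, roughly as sketched below (the old spelling is inferred from the renamed file paths in the file list at the top, so treat it as an assumption):

```python
# Before (0.1.2): modules lived in a top-level "dataset" package, e.g. src/dataset/core.py,
# so imports presumably looked like this (inferred, not shown verbatim in the diff):
# from dataset.core import Datadoc

# After (0.2.1): everything sits under the "dapla_metadata" namespace shipped from src/.
from dapla_metadata.datasets.core import Datadoc
from dapla_metadata.datasets.utility.enums import SupportedLanguages
```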
{dapla_toolbelt_metadata-0.1.2/src/dataset → dapla_toolbelt_metadata-0.2.1/src/dapla_metadata/datasets}/code_list.py

@@ -4,8 +4,8 @@ import logging
 from dataclasses import dataclass
 from typing import TYPE_CHECKING
 
-from
-from
+from dapla_metadata.datasets.external_sources.external_sources import GetExternalSource
+from dapla_metadata.datasets.utility.enums import SupportedLanguages
 
 if TYPE_CHECKING:
     from concurrent.futures import ThreadPoolExecutor
{dapla_toolbelt_metadata-0.1.2/src/dataset → dapla_toolbelt_metadata-0.2.1/src/dapla_metadata/datasets}/config.py

@@ -10,10 +10,11 @@ from pprint import pformat
 from dotenv import dotenv_values
 from dotenv import load_dotenv
 
-from
-
-
-
+from dapla_metadata.datasets.utility.constants import (
+    DATADOC_STATISTICAL_SUBJECT_SOURCE_URL,
+)
+from dapla_metadata.datasets.utility.enums import DaplaRegion
+from dapla_metadata.datasets.utility.enums import DaplaService
 
 logger = logging.getLogger(__name__)
 
@@ -52,7 +53,10 @@ def get_jupyterhub_user() -> str | None:
 
 def get_statistical_subject_source_url() -> str | None:
     """Get the URL to the statistical subject source."""
-    return
+    return (
+        _get_config_item("DATADOC_STATISTICAL_SUBJECT_SOURCE_URL")
+        or DATADOC_STATISTICAL_SUBJECT_SOURCE_URL
+    )
 
 
 def get_dapla_region() -> DaplaRegion | None:
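`get_statistical_subject_source_url` now falls back to the packaged `DATADOC_STATISTICAL_SUBJECT_SOURCE_URL` constant instead of returning `None` when no config item is set, while an explicitly configured value still wins. The precedence can be restated in isolation as below; the direct `os.environ` lookup and the placeholder default URL are illustrative assumptions, since the real module resolves config items through `_get_config_item` and python-dotenv:

```python
import os

# Placeholder default; the real DATADOC_STATISTICAL_SUBJECT_SOURCE_URL constant
# lives in dapla_metadata.datasets.utility.constants and its value is not shown here.
DEFAULT_SUBJECT_SOURCE_URL = "https://example.com/statistical-subjects.xml"


def get_statistical_subject_source_url() -> str:
    """Return the configured subject source URL, or the packaged default."""
    configured = os.environ.get("DATADOC_STATISTICAL_SUBJECT_SOURCE_URL")
    return configured or DEFAULT_SUBJECT_SOURCE_URL
```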
{dapla_toolbelt_metadata-0.1.2/src/dataset → dapla_toolbelt_metadata-0.2.1/src/dapla_metadata/datasets}/core.py

@@ -13,28 +13,37 @@ from typing import TYPE_CHECKING
 from datadoc_model import model
 from datadoc_model.model import DataSetStatus
 
-from
-from
-from
-from
-from
-
-
-from
-from
-from
-from
-
-
-from
-from
-from
-from
-from
-from
-from
-from
-from
+from dapla_metadata.datasets import config
+from dapla_metadata.datasets import user_info
+from dapla_metadata.datasets.dapla_dataset_path_info import DaplaDatasetPathInfo
+from dapla_metadata.datasets.dataset_parser import DatasetParser
+from dapla_metadata.datasets.model_backwards_compatibility import (
+    is_metadata_in_container_structure,
+)
+from dapla_metadata.datasets.model_backwards_compatibility import upgrade_metadata
+from dapla_metadata.datasets.model_validation import ValidateDatadocMetadata
+from dapla_metadata.datasets.statistic_subject_mapping import StatisticSubjectMapping
+from dapla_metadata.datasets.utility.constants import (
+    DEFAULT_SPATIAL_COVERAGE_DESCRIPTION,
+)
+from dapla_metadata.datasets.utility.constants import INCONSISTENCIES_MESSAGE
+from dapla_metadata.datasets.utility.constants import METADATA_DOCUMENT_FILE_SUFFIX
+from dapla_metadata.datasets.utility.constants import NUM_OBLIGATORY_DATASET_FIELDS
+from dapla_metadata.datasets.utility.constants import NUM_OBLIGATORY_VARIABLES_FIELDS
+from dapla_metadata.datasets.utility.utils import calculate_percentage
+from dapla_metadata.datasets.utility.utils import derive_assessment_from_state
+from dapla_metadata.datasets.utility.utils import get_timestamp_now
+from dapla_metadata.datasets.utility.utils import merge_variables
+from dapla_metadata.datasets.utility.utils import normalize_path
+from dapla_metadata.datasets.utility.utils import (
+    num_obligatory_dataset_fields_completed,
+)
+from dapla_metadata.datasets.utility.utils import (
+    num_obligatory_variables_fields_completed,
+)
+from dapla_metadata.datasets.utility.utils import override_dataset_fields
+from dapla_metadata.datasets.utility.utils import set_default_values_dataset
+from dapla_metadata.datasets.utility.utils import set_default_values_variables
 
 if TYPE_CHECKING:
     import pathlib
@@ -138,10 +147,11 @@ class Datadoc:
         """
         extracted_metadata: model.DatadocMetadata | None = None
         existing_metadata: model.DatadocMetadata | None = None
-        if self.metadata_document
+        if self.metadata_document and self.metadata_document.exists():
             existing_metadata = self._extract_metadata_from_existing_document(
                 self.metadata_document,
             )
+
         if (
             self.dataset_path is not None
             and self.dataset == model.Dataset()
@@ -157,14 +167,7 @@ class Datadoc:
             and extracted_metadata is not None
             and existing_metadata is not None
         ):
-
-                extracted_metadata.dataset is not None
-                and extracted_metadata.dataset.file_path is not None
-            ):
-                existing_file_path = extracted_metadata.dataset.file_path
-            else:
-                msg = "Could not access existing dataset file path"
-                raise ValueError(msg)
+            existing_file_path = self._get_existing_file_path(extracted_metadata)
             self._check_ready_to_merge(
                 self.dataset_path,
                 Path(existing_file_path),
@@ -181,31 +184,39 @@ class Datadoc:
             self.metadata_document = self.build_metadata_document_path(
                 self.dataset_path,
             )
-
-
-
-
-
-
-
-
-
-
+            self._set_metadata(merged_metadata)
+        else:
+            self._set_metadata(existing_metadata or extracted_metadata)
+        set_default_values_variables(self.variables)
+        set_default_values_dataset(self.dataset)
+        self._create_variables_lookup()
+
+    def _get_existing_file_path(
+        self,
+        extracted_metadata: model.DatadocMetadata | None,
+    ) -> str:
+        if (
+            extracted_metadata is not None
+            and extracted_metadata.dataset is not None
+            and extracted_metadata.dataset.file_path is not None
        ):
-
-
-
-
-
-
+            return extracted_metadata.dataset.file_path
+        msg = "Could not access existing dataset file path"
+        raise ValueError(msg)
+
+    def _set_metadata(
+        self,
+        merged_metadata: model.DatadocMetadata | None,
+    ) -> None:
+        if not merged_metadata or not (
+            merged_metadata.dataset and merged_metadata.variables
        ):
-            self.dataset = extracted_metadata.dataset
-            self.variables = extracted_metadata.variables
-        else:
            msg = "Could not read metadata"
            raise ValueError(msg)
-
-
+        self.dataset = merged_metadata.dataset
+        self.variables = merged_metadata.variables
+
+    def _create_variables_lookup(self) -> None:
        self.variables_lookup = {
            v.short_name: v for v in self.variables if v.short_name
        }
@@ -300,55 +311,28 @@ class Datadoc:
                 "No existing metadata found, no merge to perform. Continuing with extracted metadata.",
             )
             return extracted_metadata or model.DatadocMetadata()
+
         if not extracted_metadata:
             return existing_metadata
+
         # Use the extracted metadata as a base
         merged_metadata = model.DatadocMetadata(
             dataset=copy.deepcopy(extracted_metadata.dataset),
             variables=[],
         )
-
-
-
-
-
-        for field in DATASET_FIELDS_FROM_EXISTING_METADATA:
-            setattr(
-                merged_metadata.dataset,
-                field,
-                getattr(existing_metadata.dataset, field),
-            )
+
+        override_dataset_fields(
+            merged_metadata=merged_metadata,
+            existing_metadata=existing_metadata,
+        )
 
         # Merge variables.
         # For each extracted variable, copy existing metadata into the merged metadata
-
-        existing_metadata
-
-
-
-        ):
-            for extracted in extracted_metadata.variables:
-                existing = next(
-                    (
-                        existing
-                        for existing in existing_metadata.variables
-                        if existing.short_name == extracted.short_name
-                    ),
-                    None,
-                )
-                if existing:
-                    existing.id = None  # Set to None so that it will be set assigned a fresh ID later
-                    existing.contains_data_from = (
-                        extracted.contains_data_from or existing.contains_data_from
-                    )
-                    existing.contains_data_until = (
-                        extracted.contains_data_until or existing.contains_data_until
-                    )
-                    merged_metadata.variables.append(existing)
-                else:
-                    # If there is no existing metadata for this variable, we just use what we have extracted
-                    merged_metadata.variables.append(extracted)
-        return merged_metadata
+        return merge_variables(
+            existing_metadata=existing_metadata,
+            extracted_metadata=extracted_metadata,
+            merged_metadata=merged_metadata,
+        )
 
     def _extract_metadata_from_existing_document(
         self,
{dapla_toolbelt_metadata-0.1.2/src/dataset → dapla_toolbelt_metadata-0.2.1/src/dapla_metadata/datasets}/dataset_parser.py

@@ -19,7 +19,7 @@ from datadoc_model.model import LanguageStringTypeItem
 from datadoc_model.model import Variable
 from pyarrow import parquet as pq
 
-from
+from dapla_metadata.datasets.utility.enums import SupportedLanguages
 
 if TYPE_CHECKING:
     import pyarrow as pa
{dapla_toolbelt_metadata-0.1.2/src/dataset → dapla_toolbelt_metadata-0.2.1/src/dapla_metadata/datasets}/model_validation.py

@@ -11,17 +11,23 @@ from datadoc_model import model
 from pydantic import model_validator
 from typing_extensions import Self
 
-from
-from
-from
-from
-from
-from
-
-
-from
-from
-from
+from dapla_metadata.datasets.utility.constants import DATE_VALIDATION_MESSAGE
+from dapla_metadata.datasets.utility.constants import NUM_OBLIGATORY_DATASET_FIELDS
+from dapla_metadata.datasets.utility.constants import NUM_OBLIGATORY_VARIABLES_FIELDS
+from dapla_metadata.datasets.utility.constants import OBLIGATORY_METADATA_WARNING
+from dapla_metadata.datasets.utility.utils import get_missing_obligatory_dataset_fields
+from dapla_metadata.datasets.utility.utils import (
+    get_missing_obligatory_variables_fields,
+)
+from dapla_metadata.datasets.utility.utils import get_timestamp_now
+from dapla_metadata.datasets.utility.utils import incorrect_date_order
+from dapla_metadata.datasets.utility.utils import (
+    num_obligatory_dataset_fields_completed,
+)
+from dapla_metadata.datasets.utility.utils import (
+    num_obligatory_variables_fields_completed,
+)
+from dapla_metadata.datasets.utility.utils import set_variables_inherit_from_dataset
 
 if TYPE_CHECKING:
     from datetime import datetime
{dapla_toolbelt_metadata-0.1.2/src/dataset → dapla_toolbelt_metadata-0.2.1/src/dapla_metadata/datasets}/statistic_subject_mapping.py

@@ -9,8 +9,8 @@ import requests
 from bs4 import BeautifulSoup
 from bs4 import ResultSet
 
-from
-from
+from dapla_metadata.datasets.external_sources.external_sources import GetExternalSource
+from dapla_metadata.datasets.utility.enums import SupportedLanguages
 
 if TYPE_CHECKING:
     from concurrent.futures import ThreadPoolExecutor
@@ -116,17 +116,18 @@ class StatisticSubjectMapping(GetExternalSource):
 
         Returns a BeautifulSoup ResultSet.
         """
+        if not self.source_url:
+            logger.debug("No statistic subject url supplied")
+            return None
+
         try:
-
-            response = requests.get(url, timeout=30)
+            response = requests.get(str(self.source_url), timeout=30)
             response.encoding = "utf-8"
-            logger.debug("Got response %s from %s", response,
+            logger.debug("Got response %s from %s", response, self.source_url)
             soup = BeautifulSoup(response.text, features="xml")
             return soup.find_all("hovedemne")
         except requests.exceptions.RequestException:
-            logger.exception(
-                "Exception while fetching statistical structure ",
-            )
+            logger.exception("Exception while fetching statistical structure")
             return None
 
     def _parse_statistic_subject_structure_xml(
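The fetch in `StatisticSubjectMapping` now returns early when no source URL is configured and casts the URL to `str` before handing it to `requests`. The same guard-then-fetch pattern, lifted out of the class for illustration, looks roughly like this (the function name and argument are assumptions; the request, XML parsing and `hovedemne` lookup mirror the diff above):

```python
import logging

import requests
from bs4 import BeautifulSoup

logger = logging.getLogger(__name__)


def fetch_main_subjects(source_url: str | None) -> list | None:
    """Fetch the statistical structure XML and return its <hovedemne> elements."""
    if not source_url:
        # Mirrors the new guard: skip the network call entirely when unconfigured.
        logger.debug("No statistic subject url supplied")
        return None
    try:
        response = requests.get(str(source_url), timeout=30)
        response.encoding = "utf-8"
        soup = BeautifulSoup(response.text, features="xml")
        return soup.find_all("hovedemne")
    except requests.exceptions.RequestException:
        logger.exception("Exception while fetching statistical structure")
        return None
```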
{dapla_toolbelt_metadata-0.1.2/src/dataset → dapla_toolbelt_metadata-0.2.1/src/dapla_metadata/datasets}/user_info.py

@@ -6,9 +6,9 @@ from typing import Protocol
 
 import jwt
 
-from
-from
-from
+from dapla_metadata.datasets import config
+from dapla_metadata.datasets.utility.enums import DaplaRegion
+from dapla_metadata.datasets.utility.enums import DaplaService
 
 logger = logging.getLogger(__name__)
 
{dapla_toolbelt_metadata-0.1.2/src/dataset → dapla_toolbelt_metadata-0.2.1/src/dapla_metadata/datasets}/utility/utils.py

@@ -14,13 +14,20 @@ from datadoc_model.model import Assessment
 from datadoc_model.model import DataSetState
 from datadoc_model.model import VariableRole
 
-from
-
-
+from dapla_metadata.datasets.utility.constants import (
+    DATASET_FIELDS_FROM_EXISTING_METADATA,
+)
+from dapla_metadata.datasets.utility.constants import NUM_OBLIGATORY_VARIABLES_FIELDS
+from dapla_metadata.datasets.utility.constants import (
+    OBLIGATORY_DATASET_METADATA_IDENTIFIERS,
+)
+from dapla_metadata.datasets.utility.constants import (
     OBLIGATORY_DATASET_METADATA_IDENTIFIERS_MULTILANGUAGE,
 )
-from
-
+from dapla_metadata.datasets.utility.constants import (
+    OBLIGATORY_VARIABLES_METADATA_IDENTIFIERS,
+)
+from dapla_metadata.datasets.utility.constants import (
     OBLIGATORY_VARIABLES_METADATA_IDENTIFIERS_MULTILANGUAGE,
 )
 
@@ -403,3 +410,83 @@ def running_in_notebook() -> bool:
     # interpreters and will throw a NameError. Therefore we're not running
     # in Jupyter.
     return False
+
+
+def override_dataset_fields(
+    merged_metadata: model.DatadocMetadata,
+    existing_metadata: model.DatadocMetadata,
+) -> None:
+    """Overrides specific fields in the dataset of `merged_metadata` with values from the dataset of `existing_metadata`.
+
+    This function iterates over a predefined list of fields, `DATASET_FIELDS_FROM_EXISTING_METADATA`,
+    and sets the corresponding fields in the `merged_metadata.dataset` object to the values
+    from the `existing_metadata.dataset` object.
+
+    Args:
+        merged_metadata: An instance of `DatadocMetadata` containing the dataset to be updated.
+        existing_metadata: An instance of `DatadocMetadata` containing the dataset whose values are used to update `merged_metadata.dataset`.
+
+    Returns:
+        `None`.
+    """
+    if merged_metadata.dataset and existing_metadata.dataset:
+        # Override the fields as defined
+        for field in DATASET_FIELDS_FROM_EXISTING_METADATA:
+            setattr(
+                merged_metadata.dataset,
+                field,
+                getattr(existing_metadata.dataset, field),
+            )
+
+
+def merge_variables(
+    existing_metadata: model.DatadocMetadata,
+    extracted_metadata: model.DatadocMetadata,
+    merged_metadata: model.DatadocMetadata,
+) -> model.DatadocMetadata:
+    """Merges variables from the extracted metadata into the existing metadata and updates the merged metadata.
+
+    This function compares the variables from `extracted_metadata` with those in `existing_metadata`.
+    For each variable in `extracted_metadata`, it checks if a variable with the same `short_name` exists
+    in `existing_metadata`. If a match is found, it updates the existing variable with information from
+    `extracted_metadata`. If no match is found, the variable from `extracted_metadata` is directly added to `merged_metadata`.
+
+    Args:
+        existing_metadata: The metadata object containing the current state of variables.
+        extracted_metadata: The metadata object containing new or updated variables to merge.
+        merged_metadata: The metadata object that will contain the result of the merge.
+
+    Returns:
+        model.DatadocMetadata: The `merged_metadata` object containing variables from both `existing_metadata`
+        and `extracted_metadata`.
+    """
+    if (
+        existing_metadata.variables is not None
+        and extracted_metadata is not None
+        and extracted_metadata.variables is not None
+        and merged_metadata.variables is not None
+    ):
+        for extracted in extracted_metadata.variables:
+            existing = next(
+                (
+                    existing
+                    for existing in existing_metadata.variables
+                    if existing.short_name == extracted.short_name
+                ),
+                None,
+            )
+            if existing:
+                existing.id = (
+                    None  # Set to None so that it will be set assigned a fresh ID later
+                )
+                existing.contains_data_from = (
+                    extracted.contains_data_from or existing.contains_data_from
+                )
+                existing.contains_data_until = (
+                    extracted.contains_data_until or existing.contains_data_until
+                )
+                merged_metadata.variables.append(existing)
+            else:
+                # If there is no existing metadata for this variable, we just use what we have extracted
+                merged_metadata.variables.append(extracted)
+    return merged_metadata
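Together, `override_dataset_fields` and `merge_variables` reproduce the merge logic that previously lived inline in core.py: dataset fields named in `DATASET_FIELDS_FROM_EXISTING_METADATA` are carried over from the existing document, and variables are matched on `short_name`. A toy invocation might look like the sketch below (the variable names are made up, and it assumes `ssb-datadoc-model` is installed):

```python
from datadoc_model import model

from dapla_metadata.datasets.utility.utils import merge_variables

# Metadata read back from an existing metadata document.
existing = model.DatadocMetadata(
    variables=[model.Variable(short_name="income")],
)
# Metadata freshly extracted from the dataset file: one known and one new variable.
extracted = model.DatadocMetadata(
    variables=[model.Variable(short_name="income"), model.Variable(short_name="age")],
)
# The merge target starts from an empty variable list.
merged = model.DatadocMetadata(variables=[])

merge_variables(
    existing_metadata=existing,
    extracted_metadata=extracted,
    merged_metadata=merged,
)

# "income" keeps its existing documentation (with a reset id); "age" comes from the
# extracted metadata unchanged.
print([variable.short_name for variable in merged.variables])
```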
The remaining files listed above with +0 -0 were moved to the new package location without content changes.