PyPI - invenio-vocabularies - Versions diffs - 4.0.0__py2.py3-none-any.whl → 4.1.1__py2.py3-none-any.whl - Mend

invenio-vocabularies 4.0.0__py2.py3-none-any.whl → 4.1.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of invenio-vocabularies might be problematic. Click here for more details.

Files changed (63)

invenio_vocabularies/contrib/common/ror/datastreams.py CHANGED Viewed

@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 #
 # Copyright (C) 2024 CERN.
+# Copyright (C) 2024 California Institute of Technology.
 #
 # Invenio-Vocabularies is free software; you can redistribute it and/or
 # modify it under the terms of the MIT License; see LICENSE file for more
@@ -11,9 +12,11 @@
 import io
 import requests
+from idutils import normalize_ror
-from invenio_vocabularies.datastreams.errors import ReaderError
+from invenio_vocabularies.datastreams.errors import ReaderError, TransformerError
 from invenio_vocabularies.datastreams.readers import BaseReader
+from invenio_vocabularies.datastreams.transformers import BaseTransformer
 class RORHTTPReader(BaseReader):
@@ -64,3 +67,100 @@ class RORHTTPReader(BaseReader):
 VOCABULARIES_DATASTREAM_READERS = {
     "ror-http": RORHTTPReader,
 }
+class RORTransformer(BaseTransformer):
+    """Transforms a JSON ROR record into a funders record."""
+    def __init__(
+        self, *args, vocab_schemes=None, funder_fundref_doi_prefix=None, **kwargs
+    ):
+        """Initializes the transformer."""
+        self.vocab_schemes = vocab_schemes
+        self.funder_fundref_doi_prefix = funder_fundref_doi_prefix
+        super().__init__(*args, **kwargs)
+    def apply(self, stream_entry, **kwargs):
+        """Applies the transformation to the stream entry."""
+        record = stream_entry.entry
+        ror = {}
+        ror["title"] = {}
+        ror["id"] = normalize_ror(record.get("id"))
+        if not ror["id"]:
+            raise TransformerError(_("Id not found in ROR entry."))
+        # Using set so aliases are unique
+        aliases = set()
+        acronym = None
+        for name in record.get("names"):
+            lang = name.get("lang", "en")
+            if lang == None:
+                lang = "en"
+            if "ror_display" in name["types"]:
+                ror["name"] = name["value"]
+            if "label" in name["types"]:
+                ror["title"][lang] = name["value"]
+            if "alias" in name["types"]:
+                aliases.add(name["value"])
+            if "acronym" in name["types"]:
+                # The first acronyn goes in acronym field to maintain
+                # compatability with existing data structure
+                if not acronym:
+                    acronym = name["value"]
+                else:
+                    aliases.add(name["value"])
+        if acronym:
+            ror["acronym"] = acronym
+        if aliases:
+            ror["aliases"] = list(aliases)
+        # ror_display is required and should be in every entry
+        if not ror["name"]:
+            raise TransformerError(
+                _("Name with type ror_display not found in ROR entry.")
+            )
+        # This only gets the first location, to maintain compatability
+        # with existing data structure
+        location = record.get("locations", [{}])[0].get("geonames_details", {})
+        ror["country"] = location.get("country_code")
+        ror["country_name"] = location.get("country_name")
+        ror["location_name"] = location.get("name")
+        ror["types"] = record.get("types")
+        status = record.get("status")
+        ror["status"] = status
+        # The ROR is always listed in identifiers, expected by serialization
+        ror["identifiers"] = [{"identifier": ror["id"], "scheme": "ror"}]
+        if self.vocab_schemes:
+            valid_schemes = set(self.vocab_schemes.keys())
+        else:
+            valid_schemes = set()
+        fund_ref = "fundref"
+        if self.funder_fundref_doi_prefix:
+            valid_schemes.add(fund_ref)
+        for identifier in record.get("external_ids"):
+            scheme = identifier["type"]
+            if scheme in valid_schemes:
+                value = identifier.get("preferred") or identifier.get("all")[0]
+                if scheme == fund_ref:
+                    if self.funder_fundref_doi_prefix:
+                        value = f"{self.funder_fundref_doi_prefix}/{value}"
+                        scheme = "doi"
+                ror["identifiers"].append(
+                    {
+                        "identifier": value,
+                        "scheme": scheme,
+                    }
+                )
+        stream_entry.entry = ror
+        return stream_entry
+VOCABULARIES_DATASTREAM_TRANSFORMERS = {
+    "ror": RORTransformer,
+}

invenio_vocabularies/contrib/funders/datastreams.py CHANGED Viewed

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2022 CERN.
+# Copyright (C) 2022-2024 CERN.
 # Copyright (C) 2024 California Institute of Technology.
 #
 # Invenio-Vocabularies is free software; you can redistribute it and/or
@@ -13,8 +13,6 @@ from idutils import normalize_ror
 from invenio_access.permissions import system_identity
 from invenio_i18n import lazy_gettext as _
-from ...datastreams.errors import TransformerError
-from ...datastreams.transformers import BaseTransformer
 from ...datastreams.writers import ServiceWriter
 from .config import funder_fundref_doi_prefix, funder_schemes
@@ -32,94 +30,6 @@ class FundersServiceWriter(ServiceWriter):
         return entry["id"]
-class RORTransformer(BaseTransformer):
-    """Transforms a JSON ROR record into a funders record."""
-    def apply(self, stream_entry, **kwargs):
-        """Applies the transformation to the stream entry."""
-        record = stream_entry.entry
-        funder = {}
-        funder["title"] = {}
-        funder["id"] = normalize_ror(record.get("id"))
-        if not funder["id"]:
-            raise TransformerError(_("Id not found in ROR entry."))
-        aliases = []
-        acronym = None
-        for name in record.get("names"):
-            # Some name entries have a `lang` key with a `None` value.
-            # Therefore, providing a default value to `name.get("lang")` is not enough,
-            # and we need instead to check if the result of `get` is None.
-            lang = name.get("lang")
-            if lang is None:
-                lang = "en"
-            if "ror_display" in name["types"]:
-                funder["name"] = name["value"]
-            if "label" in name["types"]:
-                funder["title"][lang] = name["value"]
-            if "alias" in name["types"]:
-                aliases.append(name["value"])
-            if "acronym" in name["types"]:
-                # The first acronyn goes in acronym field to maintain
-                # compatability with existing data structure
-                if not acronym:
-                    acronym = name["value"]
-                else:
-                    aliases.append(name["value"])
-        if acronym:
-            funder["acronym"] = acronym
-        if aliases:
-            funder["aliases"] = aliases
-        # ror_display is required and should be in every entry
-        if not funder["name"]:
-            raise TransformerError(
-                _("Name with type ror_display not found in ROR entry.")
-            )
-        # This only gets the first location, to maintain compatability
-        # with existing data structure
-        location = record.get("locations", [{}])[0].get("geonames_details", {})
-        funder["country"] = location.get("country_code")
-        funder["country_name"] = location.get("country_name")
-        funder["location_name"] = location.get("name")
-        funder["types"] = record.get("types")
-        status = record.get("status")
-        funder["status"] = status
-        # The ROR is always listed in identifiers, expected by serialization
-        funder["identifiers"] = [{"identifier": funder["id"], "scheme": "ror"}]
-        valid_schemes = set(funder_schemes.keys())
-        fund_ref = "fundref"
-        valid_schemes.add(fund_ref)
-        for identifier in record.get("external_ids"):
-            scheme = identifier["type"]
-            if scheme in valid_schemes:
-                value = identifier.get("preferred") or identifier.get("all")[0]
-                if scheme == fund_ref:
-                    value = f"{funder_fundref_doi_prefix}/{value}"
-                    scheme = "doi"
-                funder["identifiers"].append(
-                    {
-                        "identifier": value,
-                        "scheme": scheme,
-                    }
-                )
-        stream_entry.entry = funder
-        return stream_entry
-VOCABULARIES_DATASTREAM_TRANSFORMERS = {
-    "ror-funder": RORTransformer,
-}
-"""ROR Data Streams transformers."""
 VOCABULARIES_DATASTREAM_WRITERS = {
     "funders-service": FundersServiceWriter,
 }
@@ -137,7 +47,13 @@ DATASTREAM_CONFIG = {
         {"type": "json"},
     ],
     "transformers": [
-        {"type": "ror-funder"},
+        {
+            "type": "ror",
+            "args": {
+                "vocab_schemes": funder_schemes,
+                "funder_fundref_doi_prefix": funder_fundref_doi_prefix,
+            },
+        },
     ],
     "writers": [
         {

invenio_vocabularies/contrib/funders/jsonschemas/funders/funder-v1.0.0.json CHANGED Viewed

@@ -7,6 +7,9 @@
     "$schema": {
       "$ref": "local://definitions-v1.0.0.json#/$schema"
     },
+    "tags": {
+      "$ref": "local://vocabularies/definitions-v1.0.0.json#/tags"
+    },
     "country": {
       "type": "string",
       "description": "Represents a funder's origin country as a country code."

invenio_vocabularies/contrib/funders/mappings/os-v1/funders/funder-v1.0.0.json CHANGED Viewed

@@ -81,6 +81,9 @@
       "title": {
         "type": "object",
         "dynamic": "true"
+      },
+      "tags": {
+        "type": "keyword"
       }
     }
   }

invenio_vocabularies/contrib/funders/mappings/os-v2/funders/funder-v1.0.0.json CHANGED Viewed

@@ -81,6 +81,9 @@
       "title": {
         "type": "object",
         "dynamic": "true"
+      },
+      "tags": {
+        "type": "keyword"
       }
     }
   }

invenio_vocabularies/contrib/funders/mappings/v7/funders/funder-v1.0.0.json CHANGED Viewed

@@ -81,6 +81,9 @@
       "title": {
         "type": "object",
         "dynamic": "true"
+      },
+      "tags": {
+        "type": "keyword"
       }
     }
   }

invenio_vocabularies/contrib/funders/serializer.py CHANGED Viewed

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2022 CERN.
+# Copyright (C) 2022-2024 CERN.
 #
 # Invenio-Vocabularies is free software; you can redistribute it and/or
 # modify it under the terms of the MIT License; see LICENSE file for more
@@ -29,4 +29,5 @@ class FunderL10NItemSchema(Schema):
     props = fields.Dict(dump_only=True)
     name = fields.String(dump_only=True)
     country = fields.String(dump_only=True)
+    country_name = fields.String(dump_only=True)
     identifiers = fields.List(fields.Nested(IdentifierSchema), dump_only=True)

invenio_vocabularies/contrib/names/jsonschemas/names/name-v1.0.0.json CHANGED Viewed

@@ -8,6 +8,9 @@
     "$schema": {
       "$ref": "local://definitions-v1.0.0.json#/$schema"
     },
+    "tags": {
+      "$ref": "local://vocabularies/definitions-v1.0.0.json#/tags"
+    },
     "scheme": {
       "description": "Identifier of the name scheme.",
       "$ref": "local://definitions-v1.0.0.json#/identifier"

invenio_vocabularies/contrib/names/mappings/os-v1/names/name-v1.0.0.json CHANGED Viewed

@@ -24,6 +24,9 @@
       "id": {
         "type": "keyword"
       },
+      "tags": {
+        "type": "keyword"
+      },
       "name_sort": {
         "type": "keyword"
       },

invenio_vocabularies/contrib/names/mappings/os-v2/names/name-v1.0.0.json CHANGED Viewed

@@ -24,6 +24,9 @@
       "id": {
         "type": "keyword"
       },
+      "tags": {
+        "type": "keyword"
+      },
       "name_sort": {
         "type": "keyword"
       },

invenio_vocabularies/contrib/names/mappings/v7/names/name-v1.0.0.json CHANGED Viewed

@@ -24,6 +24,9 @@
       "id": {
         "type": "keyword"
       },
+      "tags": {
+        "type": "keyword"
+      },
       "name_sort": {
         "type": "keyword"
       },

invenio_vocabularies/contrib/subjects/jsonschemas/subjects/subject-v1.0.0.json CHANGED Viewed

@@ -8,6 +8,9 @@
     "$schema": {
       "$ref": "local://definitions-v1.0.0.json#/$schema"
     },
+    "tags": {
+      "$ref": "local://vocabularies/definitions-v1.0.0.json#/tags"
+    },
     "id": {
       "description": "URI or classification code as identifier - globally unique among all subject schemes.",
       "$ref": "local://definitions-v1.0.0.json#/identifier"

invenio_vocabularies/contrib/subjects/mappings/os-v1/subjects/subject-v1.0.0.json CHANGED Viewed

@@ -55,6 +55,9 @@
             "type": "keyword"
           }
         }
+      },
+      "tags": {
+        "type": "keyword"
       }
     }
   }

invenio_vocabularies/contrib/subjects/mappings/os-v2/subjects/subject-v1.0.0.json CHANGED Viewed

@@ -55,6 +55,9 @@
             "type": "keyword"
           }
         }
+      },
+      "tags": {
+        "type": "keyword"
       }
     }
   }

invenio_vocabularies/contrib/subjects/mappings/v7/subjects/subject-v1.0.0.json CHANGED Viewed

@@ -55,6 +55,9 @@
             "type": "keyword"
           }
         }
+      },
+      "tags": {
+        "type": "keyword"
       }
     }
   }

invenio_vocabularies/datastreams/factories.py CHANGED Viewed

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2021-2022 CERN.
+# Copyright (C) 2021-2024 CERN.
 #
 # Invenio-Vocabularies is free software; you can redistribute it and/or
 # modify it under the terms of the MIT License; see LICENSE file for more
@@ -36,7 +36,6 @@ class Factory:
         try:
             type_ = config["type"]
             args = config.get("args", {})
             return cls.options()[type_](**args)
         except KeyError:
             raise FactoryError(name=cls.FACTORY_NAME, key=type_)

invenio_vocabularies/datastreams/readers.py CHANGED Viewed

@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 #
 # Copyright (C) 2021-2024 CERN.
+# Copyright (C)      2024 University of Münster.
 #
 # Invenio-Vocabularies is free software; you can redistribute it and/or
 # modify it under the terms of the MIT License; see LICENSE file for more
@@ -19,11 +20,17 @@ from json.decoder import JSONDecodeError
 import requests
 import yaml
+from lxml import etree
 from lxml.html import parse as html_parse
 from .errors import ReaderError
 from .xml import etree_to_dict
+try:
+    import oaipmh_scythe
+except ImportError:
+    oaipmh_scythe = None
 class BaseReader(ABC):
     """Base reader."""
@@ -226,3 +233,80 @@ class XMLReader(BaseReader):
             raise ReaderError(f"Record not found in XML entry.")
         yield record
+class OAIPMHReader(BaseReader):
+    """OAIPMH reader."""
+    def __init__(
+        self,
+        *args,
+        base_url=None,
+        metadata_prefix=None,
+        set=None,
+        from_date=None,
+        until_date=None,
+        verb=None,
+        **kwargs,
+    ):
+        """Constructor."""
+        self._base_url = base_url
+        self._metadata_prefix = metadata_prefix if not None else "oai_dc"
+        self._set = set
+        self._until = until_date
+        self._from = from_date
+        self._verb = verb if not None else "ListRecords"
+        super().__init__(*args, **kwargs)
+    def _iter(self, scythe, *args, **kwargs):
+        """Read and parse an OAIPMH stream to dict."""
+        class OAIRecord(oaipmh_scythe.models.Record):
+            """An XML unpacking implementation for more complicated formats."""
+            def get_metadata(self):
+                """Extract and return the record's metadata as a dictionary."""
+                return xml_to_dict(
+                    self.xml.find(f".//{self._oai_namespace}metadata").getchildren()[0],
+                )
+        scythe.class_mapping["ListRecords"] = OAIRecord
+        try:
+            records = scythe.list_records(
+                from_=self._from,
+                until=self._until,
+                metadata_prefix=self._metadata_prefix,
+                set_=self._set,
+                ignore_deleted=True,
+            )
+            for record in records:
+                yield {"record": record}
+        except oaipmh_scythe.NoRecordsMatch:
+            raise ReaderError(f"No records found in OAI-PMH request.")
+    def read(self, item=None, *args, **kwargs):
+        """Reads from item or opens the file descriptor from origin."""
+        if item:
+            raise NotImplementedError(
+                "OAIPMHReader does not support being chained after another reader"
+            )
+        else:
+            with oaipmh_scythe.Scythe(self._base_url) as scythe:
+                yield from self._iter(scythe=scythe, *args, **kwargs)
+def xml_to_dict(tree: etree._Element):
+    """Convert an XML tree to a dictionary.
+    This function takes an XML element tree and converts it into a dictionary.
+    Args:
+        tree: The root element of the XML tree to be converted.
+    Returns:
+        A dictionary with the key "record".
+    """
+    dict_obj = dict()
+    dict_obj["record"] = etree.tostring(tree)
+    return dict_obj

invenio_vocabularies/datastreams/writers.py CHANGED Viewed

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2021-2022 CERN.
+# Copyright (C) 2021-2024 CERN.
 #
 # Invenio-Vocabularies is free software; you can redistribute it and/or
 # modify it under the terms of the MIT License; see LICENSE file for more
@@ -103,6 +103,6 @@ class YamlWriter(BaseWriter):
         with open(self._filepath, "a") as file:
             # made into array for safer append
             # will always read array (good for reader)
-            yaml.safe_dump([stream_entry.entry], file)
+            yaml.safe_dump([stream_entry.entry], file, allow_unicode=True)
         return stream_entry

invenio_vocabularies/ext.py CHANGED Viewed

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2020-2022 CERN.
+# Copyright (C) 2020-2024 CERN.
 # Copyright (C) 2023 Graz University of Technology.
 #
 # Invenio-Vocabularies is free software; you can redistribute it and/or
@@ -40,8 +40,14 @@ from .contrib.subjects import (
     SubjectsService,
     SubjectsServiceConfig,
 )
-from .resources.resource import VocabulariesResource
-from .services.service import VocabulariesService
+from .resources import (
+    VocabulariesAdminResource,
+    VocabulariesResource,
+    VocabulariesResourceConfig,
+    VocabularyTypeResourceConfig,
+)
+from .services.config import VocabularyTypesServiceConfig
+from .services.service import VocabulariesService, VocabularyTypeService
 class InvenioVocabularies(object):
@@ -76,6 +82,7 @@ class InvenioVocabularies(object):
             funders = FundersServiceConfig
             names = NamesServiceConfig
             subjects = SubjectsServiceConfig
+            vocabulary_types = VocabularyTypesServiceConfig
         return ServiceConfigs
@@ -93,9 +100,12 @@ class InvenioVocabularies(object):
         self.funders_service = FundersService(config=service_configs.funders)
         self.names_service = NamesService(config=service_configs.names)
         self.subjects_service = SubjectsService(config=service_configs.subjects)
-        self.service = VocabulariesService(
+        self.vocabularies_service = VocabulariesService(
             config=app.config["VOCABULARIES_SERVICE_CONFIG"],
         )
+        self.vocabulary_types_service = VocabularyTypeService(
+            config=service_configs.vocabulary_types
+        )
     def init_resource(self, app):
         """Initialize vocabulary resources."""
@@ -121,9 +131,13 @@ class InvenioVocabularies(object):
             config=SubjectsResourceConfig,
         )
         self.resource = VocabulariesResource(
-            service=self.service,
+            service=self.vocabularies_service,
             config=app.config["VOCABULARIES_RESOURCE_CONFIG"],
         )
+        self.vocabulary_admin_resource = VocabulariesAdminResource(
+            service=self.vocabulary_types_service,
+            config=VocabularyTypeResourceConfig,
+        )
 def finalize_app(app):
@@ -153,7 +167,8 @@ def init(app):
     sregistry.register(ext.funders_service, service_id="funders")
     sregistry.register(ext.names_service, service_id="names")
     sregistry.register(ext.subjects_service, service_id="subjects")
-    sregistry.register(ext.service, service_id="vocabularies")
+    sregistry.register(ext.vocabularies_service, service_id="vocabularies")
+    sregistry.register(ext.vocabulary_types_service, service_id="vocabulary-types")
     # Register indexers
     iregistry = app.extensions["invenio-indexer"].registry
     iregistry.register(ext.affiliations_service.indexer, indexer_id="affiliations")
@@ -161,4 +176,4 @@ def init(app):
     iregistry.register(ext.funders_service.indexer, indexer_id="funders")
     iregistry.register(ext.names_service.indexer, indexer_id="names")
     iregistry.register(ext.subjects_service.indexer, indexer_id="subjects")
-    iregistry.register(ext.service.indexer, indexer_id="vocabularies")
+    iregistry.register(ext.vocabularies_service.indexer, indexer_id="vocabularies")

invenio_vocabularies/factories.py CHANGED Viewed

@@ -12,6 +12,9 @@ from copy import deepcopy
 import yaml
 from invenio_records_resources.proxies import current_service_registry
+from .contrib.affiliations.datastreams import (
+    DATASTREAM_CONFIG as affiliations_ds_config,
+)
 from .contrib.awards.datastreams import DATASTREAM_CONFIG as awards_ds_config
 from .contrib.funders.datastreams import DATASTREAM_CONFIG as funders_ds_config
 from .contrib.names.datastreams import DATASTREAM_CONFIG as names_ds_config
@@ -68,11 +71,23 @@ class AwardsVocabularyConfig(VocabularyConfig):
         raise NotImplementedError("Service not implemented for Awards")
+class AffiliationsVocabularyConfig(VocabularyConfig):
+    """Affiliations Vocabulary Config."""
+    config = affiliations_ds_config
+    vocabulary_name = "affiliations"
+    def get_service(self):
+        """Get the service for the vocabulary."""
+        raise NotImplementedError("Service not implemented for Affiliations")
 def get_vocabulary_config(vocabulary):
     """Factory function to get the appropriate Vocabulary Config."""
     vocab_config = {
         "names": NamesVocabularyConfig,
         "funders": FundersVocabularyConfig,
         "awards": AwardsVocabularyConfig,
+        "affiliations": AffiliationsVocabularyConfig,
     }
     return vocab_config.get(vocabulary, VocabularyConfig)()

invenio_vocabularies/proxies.py CHANGED Viewed

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2021 CERN.
+# Copyright (C) 2021-2024 CERN.
 # Copyright (C) 2021 Northwestern University.
 #
 # Invenio-Vocabularies is free software; you can redistribute it and/or
@@ -19,7 +19,7 @@ def _ext_proxy(attr):
     )
-current_service = _ext_proxy("service")
+current_service = _ext_proxy("vocabularies_service")
 """Proxy to the instantiated vocabulary service."""

invenio_vocabularies/records/jsonschemas/vocabularies/definitions-v1.0.0.json CHANGED Viewed

@@ -10,5 +10,12 @@
   },
   "icon": {
     "type": "string"
+  },
+  "tags": {
+    "type": "array",
+    "description": "Tags for a vocabulary item.",
+    "items": {
+      "type": "string"
+    }
   }
 }

invenio-vocabularies 4.0.0__py2.py3-none-any.whl → 4.1.1__py2.py3-none-any.whl

Potentially problematic release.

invenio-vocabularies 4.0.0__py2.py3-none-any.whl → 4.1.1__py2.py3-none-any.whl