PyPI - invenio-vocabularies - Versions diffs - 6.6.0__py2.py3-none-any.whl → 6.8.0__py2.py3-none-any.whl - Mend

invenio-vocabularies 6.6.0__py2.py3-none-any.whl → 6.8.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of invenio-vocabularies might be problematic. Click here for more details.

Files changed (46) hide show

invenio_vocabularies/__init__.py CHANGED Viewed

@@ -10,6 +10,6 @@
 from .ext import InvenioVocabularies
-__version__ = "6.6.0"
+__version__ = "6.8.0"
 __all__ = ("__version__", "InvenioVocabularies")

invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingModal.js CHANGED Viewed

@@ -52,37 +52,13 @@ const CustomFundingSchema = Yup.object().shape({
       id: Yup.string().required(i18next.t("Funder is required.")),
     }),
     award: Yup.object().shape({
-      title: Yup.string().test({
-        name: "testTitle",
-        message: i18next.t("Title must be set alongside number."),
-        test: function testTitle(value) {
-          const { number } = this.parent;
-          if (number && !value) {
-            return false;
-          }
-          return true;
-        },
-      }),
-      number: Yup.string().test({
-        name: "testNumber",
-        message: i18next.t("Number must be set alongside title."),
-        test: function testNumber(value) {
-          const { title } = this.parent;
-          if (title && !value) {
-            return false;
-          }
-          return true;
-        },
-      }),
+      title: Yup.string(),
+      number: Yup.string(),
       url: Yup.string()
         .url(i18next.t("URL must be valid."))
         .test({
           name: "validateUrlDependencies",
-          message: i18next.t("URL must be set alongside title and number."),
+          message: i18next.t("URL must be set alongside title or number."),
           test: function testUrl(value) {
             const { title, number } = this.parent;

invenio_vocabularies/cli.py CHANGED Viewed

@@ -29,6 +29,8 @@ def _process_vocab(config, num_samples=None):
         readers_config=config["readers"],
         transformers_config=config.get("transformers"),
         writers_config=config["writers"],
+        batch_size=config.get("batch_size", 1000),
+        write_many=config.get("write_many", False),
     )
     success, errored, filtered = 0, 0, 0

invenio_vocabularies/config.py CHANGED Viewed

@@ -10,6 +10,8 @@
 """Vocabularies configuration."""
+import re
 import idutils
 from invenio_i18n import lazy_gettext as _
@@ -19,6 +21,9 @@ from .datastreams.readers import (
     JsonLinesReader,
     JsonReader,
     OAIPMHReader,
+    RDFReader,
+    SimpleHTTPReader,
+    SPARQLReader,
     TarReader,
     XMLReader,
     YamlReader,
@@ -43,6 +48,8 @@ VOCABULARIES_IDENTIFIER_SCHEMES = {
 }
 """"Generic identifier schemes, usable by other vocabularies."""
+edmo_regexp = re.compile(r"^https://edmo\.seadatanet\.org/report/\d+$")
 def is_pic(val):
     """Test if argument is a Participant Identification Code (PIC)."""
@@ -51,9 +58,15 @@ def is_pic(val):
     return val.isdigit()
+def is_edmo(val):
+    """Test if argument is a European Directory of Marine Organisations (EDMO) identifier."""
+    return edmo_regexp.match(val)
 VOCABULARIES_AFFILIATION_SCHEMES = {
     **VOCABULARIES_IDENTIFIER_SCHEMES,
     "pic": {"label": _("PIC"), "validator": is_pic},
+    "edmo": {"label": _("EDMO"), "validator": is_edmo},
 }
 """Affiliations allowed identifier schemes."""
@@ -135,6 +148,9 @@ VOCABULARIES_DATASTREAM_READERS = {
     "jsonl": JsonLinesReader,
     "gzip": GzipReader,
     "tar": TarReader,
+    "http": SimpleHTTPReader,
+    "rdf": RDFReader,
+    "sparql": SPARQLReader,
     "yaml": YamlReader,
     "zip": ZipReader,
     "xml": XMLReader,
@@ -172,9 +188,22 @@ VOCABULARIES_TYPES_SEARCH = {
 }
 """Vocabulary type search configuration."""
-SUBJECTS_EUROSCIVOC_FILE_URL = "https://publications.europa.eu/resource/distribution/euroscivoc/rdf/skos_ap_eu/EuroSciVoc-skos-ap-eu.rdf"
+VOCABULARIES_SUBJECTS_EUROSCIVOC_FILE_URL = "https://publications.europa.eu/resource/distribution/euroscivoc/rdf/skos_ap_eu/EuroSciVoc-skos-ap-eu.rdf"
 """Subject EuroSciVoc file download link."""
+VOCABULARIES_SUBJECTS_GEMET_FILE_URL = (
+    "https://www.eionet.europa.eu/gemet/latest/gemet.rdf.gz"
+)
+"""Subject GEMET file download link."""
+VOCABULARIES_SUBJECTS_BODC_PUV_FILE_URL = "http://vocab.nerc.ac.uk/collection/P01/current/?_profile=nvs&_mediatype=application/rdf+xml"
+"""Subject BODC-PUV file download link."""
+VOCABULARIES_AFFILIATIONS_EDMO_COUNTRY_MAPPING = {
+    "Cape Verde": "Cabo Verde",
+}
+"""Affiliations EDMO Country name remapping dictionary."""
 VOCABULARIES_ORCID_ACCESS_KEY = "TODO"
 """ORCID access key to access the s3 bucket."""
 VOCABULARIES_ORCID_SECRET_KEY = "TODO"
@@ -187,3 +216,16 @@ VOCABULARIES_ORCID_SYNC_SINCE = {
     "days": 1,
 }
 """ORCID time shift to sync. Parameters accepted are the ones passed to 'datetime.timedelta'."""
+VOCABULARIES_ORCID_ORG_IDS_MAPPING_PATH = None
+"""Path to the CSV file for mapping ORCiD organization IDs to affiliation IDs.
+The path can be specified as either an absolute path or a relative path within the
+Flask app instance folder (i.e. ``current_app.instance_path``).
+The CSV file should have the following columns:
+- `org_scheme`: The ORCiD organization ID.
+- `org_id`: The ORCiD organization ID.
+- `aff_id`: The affiliation ID to map to.
+"""

invenio_vocabularies/contrib/affiliations/config.py CHANGED Viewed

@@ -13,7 +13,9 @@ from invenio_i18n import get_locale
 from invenio_i18n import lazy_gettext as _
 from invenio_records_resources.services import SearchOptions
 from invenio_records_resources.services.records.components import DataComponent
-from invenio_records_resources.services.records.params import SuggestQueryParser
+from invenio_records_resources.services.records.queryparser import (
+    CompositeSuggestQueryParser,
+)
 from werkzeug.local import LocalProxy
 from ...services.components import PIDComponent
@@ -21,23 +23,32 @@ from ...services.components import PIDComponent
 affiliation_schemes = LocalProxy(
     lambda: current_app.config["VOCABULARIES_AFFILIATION_SCHEMES"]
 )
-localized_title = LocalProxy(lambda: f"title.{get_locale()}^20")
+affiliation_edmo_country_mappings = LocalProxy(
+    lambda: current_app.config["VOCABULARIES_AFFILIATIONS_EDMO_COUNTRY_MAPPING"]
+)
+localized_title = LocalProxy(lambda: f"title.{get_locale()}^2")
 class AffiliationsSearchOptions(SearchOptions):
     """Search options."""
-    suggest_parser_cls = SuggestQueryParser.factory(
+    suggest_parser_cls = CompositeSuggestQueryParser.factory(
         fields=[
-            "name^100",
-            "acronym.keyword^100",
-            "acronym^40",
+            # We boost the acronym fields, since they're smaller words and are more
+            # likely to be used in a query.
+            "acronym.keyword^50",
+            "acronym^10",
+            "name^10",
+            # Aliases can sometimes be shorter, so we boost them a bit.
+            "aliases^5",
             localized_title,
-            "id^20",
-            "aliases^20",
+            "id^2",
+            # Allow to search identifiers directly (e.g. ROR)
+            "identifiers.identifier",
+            "country",
+            "country_name",
+            "types",
         ],
-        type="most_fields",  # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html#multi-match-types
-        fuzziness="AUTO",  # https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#fuzziness
     )
     sort_default = "bestmatch"

invenio_vocabularies/contrib/affiliations/datastreams.py CHANGED Viewed

@@ -11,12 +11,14 @@
 from copy import deepcopy
+import pycountry
 from flask import current_app
 from ...datastreams import StreamEntry
-from ...datastreams.errors import TransformerError, WriterError
+from ...datastreams.errors import TransformerError
 from ...datastreams.transformers import BaseTransformer
 from ...datastreams.writers import ServiceWriter
+from ..affiliations.config import affiliation_edmo_country_mappings
 from ..common.ror.datastreams import RORTransformer
@@ -119,6 +121,66 @@ class OpenAIREAffiliationsServiceWriter(ServiceWriter):
         return StreamEntry(self._service.update(self._identity, vocab_id, updated))
+class EDMOOrganizationTransformer(BaseTransformer):
+    """Transformer class to convert EDMO RDF data to a dictionary format."""
+    def apply(self, stream_entry, **kwargs):
+        """Applies the transformation to the stream entry."""
+        record = stream_entry.entry
+        edmo_uri = record["org"]["value"]
+        id_ = "edmo:" + edmo_uri.split("/")[-1]
+        name = record["name"]["value"]
+        alt_name = record.get("altName", {}).get("value")
+        country_name = record.get("countryName", {}).get("value")
+        locality = record.get("locality", {}).get("value")
+        deprecated = record["deprecated"]["value"]
+        # Fix organizations with the old country name "Cape Verde".
+        # "Cabo Verde" is the new official name also used by ROR, e.g. https://api.ror.org/v2/organizations/001fphc23
+        if country_name in affiliation_edmo_country_mappings:
+            country_name = affiliation_edmo_country_mappings[country_name]
+        # Logic to convert a country name to a 2 letters country code.
+        country = None
+        if country_name:
+            country_dict = pycountry.countries.get(name=country_name)
+            if country_dict:
+                country = country_dict.alpha_2
+            else:
+                raise TransformerError([f"No alpha_2 country found for: {record}"])
+        # Mandatory fields
+        organization = {
+            "id": id_,
+            "identifiers": [
+                {
+                    "scheme": "edmo",
+                    "identifier": edmo_uri,
+                }
+            ],
+            "name": name,
+            "title": {
+                "en": name,
+            },
+        }
+        # Optional fields
+        if alt_name:
+            organization["acronym"] = alt_name
+        if country_name:
+            organization["country_name"] = country_name
+        if country:
+            organization["country"] = country
+        if locality:
+            organization["location_name"] = locality
+        stream_entry.entry = organization
+        return stream_entry
 VOCABULARIES_DATASTREAM_READERS = {}
 """Affiliations datastream readers."""
@@ -131,6 +193,7 @@ VOCABULARIES_DATASTREAM_WRITERS = {
 VOCABULARIES_DATASTREAM_TRANSFORMERS = {
     "ror-affiliations": AffiliationsRORTransformer,
     "openaire-organization": OpenAIREOrganizationTransformer,
+    "edmo-organization": EDMOOrganizationTransformer,
 }
 """Affiliations datastream transformers."""
@@ -196,3 +259,42 @@ DATASTREAM_CONFIG_OPENAIRE = {
     ],
 }
 """Alternative Data Stream configuration for OpenAIRE Affiliations."""
+DATASTREAM_CONFIG_EDMO = {
+    "readers": [
+        {
+            "type": "sparql",
+            "args": {
+                "origin": "https://edmo.seadatanet.org/sparql/sparql",
+                "query": """
+                    SELECT ?org ?name ?altName ?countryName ?locality ?deprecated
+                    WHERE {
+                        ?org a <http://www.w3.org/ns/org#Organization> .
+                        ?org <http://www.w3.org/ns/org#name> ?name .
+                        OPTIONAL { ?org <http://www.w3.org/2004/02/skos/core#altName> ?altName } .
+                        OPTIONAL { ?org <http://www.w3.org/2006/vcard/ns#country-name> ?countryName } .
+                        OPTIONAL { ?org <http://www.w3.org/2006/vcard/ns#locality> ?locality } .
+                        OPTIONAL { ?org <http://www.w3.org/2002/07/owl#deprecated> ?deprecated } .
+                        FILTER (!?deprecated)
+                    }
+                    """,
+            },
+        }
+    ],
+    "transformers": [
+        {
+            "type": "edmo-organization",
+        },
+    ],
+    "writers": [
+        {
+            "type": "async",
+            "args": {
+                "writer": {
+                    "type": "affiliations-service",
+                }
+            },
+        },
+    ],
+}
+"""Alternative Data Stream configuration for EDMO Affiliations."""

invenio_vocabularies/contrib/awards/datastreams.py CHANGED Viewed

@@ -117,6 +117,13 @@ class OpenAIREProjectTransformer(BaseTransformer):
         if acronym:
             award["acronym"] = acronym
+        if "startDate" in record:
+            award["start_date"] = record["startDate"]
+        if "endDate" in record:
+            award["end_date"] = record["endDate"]
+        if "summary" in record:
+            award["description"] = {"en": record["summary"]}
         stream_entry.entry = award
         return stream_entry

invenio_vocabularies/contrib/awards/jsonschemas/awards/award-v1.0.0.json CHANGED Viewed

@@ -43,6 +43,15 @@
     "program": {
       "type": "string"
     },
+    "start_date": {
+      "type": "string"
+    },
+    "end_date": {
+      "type": "string"
+    },
+    "description": {
+      "$ref": "local://vocabularies/definitions-v1.0.0.json#/description"
+    },
     "subjects": {
       "description": "Award's subjects.",
       "type": "array",

invenio_vocabularies/contrib/awards/mappings/os-v1/awards/award-v1.0.0.json CHANGED Viewed

@@ -9,6 +9,15 @@
             "type": "search_as_you_type"
           }
         }
+      },
+      {
+        "i18n_description": {
+          "path_match": "description.*",
+          "match_mapping_type": "string",
+          "mapping": {
+            "type": "text"
+          }
+        }
       }
     ],
     "dynamic": "strict",
@@ -58,9 +67,21 @@
       "acronym": {
         "type": "keyword",
         "fields": {
-          "text": { "type": "text" }
+          "text": {
+            "type": "text"
+          }
         }
       },
+      "start_date": {
+        "type": "date"
+      },
+      "end_date": {
+        "type": "date"
+      },
+      "description": {
+        "type": "object",
+        "dynamic": "true"
+      },
       "program": {
         "type": "keyword"
       },

invenio_vocabularies/contrib/awards/mappings/os-v2/awards/award-v1.0.0.json CHANGED Viewed

@@ -9,6 +9,15 @@
             "type": "search_as_you_type"
           }
         }
+      },
+      {
+        "i18n_description": {
+          "path_match": "description.*",
+          "match_mapping_type": "string",
+          "mapping": {
+            "type": "text"
+          }
+        }
       }
     ],
     "dynamic": "strict",
@@ -58,9 +67,21 @@
       "acronym": {
         "type": "keyword",
         "fields": {
-          "text": { "type": "text" }
+          "text": {
+            "type": "text"
+          }
         }
       },
+      "start_date": {
+        "type": "date"
+      },
+      "end_date": {
+        "type": "date"
+      },
+      "description": {
+        "type": "object",
+        "dynamic": "true"
+      },
       "program": {
         "type": "keyword"
       },

invenio_vocabularies/contrib/awards/mappings/v7/awards/award-v1.0.0.json CHANGED Viewed

@@ -9,6 +9,15 @@
             "type": "search_as_you_type"
           }
         }
+      },
+      {
+        "i18n_description": {
+          "path_match": "description.*",
+          "match_mapping_type": "string",
+          "mapping": {
+            "type": "text"
+          }
+        }
       }
     ],
     "dynamic": "strict",
@@ -58,9 +67,21 @@
       "acronym": {
         "type": "keyword",
         "fields": {
-          "text": { "type": "text" }
+          "text": {
+            "type": "text"
+          }
         }
       },
+      "start_date": {
+        "type": "date"
+      },
+      "end_date": {
+        "type": "date"
+      },
+      "description": {
+        "type": "object",
+        "dynamic": "true"
+      },
       "program": {
         "type": "keyword"
       },

invenio_vocabularies/contrib/awards/schema.py CHANGED Viewed

@@ -12,7 +12,7 @@ from functools import partial
 from invenio_i18n import lazy_gettext as _
 from marshmallow import Schema, ValidationError, fields, validate, validates_schema
-from marshmallow_utils.fields import IdentifierSet, SanitizedUnicode
+from marshmallow_utils.fields import IdentifierSet, ISODateString, SanitizedUnicode
 from marshmallow_utils.schemas import IdentifierSchema
 from ...services.schema import (
@@ -61,6 +61,10 @@ class AwardSchema(BaseVocabularySchema, ModePIDFieldVocabularyMixin):
     organizations = fields.List(fields.Nested(AwardOrganizationRelationSchema))
+    start_date = ISODateString()
+    end_date = ISODateString()
     id = SanitizedUnicode(
         validate=validate.Length(min=1, error=_("PID cannot be blank."))
     )
@@ -90,9 +94,11 @@ class AwardRelationSchema(Schema):
         id_ = data.get("id")
         number = data.get("number")
         title = data.get("title")
-        if not id_ and not (number and title):
+        if not id_ and not (number or title):
             raise ValidationError(
-                _("An existing id or number/title must be present."), "award"
+                _("An existing id or either number or title must be present."),
+                "award",
             )

invenio_vocabularies/contrib/funders/config.py CHANGED Viewed

@@ -13,7 +13,9 @@ from invenio_i18n import get_locale
 from invenio_i18n import lazy_gettext as _
 from invenio_records_resources.services import SearchOptions
 from invenio_records_resources.services.records.components import DataComponent
-from invenio_records_resources.services.records.params import SuggestQueryParser
+from invenio_records_resources.services.records.queryparser import (
+    CompositeSuggestQueryParser,
+)
 from werkzeug.local import LocalProxy
 from ...services.components import ModelPIDComponent
@@ -23,24 +25,29 @@ funder_schemes = LocalProxy(lambda: current_app.config["VOCABULARIES_FUNDER_SCHE
 funder_fundref_doi_prefix = LocalProxy(
     lambda: current_app.config["VOCABULARIES_FUNDER_DOI_PREFIX"]
 )
-localized_title = LocalProxy(lambda: f"title.{get_locale()}^20")
+localized_title = LocalProxy(lambda: f"title.{get_locale()}^2")
 class FundersSearchOptions(SearchOptions):
     """Search options."""
-    suggest_parser_cls = SuggestQueryParser.factory(
+    suggest_parser_cls = CompositeSuggestQueryParser.factory(
         fields=[
-            "name^100",
-            "acronym.keyword^100",
-            "acronym^40",
+            # We boost the acronym fields, since they're smaller words and are more
+            # likely to be used in a query.
+            "acronym.keyword^50",
+            "acronym^10",
+            "name^10",
+            # Aliases can sometimes be shorter, so we boost them a bit.
+            "aliases^5",
             localized_title,
-            "id^20",
-            "aliases^20",
-            "identifiers.identifier^10",
-        ],
-        type="most_fields",  # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html#multi-match-types
-        fuzziness="AUTO",  # https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#fuzziness
+            "id^2",
+            # Allow to search identifiers directly (e.g. ROR)
+            "identifiers.identifier",
+            "country",
+            "country_name",
+            "types",
+        ]
     )
     sort_default = "bestmatch"

invenio_vocabularies/contrib/names/config.py CHANGED Viewed

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2021 CERN.
+# Copyright (C) 2021-2024 CERN.
 #
 # Invenio-Vocabularies is free software; you can redistribute it and/or
 # modify it under the terms of the MIT License; see LICENSE file for more
@@ -15,7 +15,9 @@ from invenio_records_resources.services.records.components import (
     DataComponent,
     RelationsComponent,
 )
-from invenio_records_resources.services.records.params import SuggestQueryParser
+from invenio_records_resources.services.records.queryparser import (
+    CompositeSuggestQueryParser,
+)
 from werkzeug.local import LocalProxy
 from ...services.components import PIDComponent
@@ -26,16 +28,17 @@ names_schemes = LocalProxy(lambda: current_app.config["VOCABULARIES_NAMES_SCHEME
 class NamesSearchOptions(SearchOptions):
     """Search options."""
-    suggest_parser_cls = SuggestQueryParser.factory(
+    suggest_parser_cls = CompositeSuggestQueryParser.factory(
         fields=[
-            "given_name^100",
-            "name^70",
-            "family_name^50",
-            "identifiers.identifier^20",
-            "affiliations.name^20",
+            "name^5",
+            # We boost the affiliation acronym fields, since they're short and more
+            # likely to be used in a query.
+            "affiliations.acronym.keyword^3",
+            "affiliations.acronym",
+            "affiliations.name",
+            # Allow to search identifiers directly (e.g. ORCID)
+            "identifiers.identifier",
         ],
-        type="most_fields",  # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html#multi-match-types
-        fuzziness="AUTO",
     )
     sort_default = "bestmatch"

invenio-vocabularies 6.6.0__py2.py3-none-any.whl → 6.8.0__py2.py3-none-any.whl

Potentially problematic release.

invenio-vocabularies 6.6.0__py2.py3-none-any.whl → 6.8.0__py2.py3-none-any.whl