invenio-vocabularies 2.3.1__py2.py3-none-any.whl → 6.3.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of invenio-vocabularies might be problematic. Click here for more details.
- invenio_vocabularies/__init__.py +2 -2
- invenio_vocabularies/administration/__init__.py +10 -0
- invenio_vocabularies/administration/views/__init__.py +10 -0
- invenio_vocabularies/administration/views/vocabularies.py +45 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/package.json +1 -7
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/CustomAwardForm.js +80 -64
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingField.js +49 -41
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingModal.js +5 -7
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/NoAwardResults.js +3 -3
- invenio_vocabularies/cli.py +31 -44
- invenio_vocabularies/config.py +68 -4
- invenio_vocabularies/contrib/affiliations/affiliations.py +11 -0
- invenio_vocabularies/contrib/affiliations/api.py +1 -2
- invenio_vocabularies/contrib/affiliations/config.py +13 -2
- invenio_vocabularies/contrib/affiliations/datastreams.py +186 -0
- invenio_vocabularies/contrib/affiliations/jsonschemas/affiliations/affiliation-v1.0.0.json +38 -1
- invenio_vocabularies/contrib/affiliations/mappings/os-v1/affiliations/affiliation-v1.0.0.json +22 -1
- invenio_vocabularies/contrib/affiliations/mappings/os-v1/affiliations/affiliation-v2.0.0.json +171 -0
- invenio_vocabularies/contrib/affiliations/mappings/os-v2/affiliations/affiliation-v1.0.0.json +22 -1
- invenio_vocabularies/contrib/affiliations/mappings/os-v2/affiliations/affiliation-v2.0.0.json +171 -0
- invenio_vocabularies/contrib/affiliations/mappings/v7/affiliations/affiliation-v1.0.0.json +22 -1
- invenio_vocabularies/contrib/affiliations/schema.py +17 -3
- invenio_vocabularies/contrib/affiliations/services.py +1 -2
- invenio_vocabularies/contrib/awards/awards.py +17 -5
- invenio_vocabularies/contrib/awards/datastreams.py +241 -7
- invenio_vocabularies/contrib/awards/jsonschemas/awards/award-v1.0.0.json +38 -0
- invenio_vocabularies/contrib/awards/mappings/os-v1/awards/award-v1.0.0.json +51 -2
- invenio_vocabularies/contrib/awards/mappings/os-v2/awards/award-v1.0.0.json +51 -2
- invenio_vocabularies/contrib/awards/mappings/v7/awards/award-v1.0.0.json +51 -2
- invenio_vocabularies/contrib/awards/schema.py +16 -1
- invenio_vocabularies/contrib/awards/serializer.py +8 -1
- invenio_vocabularies/contrib/awards/services.py +1 -2
- invenio_vocabularies/contrib/common/__init__.py +9 -0
- invenio_vocabularies/contrib/common/openaire/__init__.py +9 -0
- invenio_vocabularies/contrib/common/openaire/datastreams.py +84 -0
- invenio_vocabularies/contrib/common/ror/__init__.py +9 -0
- invenio_vocabularies/contrib/common/ror/datastreams.py +220 -0
- invenio_vocabularies/contrib/funders/config.py +11 -2
- invenio_vocabularies/contrib/funders/datastreams.py +40 -62
- invenio_vocabularies/contrib/funders/funders.py +3 -1
- invenio_vocabularies/contrib/funders/jsonschemas/funders/funder-v1.0.0.json +36 -1
- invenio_vocabularies/contrib/funders/mappings/os-v1/funders/funder-v1.0.0.json +22 -1
- invenio_vocabularies/contrib/funders/mappings/os-v1/funders/funder-v2.0.0.json +156 -0
- invenio_vocabularies/contrib/funders/mappings/os-v2/funders/funder-v1.0.0.json +22 -1
- invenio_vocabularies/contrib/funders/mappings/os-v2/funders/funder-v2.0.0.json +156 -0
- invenio_vocabularies/contrib/funders/mappings/v7/funders/funder-v1.0.0.json +22 -1
- invenio_vocabularies/contrib/funders/schema.py +8 -0
- invenio_vocabularies/contrib/funders/serializer.py +2 -1
- invenio_vocabularies/contrib/names/config.py +5 -3
- invenio_vocabularies/contrib/names/datastreams.py +172 -4
- invenio_vocabularies/contrib/names/jsonschemas/names/name-v1.0.0.json +3 -0
- invenio_vocabularies/contrib/names/mappings/os-v1/names/name-v1.0.0.json +3 -0
- invenio_vocabularies/contrib/names/mappings/os-v1/names/name-v2.0.0.json +150 -0
- invenio_vocabularies/contrib/names/mappings/os-v2/names/name-v1.0.0.json +3 -0
- invenio_vocabularies/contrib/names/mappings/os-v2/names/name-v2.0.0.json +150 -0
- invenio_vocabularies/contrib/names/mappings/v7/names/name-v1.0.0.json +3 -0
- invenio_vocabularies/contrib/names/names.py +15 -3
- invenio_vocabularies/contrib/names/permissions.py +20 -0
- invenio_vocabularies/contrib/names/s3client.py +44 -0
- invenio_vocabularies/contrib/names/schema.py +14 -0
- invenio_vocabularies/contrib/subjects/config.py +9 -3
- invenio_vocabularies/contrib/subjects/datastreams.py +61 -0
- invenio_vocabularies/contrib/subjects/euroscivoc/__init__.py +9 -0
- invenio_vocabularies/contrib/subjects/euroscivoc/datastreams.py +171 -0
- invenio_vocabularies/contrib/subjects/jsonschemas/subjects/subject-v1.0.0.json +31 -0
- invenio_vocabularies/contrib/subjects/mappings/os-v1/subjects/subject-v1.0.0.json +35 -0
- invenio_vocabularies/contrib/subjects/mappings/os-v2/subjects/subject-v1.0.0.json +35 -0
- invenio_vocabularies/contrib/subjects/mappings/v7/subjects/subject-v1.0.0.json +35 -0
- invenio_vocabularies/contrib/subjects/mesh/__init__.py +9 -0
- invenio_vocabularies/contrib/subjects/mesh/datastreams.py +43 -0
- invenio_vocabularies/contrib/subjects/schema.py +47 -5
- invenio_vocabularies/contrib/subjects/subjects.py +10 -0
- invenio_vocabularies/datastreams/datastreams.py +61 -13
- invenio_vocabularies/datastreams/factories.py +1 -2
- invenio_vocabularies/datastreams/readers.py +138 -29
- invenio_vocabularies/datastreams/tasks.py +37 -0
- invenio_vocabularies/datastreams/transformers.py +17 -27
- invenio_vocabularies/datastreams/writers.py +116 -14
- invenio_vocabularies/datastreams/xml.py +34 -0
- invenio_vocabularies/ext.py +59 -5
- invenio_vocabularies/factories.py +137 -0
- invenio_vocabularies/jobs.py +133 -0
- invenio_vocabularies/proxies.py +2 -2
- invenio_vocabularies/records/jsonschemas/vocabularies/definitions-v1.0.0.json +7 -0
- invenio_vocabularies/records/jsonschemas/vocabularies/vocabulary-v1.0.0.json +1 -4
- invenio_vocabularies/records/mappings/os-v1/vocabularies/vocabulary-v1.0.0.json +3 -3
- invenio_vocabularies/records/mappings/os-v2/vocabularies/vocabulary-v1.0.0.json +3 -3
- invenio_vocabularies/records/mappings/v7/vocabularies/vocabulary-v1.0.0.json +3 -3
- invenio_vocabularies/records/models.py +2 -4
- invenio_vocabularies/records/pidprovider.py +1 -2
- invenio_vocabularies/records/systemfields/relations.py +2 -2
- invenio_vocabularies/resources/__init__.py +9 -1
- invenio_vocabularies/resources/config.py +105 -0
- invenio_vocabularies/resources/resource.py +31 -41
- invenio_vocabularies/resources/schema.py +2 -1
- invenio_vocabularies/services/__init__.py +5 -2
- invenio_vocabularies/services/config.py +179 -0
- invenio_vocabularies/services/custom_fields/__init__.py +6 -2
- invenio_vocabularies/services/custom_fields/subject.py +82 -0
- invenio_vocabularies/services/custom_fields/vocabulary.py +5 -3
- invenio_vocabularies/services/permissions.py +3 -1
- invenio_vocabularies/services/results.py +110 -0
- invenio_vocabularies/services/schema.py +11 -2
- invenio_vocabularies/services/service.py +46 -94
- invenio_vocabularies/services/tasks.py +1 -1
- invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/subjects.html +23 -0
- invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/vocabularies-list.html +12 -0
- invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/vocabulary-details.html +71 -0
- invenio_vocabularies/translations/af/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ar/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/bg/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ca/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/cs/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/da/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/de/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/de_AT/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/de_DE/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/el/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/en/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/en_AT/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/en_HU/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/es/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/es_CU/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/es_MX/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/et/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/et_EE/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/fa/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/fa_IR/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/fr/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/fr_CI/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/fr_FR/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/gl/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/hi_IN/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/hr/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/hu/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/hu_HU/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/it/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ja/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ka/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/lt/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/messages.pot +95 -48
- invenio_vocabularies/translations/ne/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/no/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/pl/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/pt/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ro/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ru/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/rw/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/sk/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/sv/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/sv_SE/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/tr/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/uk/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/uk_UA/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/zh_CN/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/zh_TW/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/views.py +12 -26
- invenio_vocabularies/webpack.py +3 -3
- {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/METADATA +150 -6
- {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/RECORD +165 -132
- {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/WHEEL +1 -1
- {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/entry_points.txt +17 -0
- {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/AUTHORS.rst +0 -0
- {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/LICENSE +0 -0
- {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
#
|
|
3
|
+
# This file is part of Invenio.
|
|
4
|
+
# Copyright (C) 2024 CERN.
|
|
5
|
+
#
|
|
6
|
+
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
7
|
+
# modify it under the terms of the MIT License; see LICENSE file for more
|
|
8
|
+
# details.
|
|
9
|
+
|
|
10
|
+
"""S3 client."""
|
|
11
|
+
|
|
12
|
+
from flask import current_app
|
|
13
|
+
|
|
14
|
+
try:
|
|
15
|
+
import s3fs
|
|
16
|
+
except ImportError:
|
|
17
|
+
s3fs = None
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class S3Client:
    """Small wrapper around an ``s3fs`` filesystem used to read S3 objects."""

    def __init__(self, access_key, secret_key):
        """Create the underlying ``s3fs.S3FileSystem``.

        :param access_key: value handed to ``S3FileSystem`` as ``key``.
        :param secret_key: value handed to ``S3FileSystem`` as ``secret``.
        :raises Exception: when the optional ``s3fs`` import failed.
        """
        s3fs_missing = s3fs is None
        if s3fs_missing:
            raise Exception("s3fs is not installed.")

        self.fs = s3fs.S3FileSystem(secret=secret_key, key=access_key)

    def read_file(self, s3_path):
        """Return everything read from ``s3_path``, opened in ``"rb"`` mode."""
        with self.fs.open(s3_path, "rb") as stream:
            payload = stream.read()
        return payload
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class S3OrcidClient(S3Client):
    """S3 client that takes its credentials from the ORCiD app settings."""

    def __init__(self):
        """Build the client from the current Flask application config.

        Both ``VOCABULARIES_ORCID_ACCESS_KEY`` and
        ``VOCABULARIES_ORCID_SECRET_KEY`` are read with plain indexing, so
        they have to be present in the configuration.
        """
        config = current_app.config
        super().__init__(
            access_key=config["VOCABULARIES_ORCID_ACCESS_KEY"],
            secret_key=config["VOCABULARIES_ORCID_SECRET_KEY"],
        )
|
|
@@ -56,6 +56,20 @@ class NameSchema(BaseVocabularySchema, ModePIDFieldVocabularyMixin):
|
|
|
56
56
|
]
|
|
57
57
|
raise ValidationError({"family_name": messages})
|
|
58
58
|
|
|
59
|
+
@validates_schema
def validate_affiliatons(self, data, **kwargs):
    """Reject duplicated free-text affiliations.

    Only affiliations that have no ``id`` but do have a ``name`` are
    compared; seeing the same name twice among them is an error.

    :raises ValidationError: keyed on ``affiliations`` when a name repeats.
    """
    names_so_far = set()
    for entry in data.get("affiliations", []):
        entry_name = entry.get("name")
        if entry.get("id") or not entry_name:
            # Entries with an id, or without a name, are not compared.
            continue
        if entry_name in names_so_far:
            raise ValidationError({"affiliations": [_("Duplicated affiliations.")]})
        names_so_far.add(entry_name)
|
|
72
|
+
|
|
59
73
|
@post_load
|
|
60
74
|
def update_name(self, data, **kwargs):
|
|
61
75
|
"""Update names for person.
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
#
|
|
3
3
|
# Copyright (C) 2021 CERN.
|
|
4
4
|
# Copyright (C) 2021 Northwestern University.
|
|
5
|
+
# Copyright (C) 2024 University of Münster.
|
|
5
6
|
#
|
|
6
7
|
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
7
8
|
# modify it under the terms of the MIT License; see LICENSE file for more
|
|
@@ -9,13 +10,19 @@
|
|
|
9
10
|
|
|
10
11
|
"""Subjects configuration."""
|
|
11
12
|
|
|
13
|
+
from flask import current_app
|
|
12
14
|
from invenio_i18n import lazy_gettext as _
|
|
13
15
|
from invenio_records_resources.services import SearchOptions
|
|
14
16
|
from invenio_records_resources.services.records.components import DataComponent
|
|
17
|
+
from werkzeug.local import LocalProxy
|
|
15
18
|
|
|
16
19
|
from ...services.components import PIDComponent
|
|
17
20
|
from ...services.querystr import FilteredSuggestQueryParser
|
|
18
21
|
|
|
22
|
+
subject_schemes = LocalProxy(
|
|
23
|
+
lambda: current_app.config["VOCABULARIES_SUBJECTS_SCHEMES"]
|
|
24
|
+
)
|
|
25
|
+
|
|
19
26
|
|
|
20
27
|
class SubjectsSearchOptions(SearchOptions):
|
|
21
28
|
"""Search options."""
|
|
@@ -23,9 +30,8 @@ class SubjectsSearchOptions(SearchOptions):
|
|
|
23
30
|
suggest_parser_cls = FilteredSuggestQueryParser.factory(
|
|
24
31
|
filter_field="scheme",
|
|
25
32
|
fields=[ # suggest fields
|
|
26
|
-
"
|
|
27
|
-
"
|
|
28
|
-
"subject._3gram",
|
|
33
|
+
"title.*^100",
|
|
34
|
+
"synonyms^20",
|
|
29
35
|
],
|
|
30
36
|
)
|
|
31
37
|
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
#
|
|
3
|
+
# Copyright (C) 2024 University of Münster.
|
|
4
|
+
#
|
|
5
|
+
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
6
|
+
# modify it under the terms of the MIT License; see LICENSE file for more
|
|
7
|
+
# details.
|
|
8
|
+
|
|
9
|
+
"""Subjects datastreams, transformers, writers and readers."""
|
|
10
|
+
|
|
11
|
+
from invenio_access.permissions import system_identity
|
|
12
|
+
from invenio_i18n import lazy_gettext as _
|
|
13
|
+
|
|
14
|
+
from ...datastreams.writers import ServiceWriter
|
|
15
|
+
from .euroscivoc import datastreams as euroscivoc_datastreams
|
|
16
|
+
from .mesh import datastreams as mesh_datastreams
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class SubjectsServiceWriter(ServiceWriter):
    """Service writer that targets the ``subjects`` service by default."""

    def __init__(self, *args, **kwargs):
        """Initialise the writer.

        ``service_or_name`` falls back to ``"subjects"`` when the caller does
        not pass it; every other argument is forwarded to the parent class.
        """
        kwargs.setdefault("service_or_name", "subjects")
        super().__init__(*args, **kwargs)

    def _entry_id(self, entry):
        """Return the value stored under the ``id`` key of ``entry``."""
        identifier = entry["id"]
        return identifier
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
VOCABULARIES_DATASTREAM_READERS = {
|
|
33
|
+
**mesh_datastreams.VOCABULARIES_DATASTREAM_READERS,
|
|
34
|
+
**euroscivoc_datastreams.VOCABULARIES_DATASTREAM_READERS,
|
|
35
|
+
}
|
|
36
|
+
"""Subjects Data Streams readers."""
|
|
37
|
+
|
|
38
|
+
VOCABULARIES_DATASTREAM_TRANSFORMERS = {
|
|
39
|
+
**mesh_datastreams.VOCABULARIES_DATASTREAM_TRANSFORMERS,
|
|
40
|
+
**euroscivoc_datastreams.VOCABULARIES_DATASTREAM_TRANSFORMERS,
|
|
41
|
+
}
|
|
42
|
+
"""Subjects Data Streams transformers."""
|
|
43
|
+
|
|
44
|
+
VOCABULARIES_DATASTREAM_WRITERS = {
|
|
45
|
+
"subjects-service": SubjectsServiceWriter,
|
|
46
|
+
**mesh_datastreams.VOCABULARIES_DATASTREAM_WRITERS,
|
|
47
|
+
**euroscivoc_datastreams.VOCABULARIES_DATASTREAM_WRITERS,
|
|
48
|
+
}
|
|
49
|
+
"""Subjects Data Streams writers."""
|
|
50
|
+
|
|
51
|
+
DATASTREAM_CONFIG = {
|
|
52
|
+
"readers": [
|
|
53
|
+
{"type": "yaml"},
|
|
54
|
+
],
|
|
55
|
+
"writers": [
|
|
56
|
+
{
|
|
57
|
+
"type": "subjects-service",
|
|
58
|
+
}
|
|
59
|
+
],
|
|
60
|
+
}
|
|
61
|
+
"""Data Stream configuration."""
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
#
|
|
3
|
+
# Copyright (C) 2022-2024 CERN.
|
|
4
|
+
#
|
|
5
|
+
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
6
|
+
# modify it under the terms of the MIT License; see LICENSE file for more
|
|
7
|
+
# details.
|
|
8
|
+
|
|
9
|
+
"""EuroSciVoc subjects datastreams, readers, transformers, and writers."""
|
|
10
|
+
|
|
11
|
+
import io
|
|
12
|
+
from collections import namedtuple
|
|
13
|
+
|
|
14
|
+
import requests
|
|
15
|
+
from rdflib import OWL, RDF, Graph, Namespace
|
|
16
|
+
|
|
17
|
+
from invenio_vocabularies.config import SUBJECTS_EUROSCIVOC_FILE_URL
|
|
18
|
+
from invenio_vocabularies.datastreams.readers import BaseReader
|
|
19
|
+
from invenio_vocabularies.datastreams.transformers import BaseTransformer
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class EuroSciVocSubjectsHTTPReader(BaseReader):
    """Reader class to fetch and process EuroSciVoc RDF data."""

    # Seconds passed to ``requests.get`` as ``timeout``. ``requests`` applies
    # no timeout by default, so a stalled server would otherwise block the
    # datastream forever. Subclasses may override this value.
    REQUEST_TIMEOUT = 60

    def __init__(self, origin=None, mode="r", since=None, *args, **kwargs):
        """Initialize the reader with the data source.

        :param origin: The URL from which to fetch the RDF data. Falls back
            to ``SUBJECTS_EUROSCIVOC_FILE_URL`` when empty.
        :param mode: Mode of operation (default is 'r' for reading).
        :param since: Accepted for signature compatibility; this reader does
            not use it.
        """
        self.origin = origin or SUBJECTS_EUROSCIVOC_FILE_URL
        super().__init__(origin=origin, mode=mode, *args, **kwargs)

    def _iter(self, rdf_graph):
        """Iterate over the RDF graph, yielding one subject at a time.

        :param rdf_graph: The RDF graph to process.
        :yield: Dict with the concept node (``subject``) and the whole graph
            (``rdf_graph``), to be consumed by the transformer.
        """
        SKOS_CORE = Namespace("http://www.w3.org/2004/02/skos/core#")

        for subject, _, _ in rdf_graph.triples((None, RDF.type, SKOS_CORE.Concept)):
            yield {"subject": subject, "rdf_graph": rdf_graph}

    def read(self, item=None, *args, **kwargs):
        """Fetch and process the EuroSciVoc RDF data, yielding it one subject at a time.

        :param item: Must be empty; chaining after another reader is not
            supported.
        :yield: One dict per SKOS concept found in the downloaded graph.
        :raises NotImplementedError: If ``item`` is provided.
        :raises requests.RequestException: If the download fails, times out
            or returns an error status.
        """
        if item:
            raise NotImplementedError(
                "EuroSciVocSubjectsHTTPReader does not support being chained after another reader"
            )
        # Fetch the RDF data from the specified origin URL; the timeout keeps
        # an unresponsive server from hanging the job.
        response = requests.get(self.origin, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()

        # Treat the response content as a file-like object
        rdf_data = io.BytesIO(response.content)

        # Parse the RDF data into a graph
        rdf_graph = Graph()
        rdf_graph.parse(rdf_data, format="xml")

        # Yield each processed subject from the RDF graph
        yield from self._iter(rdf_graph)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class EuroSciVocSubjectsTransformer(BaseTransformer):
    """Turn EuroSciVoc SKOS concepts into subject vocabulary dictionaries."""

    SKOS_CORE = Namespace("http://www.w3.org/2004/02/skos/core#")
    SPLITCHAR = ","

    def _get_notation(self, subject, rdf_graph):
        """Return the first all-digit ``skos:notation`` of ``subject``, else ``None``."""
        pattern = (subject, self.SKOS_CORE.notation, None)
        as_text = (str(value) for _, _, value in rdf_graph.triples(pattern))
        return next((text for text in as_text if text.isdigit()), None)

    def _get_labels(self, subject, rdf_graph):
        """Collect capitalized labels of ``subject`` keyed by language.

        ``skos:prefLabel`` values are taken first. Only when no ``en`` label
        was found are ``skos:altLabel`` values used, and then only for
        languages that are still missing.
        """
        labels = {}
        preferred = rdf_graph.triples((subject, self.SKOS_CORE.prefLabel, None))
        for _, _, literal in preferred:
            labels[literal.language] = literal.value.capitalize()

        if "en" in labels:
            return labels

        alternatives = rdf_graph.triples((subject, self.SKOS_CORE.altLabel, None))
        for _, _, literal in alternatives:
            labels.setdefault(literal.language, literal.value.capitalize())
        return labels

    def _find_parents(self, subject, rdf_graph):
        """Return the notations of all ``skos:broader`` ancestors of ``subject``.

        The subject itself is skipped, as are ancestors without a numeric
        notation. The order follows ``Graph.transitive_objects``.
        """
        ancestors = (
            node
            for node in rdf_graph.transitive_objects(subject, self.SKOS_CORE.broader)
            if node != subject
        )
        notations = (self._get_notation(node, rdf_graph) for node in ancestors)
        return [notation for notation in notations if notation]

    def _transform_entry(self, subject, rdf_graph):
        """Build the subject dictionary for one concept node."""
        # The id is the numeric notation with the vocabulary prefix, if any.
        notation = self._get_notation(subject, rdf_graph)
        subject_id = None if not notation else f"euroscivoc:{notation}"

        labels = self._get_labels(subject, rdf_graph)

        # Ancestors are emitted in reverse traversal order, prefixed and
        # joined into a single string.
        ancestor_ids = [
            f"euroscivoc:{code}"
            for code in reversed(self._find_parents(subject, rdf_graph))
        ]
        parents = self.SPLITCHAR.join(ancestor_ids)

        props = {}
        if parents:
            props["parents"] = parents

        return {
            "id": subject_id,
            "scheme": "EuroSciVoc",
            "subject": labels.get("en", "").capitalize(),
            "title": labels,
            "props": props,
            "identifiers": [{"scheme": "url", "identifier": str(subject)}],
        }

    def apply(self, stream_entry, *args, **kwargs):
        """Replace the entry of ``stream_entry`` with its transformed form.

        :param stream_entry: Entry whose ``entry`` dict carries ``subject``
            (the concept node) and ``rdf_graph`` (the graph it belongs to).
        :return: The same stream entry, with ``entry`` set to the subject dict.
        """
        raw = stream_entry.entry
        stream_entry.entry = self._transform_entry(raw["subject"], raw["rdf_graph"])
        return stream_entry
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
# Configuration for datastream readers, transformers, and writers
|
|
151
|
+
VOCABULARIES_DATASTREAM_READERS = {"euroscivoc-reader": EuroSciVocSubjectsHTTPReader}
|
|
152
|
+
|
|
153
|
+
VOCABULARIES_DATASTREAM_WRITERS = {}
|
|
154
|
+
|
|
155
|
+
VOCABULARIES_DATASTREAM_TRANSFORMERS = {
|
|
156
|
+
"euroscivoc-transformer": EuroSciVocSubjectsTransformer
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
DATASTREAM_CONFIG = {
|
|
160
|
+
"readers": [
|
|
161
|
+
{
|
|
162
|
+
"type": "euroscivoc-reader",
|
|
163
|
+
}
|
|
164
|
+
],
|
|
165
|
+
"transformers": [{"type": "euroscivoc-transformer"}],
|
|
166
|
+
"writers": [
|
|
167
|
+
{
|
|
168
|
+
"type": "subjects-service",
|
|
169
|
+
}
|
|
170
|
+
],
|
|
171
|
+
}
|
|
@@ -8,6 +8,9 @@
|
|
|
8
8
|
"$schema": {
|
|
9
9
|
"$ref": "local://definitions-v1.0.0.json#/$schema"
|
|
10
10
|
},
|
|
11
|
+
"tags": {
|
|
12
|
+
"$ref": "local://vocabularies/definitions-v1.0.0.json#/tags"
|
|
13
|
+
},
|
|
11
14
|
"id": {
|
|
12
15
|
"description": "URI or classification code as identifier - globally unique among all subject schemes.",
|
|
13
16
|
"$ref": "local://definitions-v1.0.0.json#/identifier"
|
|
@@ -22,6 +25,34 @@
|
|
|
22
25
|
"subject": {
|
|
23
26
|
"description": "Human readable label.",
|
|
24
27
|
"type": "string"
|
|
28
|
+
},
|
|
29
|
+
"title": {
|
|
30
|
+
"description": "Human readable label in different languages.",
|
|
31
|
+
"$ref": "local://vocabularies/definitions-v1.0.0.json#/title"
|
|
32
|
+
},
|
|
33
|
+
"props": {
|
|
34
|
+
"type": "object",
|
|
35
|
+
"patternProperties": {
|
|
36
|
+
"^.*$": {
|
|
37
|
+
"type": "string"
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
},
|
|
41
|
+
"identifiers": {
|
|
42
|
+
"description": "Alternate identifiers for the subject.",
|
|
43
|
+
"type": "array",
|
|
44
|
+
"items": {
|
|
45
|
+
"$ref": "local://definitions-v2.0.0.json#/identifiers_with_scheme"
|
|
46
|
+
},
|
|
47
|
+
"uniqueItems": true
|
|
48
|
+
},
|
|
49
|
+
"synonyms": {
|
|
50
|
+
"description": "Synonyms of the subject label.",
|
|
51
|
+
"type": "array",
|
|
52
|
+
"items": {
|
|
53
|
+
"type": "string"
|
|
54
|
+
},
|
|
55
|
+
"uniqueItems": true
|
|
25
56
|
}
|
|
26
57
|
}
|
|
27
58
|
}
|
|
@@ -1,5 +1,16 @@
|
|
|
1
1
|
{
|
|
2
2
|
"mappings": {
|
|
3
|
+
"dynamic_templates": [
|
|
4
|
+
{
|
|
5
|
+
"i18n_title": {
|
|
6
|
+
"path_match": "title.*",
|
|
7
|
+
"match_mapping_type": "string",
|
|
8
|
+
"mapping": {
|
|
9
|
+
"type": "search_as_you_type"
|
|
10
|
+
}
|
|
11
|
+
}
|
|
12
|
+
}
|
|
13
|
+
],
|
|
3
14
|
"dynamic": "strict",
|
|
4
15
|
"properties": {
|
|
5
16
|
"$schema": {
|
|
@@ -55,6 +66,30 @@
|
|
|
55
66
|
"type": "keyword"
|
|
56
67
|
}
|
|
57
68
|
}
|
|
69
|
+
},
|
|
70
|
+
"title": {
|
|
71
|
+
"type": "object",
|
|
72
|
+
"dynamic": "true"
|
|
73
|
+
},
|
|
74
|
+
"props": {
|
|
75
|
+
"type": "object",
|
|
76
|
+
"dynamic": "true"
|
|
77
|
+
},
|
|
78
|
+
"identifiers": {
|
|
79
|
+
"properties": {
|
|
80
|
+
"identifier": {
|
|
81
|
+
"type": "keyword"
|
|
82
|
+
},
|
|
83
|
+
"scheme": {
|
|
84
|
+
"type": "keyword"
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
},
|
|
88
|
+
"synonyms": {
|
|
89
|
+
"type": "text"
|
|
90
|
+
},
|
|
91
|
+
"tags": {
|
|
92
|
+
"type": "keyword"
|
|
58
93
|
}
|
|
59
94
|
}
|
|
60
95
|
}
|
|
@@ -1,5 +1,16 @@
|
|
|
1
1
|
{
|
|
2
2
|
"mappings": {
|
|
3
|
+
"dynamic_templates": [
|
|
4
|
+
{
|
|
5
|
+
"i18n_title": {
|
|
6
|
+
"path_match": "title.*",
|
|
7
|
+
"match_mapping_type": "string",
|
|
8
|
+
"mapping": {
|
|
9
|
+
"type": "search_as_you_type"
|
|
10
|
+
}
|
|
11
|
+
}
|
|
12
|
+
}
|
|
13
|
+
],
|
|
3
14
|
"dynamic": "strict",
|
|
4
15
|
"properties": {
|
|
5
16
|
"$schema": {
|
|
@@ -55,6 +66,30 @@
|
|
|
55
66
|
"type": "keyword"
|
|
56
67
|
}
|
|
57
68
|
}
|
|
69
|
+
},
|
|
70
|
+
"title": {
|
|
71
|
+
"type": "object",
|
|
72
|
+
"dynamic": "true"
|
|
73
|
+
},
|
|
74
|
+
"synonyms": {
|
|
75
|
+
"type": "text"
|
|
76
|
+
},
|
|
77
|
+
"props": {
|
|
78
|
+
"type": "object",
|
|
79
|
+
"dynamic": "true"
|
|
80
|
+
},
|
|
81
|
+
"identifiers": {
|
|
82
|
+
"properties": {
|
|
83
|
+
"identifier": {
|
|
84
|
+
"type": "keyword"
|
|
85
|
+
},
|
|
86
|
+
"scheme": {
|
|
87
|
+
"type": "keyword"
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
},
|
|
91
|
+
"tags": {
|
|
92
|
+
"type": "keyword"
|
|
58
93
|
}
|
|
59
94
|
}
|
|
60
95
|
}
|
|
@@ -1,5 +1,16 @@
|
|
|
1
1
|
{
|
|
2
2
|
"mappings": {
|
|
3
|
+
"dynamic_templates": [
|
|
4
|
+
{
|
|
5
|
+
"i18n_title": {
|
|
6
|
+
"path_match": "title.*",
|
|
7
|
+
"match_mapping_type": "string",
|
|
8
|
+
"mapping": {
|
|
9
|
+
"type": "search_as_you_type"
|
|
10
|
+
}
|
|
11
|
+
}
|
|
12
|
+
}
|
|
13
|
+
],
|
|
3
14
|
"dynamic": "strict",
|
|
4
15
|
"properties": {
|
|
5
16
|
"$schema": {
|
|
@@ -55,6 +66,30 @@
|
|
|
55
66
|
"type": "keyword"
|
|
56
67
|
}
|
|
57
68
|
}
|
|
69
|
+
},
|
|
70
|
+
"title": {
|
|
71
|
+
"type": "object",
|
|
72
|
+
"dynamic": "true"
|
|
73
|
+
},
|
|
74
|
+
"props": {
|
|
75
|
+
"type": "object",
|
|
76
|
+
"dynamic": "true"
|
|
77
|
+
},
|
|
78
|
+
"identifiers": {
|
|
79
|
+
"properties": {
|
|
80
|
+
"identifier": {
|
|
81
|
+
"type": "keyword"
|
|
82
|
+
},
|
|
83
|
+
"scheme": {
|
|
84
|
+
"type": "keyword"
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
},
|
|
88
|
+
"synonyms": {
|
|
89
|
+
"type": "text"
|
|
90
|
+
},
|
|
91
|
+
"tags": {
|
|
92
|
+
"type": "keyword"
|
|
58
93
|
}
|
|
59
94
|
}
|
|
60
95
|
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
#
|
|
3
|
+
# Copyright (C) 2022-2024 CERN.
|
|
4
|
+
# Copyright (C) 2024 California Institute of Technology.
|
|
5
|
+
#
|
|
6
|
+
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
7
|
+
# modify it under the terms of the MIT License; see LICENSE file for more
|
|
8
|
+
# details.
|
|
9
|
+
|
|
10
|
+
"""MeSH subjects datastreams, transformers, writers and readers."""
|
|
11
|
+
|
|
12
|
+
from invenio_vocabularies.datastreams.transformers import (
|
|
13
|
+
BaseTransformer,
|
|
14
|
+
TransformerError,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class MeshSubjectsTransformer(BaseTransformer):
    """MeSH subjects Transformer."""

    def apply(self, stream_entry, *args, **kwargs):
        """Apply transformation on stream entry.

        Rewrites ``entry["id"]`` from a URL to ``mesh:<last path segment>``.

        :param stream_entry: Stream entry whose ``entry`` dict has an ``id``.
        :return: The same stream entry with the rewritten ``id``.
        :raises TransformerError: If the id is missing, is not a string, or
            has an empty last path segment.
        """
        entry_data = stream_entry.entry

        # ID in MeSH data is the URL, ex. https://id.nlm.nih.gov/mesh/D000001
        # We just want to use the ID prefixed by "mesh:"
        try:
            mesh_id = entry_data["id"].split("/")[-1]
        except Exception as err:
            # Keep the original failure as the explicit cause for debugging.
            raise TransformerError("Not a valid MeSH ID.") from err

        if not mesh_id:
            # An empty id or a URL ending in "/" would otherwise be stored
            # as the meaningless identifier "mesh:".
            raise TransformerError("Not a valid MeSH ID.")

        entry_data["id"] = "mesh:" + mesh_id
        return stream_entry
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
VOCABULARIES_DATASTREAM_READERS = {}
|
|
37
|
+
"""MeSH datastream readers."""
|
|
38
|
+
|
|
39
|
+
VOCABULARIES_DATASTREAM_WRITERS = {}
|
|
40
|
+
"""MeSH subject datastream writers."""
|
|
41
|
+
|
|
42
|
+
VOCABULARIES_DATASTREAM_TRANSFORMERS = {"mesh-subjects": MeshSubjectsTransformer}
|
|
43
|
+
"""MeSH subjects datastream transformers."""
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
#
|
|
3
3
|
# Copyright (C) 2021 Northwestern University.
|
|
4
|
-
# Copyright (C) 2021-
|
|
4
|
+
# Copyright (C) 2021-2024 CERN.
|
|
5
|
+
# Copyright (C) 2024 University of Münster.
|
|
5
6
|
#
|
|
6
7
|
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
7
8
|
# modify it under the terms of the MIT License; see LICENSE file for more
|
|
@@ -9,10 +10,19 @@
|
|
|
9
10
|
|
|
10
11
|
"""Subjects schema."""
|
|
11
12
|
|
|
12
|
-
from
|
|
13
|
-
from marshmallow_utils.fields import SanitizedUnicode
|
|
13
|
+
from functools import partial
|
|
14
14
|
|
|
15
|
-
from
|
|
15
|
+
from invenio_i18n import get_locale
|
|
16
|
+
from marshmallow import Schema, fields, pre_load
|
|
17
|
+
from marshmallow_utils.fields import IdentifierSet, SanitizedUnicode
|
|
18
|
+
from marshmallow_utils.schemas import IdentifierSchema
|
|
19
|
+
|
|
20
|
+
from ...services.schema import (
|
|
21
|
+
BaseVocabularySchema,
|
|
22
|
+
ContribVocabularyRelationSchema,
|
|
23
|
+
i18n_strings,
|
|
24
|
+
)
|
|
25
|
+
from .config import subject_schemes
|
|
16
26
|
|
|
17
27
|
|
|
18
28
|
class SubjectSchema(BaseVocabularySchema):
|
|
@@ -24,6 +34,26 @@ class SubjectSchema(BaseVocabularySchema):
|
|
|
24
34
|
id = SanitizedUnicode(required=True)
|
|
25
35
|
scheme = SanitizedUnicode(required=True)
|
|
26
36
|
subject = SanitizedUnicode(required=True)
|
|
37
|
+
title = i18n_strings
|
|
38
|
+
props = fields.Dict(keys=SanitizedUnicode(), values=SanitizedUnicode())
|
|
39
|
+
identifiers = IdentifierSet(
|
|
40
|
+
fields.Nested(
|
|
41
|
+
partial(
|
|
42
|
+
IdentifierSchema,
|
|
43
|
+
allowed_schemes=subject_schemes,
|
|
44
|
+
identifier_required=False,
|
|
45
|
+
)
|
|
46
|
+
)
|
|
47
|
+
)
|
|
48
|
+
synonyms = fields.List(SanitizedUnicode())
|
|
49
|
+
|
|
50
|
+
@pre_load
def add_subject_from_title(self, data, **kwargs):
    """Add subject from title if not present.

    When ``subject`` is missing, it is filled from ``title``: the entry for
    the current locale if there is one, otherwise the first title value.
    If there is no usable ``title`` either, ``data`` is returned untouched
    so that the schema's own ``required`` validation reports the problem.

    :param data: Raw input mapping being loaded.
    :return: ``data``, possibly with ``subject`` added.
    """
    if "subject" in data:
        return data

    titles = data.get("title")
    if not titles:
        return data

    locale = get_locale().language
    # ``dict.values()`` is a view and cannot be indexed, so the first value
    # is taken through an iterator.
    data["subject"] = titles.get(locale) or next(iter(titles.values()))
    return data
|
|
27
57
|
|
|
28
58
|
|
|
29
59
|
class SubjectRelationSchema(ContribVocabularyRelationSchema):
|
|
@@ -32,4 +62,16 @@ class SubjectRelationSchema(ContribVocabularyRelationSchema):
|
|
|
32
62
|
ftf_name = "subject"
|
|
33
63
|
parent_field_name = "subjects"
|
|
34
64
|
subject = SanitizedUnicode()
|
|
35
|
-
scheme = SanitizedUnicode()
|
|
65
|
+
scheme = SanitizedUnicode(dump_only=True)
|
|
66
|
+
title = fields.Dict(dump_only=True)
|
|
67
|
+
props = fields.Dict(dump_only=True)
|
|
68
|
+
identifiers = IdentifierSet(
|
|
69
|
+
fields.Nested(
|
|
70
|
+
partial(
|
|
71
|
+
IdentifierSchema,
|
|
72
|
+
allowed_schemes=subject_schemes,
|
|
73
|
+
identifier_required=False,
|
|
74
|
+
)
|
|
75
|
+
)
|
|
76
|
+
)
|
|
77
|
+
synonyms = fields.List(SanitizedUnicode(), dump_only=True)
|