invenio-vocabularies 4.0.0__py2.py3-none-any.whl → 4.1.1__py2.py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of invenio-vocabularies might be problematic. Click here for more details.
- invenio_vocabularies/__init__.py +1 -1
- invenio_vocabularies/administration/__init__.py +10 -0
- invenio_vocabularies/administration/views/__init__.py +10 -0
- invenio_vocabularies/administration/views/vocabularies.py +44 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/CustomAwardForm.js +8 -20
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingField.js +2 -2
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingModal.js +5 -7
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/NoAwardResults.js +3 -3
- invenio_vocabularies/config.py +33 -3
- invenio_vocabularies/contrib/affiliations/config.py +2 -2
- invenio_vocabularies/contrib/affiliations/datastreams.py +67 -0
- invenio_vocabularies/contrib/affiliations/jsonschemas/affiliations/affiliation-v1.0.0.json +38 -1
- invenio_vocabularies/contrib/affiliations/mappings/os-v1/affiliations/affiliation-v1.0.0.json +21 -0
- invenio_vocabularies/contrib/affiliations/mappings/os-v2/affiliations/affiliation-v1.0.0.json +21 -0
- invenio_vocabularies/contrib/affiliations/mappings/v7/affiliations/affiliation-v1.0.0.json +21 -0
- invenio_vocabularies/contrib/affiliations/schema.py +17 -3
- invenio_vocabularies/contrib/awards/jsonschemas/awards/award-v1.0.0.json +3 -0
- invenio_vocabularies/contrib/awards/mappings/os-v1/awards/award-v1.0.0.json +3 -0
- invenio_vocabularies/contrib/awards/mappings/os-v2/awards/award-v1.0.0.json +3 -0
- invenio_vocabularies/contrib/awards/mappings/v7/awards/award-v1.0.0.json +3 -0
- invenio_vocabularies/contrib/common/ror/datastreams.py +101 -1
- invenio_vocabularies/contrib/funders/datastreams.py +8 -92
- invenio_vocabularies/contrib/funders/jsonschemas/funders/funder-v1.0.0.json +3 -0
- invenio_vocabularies/contrib/funders/mappings/os-v1/funders/funder-v1.0.0.json +3 -0
- invenio_vocabularies/contrib/funders/mappings/os-v2/funders/funder-v1.0.0.json +3 -0
- invenio_vocabularies/contrib/funders/mappings/v7/funders/funder-v1.0.0.json +3 -0
- invenio_vocabularies/contrib/funders/serializer.py +2 -1
- invenio_vocabularies/contrib/names/jsonschemas/names/name-v1.0.0.json +3 -0
- invenio_vocabularies/contrib/names/mappings/os-v1/names/name-v1.0.0.json +3 -0
- invenio_vocabularies/contrib/names/mappings/os-v2/names/name-v1.0.0.json +3 -0
- invenio_vocabularies/contrib/names/mappings/v7/names/name-v1.0.0.json +3 -0
- invenio_vocabularies/contrib/subjects/jsonschemas/subjects/subject-v1.0.0.json +3 -0
- invenio_vocabularies/contrib/subjects/mappings/os-v1/subjects/subject-v1.0.0.json +3 -0
- invenio_vocabularies/contrib/subjects/mappings/os-v2/subjects/subject-v1.0.0.json +3 -0
- invenio_vocabularies/contrib/subjects/mappings/v7/subjects/subject-v1.0.0.json +3 -0
- invenio_vocabularies/datastreams/factories.py +1 -2
- invenio_vocabularies/datastreams/readers.py +84 -0
- invenio_vocabularies/datastreams/writers.py +2 -2
- invenio_vocabularies/ext.py +22 -7
- invenio_vocabularies/factories.py +15 -0
- invenio_vocabularies/proxies.py +2 -2
- invenio_vocabularies/records/jsonschemas/vocabularies/definitions-v1.0.0.json +7 -0
- invenio_vocabularies/records/jsonschemas/vocabularies/vocabulary-v1.0.0.json +1 -4
- invenio_vocabularies/resources/__init__.py +8 -1
- invenio_vocabularies/resources/config.py +105 -0
- invenio_vocabularies/resources/resource.py +31 -41
- invenio_vocabularies/services/__init__.py +5 -2
- invenio_vocabularies/services/config.py +179 -0
- invenio_vocabularies/services/permissions.py +3 -1
- invenio_vocabularies/services/results.py +110 -0
- invenio_vocabularies/services/schema.py +11 -2
- invenio_vocabularies/services/service.py +41 -86
- invenio_vocabularies/services/tasks.py +2 -2
- invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/vocabularies-list.html +12 -0
- invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/vocabulary-details.html +71 -0
- invenio_vocabularies/views.py +7 -0
- {invenio_vocabularies-4.0.0.dist-info → invenio_vocabularies-4.1.1.dist-info}/METADATA +25 -1
- {invenio_vocabularies-4.0.0.dist-info → invenio_vocabularies-4.1.1.dist-info}/RECORD +63 -54
- {invenio_vocabularies-4.0.0.dist-info → invenio_vocabularies-4.1.1.dist-info}/entry_points.txt +4 -0
- {invenio_vocabularies-4.0.0.dist-info → invenio_vocabularies-4.1.1.dist-info}/AUTHORS.rst +0 -0
- {invenio_vocabularies-4.0.0.dist-info → invenio_vocabularies-4.1.1.dist-info}/LICENSE +0 -0
- {invenio_vocabularies-4.0.0.dist-info → invenio_vocabularies-4.1.1.dist-info}/WHEEL +0 -0
- {invenio_vocabularies-4.0.0.dist-info → invenio_vocabularies-4.1.1.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
#
|
|
3
3
|
# Copyright (C) 2024 CERN.
|
|
4
|
+
# Copyright (C) 2024 California Institute of Technology.
|
|
4
5
|
#
|
|
5
6
|
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
6
7
|
# modify it under the terms of the MIT License; see LICENSE file for more
|
|
@@ -11,9 +12,11 @@
|
|
|
11
12
|
import io
|
|
12
13
|
|
|
13
14
|
import requests
|
|
15
|
+
from idutils import normalize_ror
|
|
14
16
|
|
|
15
|
-
from invenio_vocabularies.datastreams.errors import ReaderError
|
|
17
|
+
from invenio_vocabularies.datastreams.errors import ReaderError, TransformerError
|
|
16
18
|
from invenio_vocabularies.datastreams.readers import BaseReader
|
|
19
|
+
from invenio_vocabularies.datastreams.transformers import BaseTransformer
|
|
17
20
|
|
|
18
21
|
|
|
19
22
|
class RORHTTPReader(BaseReader):
|
|
@@ -64,3 +67,100 @@ class RORHTTPReader(BaseReader):
|
|
|
64
67
|
VOCABULARIES_DATASTREAM_READERS = {
|
|
65
68
|
"ror-http": RORHTTPReader,
|
|
66
69
|
}
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class RORTransformer(BaseTransformer):
|
|
73
|
+
"""Transforms a JSON ROR record into a funders record."""
|
|
74
|
+
|
|
75
|
+
def __init__(
|
|
76
|
+
self, *args, vocab_schemes=None, funder_fundref_doi_prefix=None, **kwargs
|
|
77
|
+
):
|
|
78
|
+
"""Initializes the transformer."""
|
|
79
|
+
self.vocab_schemes = vocab_schemes
|
|
80
|
+
self.funder_fundref_doi_prefix = funder_fundref_doi_prefix
|
|
81
|
+
super().__init__(*args, **kwargs)
|
|
82
|
+
|
|
83
|
+
def apply(self, stream_entry, **kwargs):
|
|
84
|
+
"""Applies the transformation to the stream entry."""
|
|
85
|
+
record = stream_entry.entry
|
|
86
|
+
ror = {}
|
|
87
|
+
ror["title"] = {}
|
|
88
|
+
|
|
89
|
+
ror["id"] = normalize_ror(record.get("id"))
|
|
90
|
+
if not ror["id"]:
|
|
91
|
+
raise TransformerError(_("Id not found in ROR entry."))
|
|
92
|
+
|
|
93
|
+
# Using set so aliases are unique
|
|
94
|
+
aliases = set()
|
|
95
|
+
acronym = None
|
|
96
|
+
for name in record.get("names"):
|
|
97
|
+
lang = name.get("lang", "en")
|
|
98
|
+
if lang == None:
|
|
99
|
+
lang = "en"
|
|
100
|
+
if "ror_display" in name["types"]:
|
|
101
|
+
ror["name"] = name["value"]
|
|
102
|
+
if "label" in name["types"]:
|
|
103
|
+
ror["title"][lang] = name["value"]
|
|
104
|
+
if "alias" in name["types"]:
|
|
105
|
+
aliases.add(name["value"])
|
|
106
|
+
if "acronym" in name["types"]:
|
|
107
|
+
# The first acronyn goes in acronym field to maintain
|
|
108
|
+
# compatability with existing data structure
|
|
109
|
+
if not acronym:
|
|
110
|
+
acronym = name["value"]
|
|
111
|
+
else:
|
|
112
|
+
aliases.add(name["value"])
|
|
113
|
+
if acronym:
|
|
114
|
+
ror["acronym"] = acronym
|
|
115
|
+
if aliases:
|
|
116
|
+
ror["aliases"] = list(aliases)
|
|
117
|
+
|
|
118
|
+
# ror_display is required and should be in every entry
|
|
119
|
+
if not ror["name"]:
|
|
120
|
+
raise TransformerError(
|
|
121
|
+
_("Name with type ror_display not found in ROR entry.")
|
|
122
|
+
)
|
|
123
|
+
|
|
124
|
+
# This only gets the first location, to maintain compatability
|
|
125
|
+
# with existing data structure
|
|
126
|
+
location = record.get("locations", [{}])[0].get("geonames_details", {})
|
|
127
|
+
ror["country"] = location.get("country_code")
|
|
128
|
+
ror["country_name"] = location.get("country_name")
|
|
129
|
+
ror["location_name"] = location.get("name")
|
|
130
|
+
|
|
131
|
+
ror["types"] = record.get("types")
|
|
132
|
+
|
|
133
|
+
status = record.get("status")
|
|
134
|
+
ror["status"] = status
|
|
135
|
+
|
|
136
|
+
# The ROR is always listed in identifiers, expected by serialization
|
|
137
|
+
ror["identifiers"] = [{"identifier": ror["id"], "scheme": "ror"}]
|
|
138
|
+
if self.vocab_schemes:
|
|
139
|
+
valid_schemes = set(self.vocab_schemes.keys())
|
|
140
|
+
else:
|
|
141
|
+
valid_schemes = set()
|
|
142
|
+
fund_ref = "fundref"
|
|
143
|
+
if self.funder_fundref_doi_prefix:
|
|
144
|
+
valid_schemes.add(fund_ref)
|
|
145
|
+
for identifier in record.get("external_ids"):
|
|
146
|
+
scheme = identifier["type"]
|
|
147
|
+
if scheme in valid_schemes:
|
|
148
|
+
value = identifier.get("preferred") or identifier.get("all")[0]
|
|
149
|
+
if scheme == fund_ref:
|
|
150
|
+
if self.funder_fundref_doi_prefix:
|
|
151
|
+
value = f"{self.funder_fundref_doi_prefix}/{value}"
|
|
152
|
+
scheme = "doi"
|
|
153
|
+
ror["identifiers"].append(
|
|
154
|
+
{
|
|
155
|
+
"identifier": value,
|
|
156
|
+
"scheme": scheme,
|
|
157
|
+
}
|
|
158
|
+
)
|
|
159
|
+
|
|
160
|
+
stream_entry.entry = ror
|
|
161
|
+
return stream_entry
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
VOCABULARIES_DATASTREAM_TRANSFORMERS = {
|
|
165
|
+
"ror": RORTransformer,
|
|
166
|
+
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
#
|
|
3
|
-
# Copyright (C) 2022 CERN.
|
|
3
|
+
# Copyright (C) 2022-2024 CERN.
|
|
4
4
|
# Copyright (C) 2024 California Institute of Technology.
|
|
5
5
|
#
|
|
6
6
|
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
@@ -13,8 +13,6 @@ from idutils import normalize_ror
|
|
|
13
13
|
from invenio_access.permissions import system_identity
|
|
14
14
|
from invenio_i18n import lazy_gettext as _
|
|
15
15
|
|
|
16
|
-
from ...datastreams.errors import TransformerError
|
|
17
|
-
from ...datastreams.transformers import BaseTransformer
|
|
18
16
|
from ...datastreams.writers import ServiceWriter
|
|
19
17
|
from .config import funder_fundref_doi_prefix, funder_schemes
|
|
20
18
|
|
|
@@ -32,94 +30,6 @@ class FundersServiceWriter(ServiceWriter):
|
|
|
32
30
|
return entry["id"]
|
|
33
31
|
|
|
34
32
|
|
|
35
|
-
class RORTransformer(BaseTransformer):
|
|
36
|
-
"""Transforms a JSON ROR record into a funders record."""
|
|
37
|
-
|
|
38
|
-
def apply(self, stream_entry, **kwargs):
|
|
39
|
-
"""Applies the transformation to the stream entry."""
|
|
40
|
-
record = stream_entry.entry
|
|
41
|
-
funder = {}
|
|
42
|
-
funder["title"] = {}
|
|
43
|
-
|
|
44
|
-
funder["id"] = normalize_ror(record.get("id"))
|
|
45
|
-
if not funder["id"]:
|
|
46
|
-
raise TransformerError(_("Id not found in ROR entry."))
|
|
47
|
-
|
|
48
|
-
aliases = []
|
|
49
|
-
acronym = None
|
|
50
|
-
for name in record.get("names"):
|
|
51
|
-
# Some name entries have a `lang` key with a `None` value.
|
|
52
|
-
# Therefore, providing a default value to `name.get("lang")` is not enough,
|
|
53
|
-
# and we need instead to check if the result of `get` is None.
|
|
54
|
-
lang = name.get("lang")
|
|
55
|
-
if lang is None:
|
|
56
|
-
lang = "en"
|
|
57
|
-
|
|
58
|
-
if "ror_display" in name["types"]:
|
|
59
|
-
funder["name"] = name["value"]
|
|
60
|
-
if "label" in name["types"]:
|
|
61
|
-
funder["title"][lang] = name["value"]
|
|
62
|
-
if "alias" in name["types"]:
|
|
63
|
-
aliases.append(name["value"])
|
|
64
|
-
if "acronym" in name["types"]:
|
|
65
|
-
# The first acronyn goes in acronym field to maintain
|
|
66
|
-
# compatability with existing data structure
|
|
67
|
-
if not acronym:
|
|
68
|
-
acronym = name["value"]
|
|
69
|
-
else:
|
|
70
|
-
aliases.append(name["value"])
|
|
71
|
-
if acronym:
|
|
72
|
-
funder["acronym"] = acronym
|
|
73
|
-
if aliases:
|
|
74
|
-
funder["aliases"] = aliases
|
|
75
|
-
|
|
76
|
-
# ror_display is required and should be in every entry
|
|
77
|
-
if not funder["name"]:
|
|
78
|
-
raise TransformerError(
|
|
79
|
-
_("Name with type ror_display not found in ROR entry.")
|
|
80
|
-
)
|
|
81
|
-
|
|
82
|
-
# This only gets the first location, to maintain compatability
|
|
83
|
-
# with existing data structure
|
|
84
|
-
location = record.get("locations", [{}])[0].get("geonames_details", {})
|
|
85
|
-
funder["country"] = location.get("country_code")
|
|
86
|
-
funder["country_name"] = location.get("country_name")
|
|
87
|
-
funder["location_name"] = location.get("name")
|
|
88
|
-
|
|
89
|
-
funder["types"] = record.get("types")
|
|
90
|
-
|
|
91
|
-
status = record.get("status")
|
|
92
|
-
funder["status"] = status
|
|
93
|
-
|
|
94
|
-
# The ROR is always listed in identifiers, expected by serialization
|
|
95
|
-
funder["identifiers"] = [{"identifier": funder["id"], "scheme": "ror"}]
|
|
96
|
-
valid_schemes = set(funder_schemes.keys())
|
|
97
|
-
fund_ref = "fundref"
|
|
98
|
-
valid_schemes.add(fund_ref)
|
|
99
|
-
for identifier in record.get("external_ids"):
|
|
100
|
-
scheme = identifier["type"]
|
|
101
|
-
if scheme in valid_schemes:
|
|
102
|
-
value = identifier.get("preferred") or identifier.get("all")[0]
|
|
103
|
-
if scheme == fund_ref:
|
|
104
|
-
value = f"{funder_fundref_doi_prefix}/{value}"
|
|
105
|
-
scheme = "doi"
|
|
106
|
-
funder["identifiers"].append(
|
|
107
|
-
{
|
|
108
|
-
"identifier": value,
|
|
109
|
-
"scheme": scheme,
|
|
110
|
-
}
|
|
111
|
-
)
|
|
112
|
-
|
|
113
|
-
stream_entry.entry = funder
|
|
114
|
-
return stream_entry
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
VOCABULARIES_DATASTREAM_TRANSFORMERS = {
|
|
118
|
-
"ror-funder": RORTransformer,
|
|
119
|
-
}
|
|
120
|
-
"""ROR Data Streams transformers."""
|
|
121
|
-
|
|
122
|
-
|
|
123
33
|
VOCABULARIES_DATASTREAM_WRITERS = {
|
|
124
34
|
"funders-service": FundersServiceWriter,
|
|
125
35
|
}
|
|
@@ -137,7 +47,13 @@ DATASTREAM_CONFIG = {
|
|
|
137
47
|
{"type": "json"},
|
|
138
48
|
],
|
|
139
49
|
"transformers": [
|
|
140
|
-
{
|
|
50
|
+
{
|
|
51
|
+
"type": "ror",
|
|
52
|
+
"args": {
|
|
53
|
+
"vocab_schemes": funder_schemes,
|
|
54
|
+
"funder_fundref_doi_prefix": funder_fundref_doi_prefix,
|
|
55
|
+
},
|
|
56
|
+
},
|
|
141
57
|
],
|
|
142
58
|
"writers": [
|
|
143
59
|
{
|
|
@@ -7,6 +7,9 @@
|
|
|
7
7
|
"$schema": {
|
|
8
8
|
"$ref": "local://definitions-v1.0.0.json#/$schema"
|
|
9
9
|
},
|
|
10
|
+
"tags": {
|
|
11
|
+
"$ref": "local://vocabularies/definitions-v1.0.0.json#/tags"
|
|
12
|
+
},
|
|
10
13
|
"country": {
|
|
11
14
|
"type": "string",
|
|
12
15
|
"description": "Represents a funder's origin country as a country code."
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
#
|
|
3
|
-
# Copyright (C) 2022 CERN.
|
|
3
|
+
# Copyright (C) 2022-2024 CERN.
|
|
4
4
|
#
|
|
5
5
|
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
6
6
|
# modify it under the terms of the MIT License; see LICENSE file for more
|
|
@@ -29,4 +29,5 @@ class FunderL10NItemSchema(Schema):
|
|
|
29
29
|
props = fields.Dict(dump_only=True)
|
|
30
30
|
name = fields.String(dump_only=True)
|
|
31
31
|
country = fields.String(dump_only=True)
|
|
32
|
+
country_name = fields.String(dump_only=True)
|
|
32
33
|
identifiers = fields.List(fields.Nested(IdentifierSchema), dump_only=True)
|
|
@@ -8,6 +8,9 @@
|
|
|
8
8
|
"$schema": {
|
|
9
9
|
"$ref": "local://definitions-v1.0.0.json#/$schema"
|
|
10
10
|
},
|
|
11
|
+
"tags": {
|
|
12
|
+
"$ref": "local://vocabularies/definitions-v1.0.0.json#/tags"
|
|
13
|
+
},
|
|
11
14
|
"scheme": {
|
|
12
15
|
"description": "Identifier of the name scheme.",
|
|
13
16
|
"$ref": "local://definitions-v1.0.0.json#/identifier"
|
|
@@ -8,6 +8,9 @@
|
|
|
8
8
|
"$schema": {
|
|
9
9
|
"$ref": "local://definitions-v1.0.0.json#/$schema"
|
|
10
10
|
},
|
|
11
|
+
"tags": {
|
|
12
|
+
"$ref": "local://vocabularies/definitions-v1.0.0.json#/tags"
|
|
13
|
+
},
|
|
11
14
|
"id": {
|
|
12
15
|
"description": "URI or classification code as identifier - globally unique among all subject schemes.",
|
|
13
16
|
"$ref": "local://definitions-v1.0.0.json#/identifier"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
#
|
|
3
|
-
# Copyright (C) 2021-
|
|
3
|
+
# Copyright (C) 2021-2024 CERN.
|
|
4
4
|
#
|
|
5
5
|
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
6
6
|
# modify it under the terms of the MIT License; see LICENSE file for more
|
|
@@ -36,7 +36,6 @@ class Factory:
|
|
|
36
36
|
try:
|
|
37
37
|
type_ = config["type"]
|
|
38
38
|
args = config.get("args", {})
|
|
39
|
-
|
|
40
39
|
return cls.options()[type_](**args)
|
|
41
40
|
except KeyError:
|
|
42
41
|
raise FactoryError(name=cls.FACTORY_NAME, key=type_)
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
#
|
|
3
3
|
# Copyright (C) 2021-2024 CERN.
|
|
4
|
+
# Copyright (C) 2024 University of Münster.
|
|
4
5
|
#
|
|
5
6
|
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
6
7
|
# modify it under the terms of the MIT License; see LICENSE file for more
|
|
@@ -19,11 +20,17 @@ from json.decoder import JSONDecodeError
|
|
|
19
20
|
|
|
20
21
|
import requests
|
|
21
22
|
import yaml
|
|
23
|
+
from lxml import etree
|
|
22
24
|
from lxml.html import parse as html_parse
|
|
23
25
|
|
|
24
26
|
from .errors import ReaderError
|
|
25
27
|
from .xml import etree_to_dict
|
|
26
28
|
|
|
29
|
+
try:
|
|
30
|
+
import oaipmh_scythe
|
|
31
|
+
except ImportError:
|
|
32
|
+
oaipmh_scythe = None
|
|
33
|
+
|
|
27
34
|
|
|
28
35
|
class BaseReader(ABC):
|
|
29
36
|
"""Base reader."""
|
|
@@ -226,3 +233,80 @@ class XMLReader(BaseReader):
|
|
|
226
233
|
raise ReaderError(f"Record not found in XML entry.")
|
|
227
234
|
|
|
228
235
|
yield record
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
class OAIPMHReader(BaseReader):
|
|
239
|
+
"""OAIPMH reader."""
|
|
240
|
+
|
|
241
|
+
def __init__(
|
|
242
|
+
self,
|
|
243
|
+
*args,
|
|
244
|
+
base_url=None,
|
|
245
|
+
metadata_prefix=None,
|
|
246
|
+
set=None,
|
|
247
|
+
from_date=None,
|
|
248
|
+
until_date=None,
|
|
249
|
+
verb=None,
|
|
250
|
+
**kwargs,
|
|
251
|
+
):
|
|
252
|
+
"""Constructor."""
|
|
253
|
+
self._base_url = base_url
|
|
254
|
+
self._metadata_prefix = metadata_prefix if not None else "oai_dc"
|
|
255
|
+
self._set = set
|
|
256
|
+
self._until = until_date
|
|
257
|
+
self._from = from_date
|
|
258
|
+
self._verb = verb if not None else "ListRecords"
|
|
259
|
+
super().__init__(*args, **kwargs)
|
|
260
|
+
|
|
261
|
+
def _iter(self, scythe, *args, **kwargs):
|
|
262
|
+
"""Read and parse an OAIPMH stream to dict."""
|
|
263
|
+
|
|
264
|
+
class OAIRecord(oaipmh_scythe.models.Record):
|
|
265
|
+
"""An XML unpacking implementation for more complicated formats."""
|
|
266
|
+
|
|
267
|
+
def get_metadata(self):
|
|
268
|
+
"""Extract and return the record's metadata as a dictionary."""
|
|
269
|
+
return xml_to_dict(
|
|
270
|
+
self.xml.find(f".//{self._oai_namespace}metadata").getchildren()[0],
|
|
271
|
+
)
|
|
272
|
+
|
|
273
|
+
scythe.class_mapping["ListRecords"] = OAIRecord
|
|
274
|
+
try:
|
|
275
|
+
records = scythe.list_records(
|
|
276
|
+
from_=self._from,
|
|
277
|
+
until=self._until,
|
|
278
|
+
metadata_prefix=self._metadata_prefix,
|
|
279
|
+
set_=self._set,
|
|
280
|
+
ignore_deleted=True,
|
|
281
|
+
)
|
|
282
|
+
for record in records:
|
|
283
|
+
yield {"record": record}
|
|
284
|
+
except oaipmh_scythe.NoRecordsMatch:
|
|
285
|
+
raise ReaderError(f"No records found in OAI-PMH request.")
|
|
286
|
+
|
|
287
|
+
def read(self, item=None, *args, **kwargs):
|
|
288
|
+
"""Reads from item or opens the file descriptor from origin."""
|
|
289
|
+
if item:
|
|
290
|
+
raise NotImplementedError(
|
|
291
|
+
"OAIPMHReader does not support being chained after another reader"
|
|
292
|
+
)
|
|
293
|
+
else:
|
|
294
|
+
with oaipmh_scythe.Scythe(self._base_url) as scythe:
|
|
295
|
+
yield from self._iter(scythe=scythe, *args, **kwargs)
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def xml_to_dict(tree: etree._Element):
|
|
299
|
+
"""Convert an XML tree to a dictionary.
|
|
300
|
+
|
|
301
|
+
This function takes an XML element tree and converts it into a dictionary.
|
|
302
|
+
|
|
303
|
+
Args:
|
|
304
|
+
tree: The root element of the XML tree to be converted.
|
|
305
|
+
|
|
306
|
+
Returns:
|
|
307
|
+
A dictionary with the key "record".
|
|
308
|
+
"""
|
|
309
|
+
dict_obj = dict()
|
|
310
|
+
dict_obj["record"] = etree.tostring(tree)
|
|
311
|
+
|
|
312
|
+
return dict_obj
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
#
|
|
3
|
-
# Copyright (C) 2021-
|
|
3
|
+
# Copyright (C) 2021-2024 CERN.
|
|
4
4
|
#
|
|
5
5
|
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
6
6
|
# modify it under the terms of the MIT License; see LICENSE file for more
|
|
@@ -103,6 +103,6 @@ class YamlWriter(BaseWriter):
|
|
|
103
103
|
with open(self._filepath, "a") as file:
|
|
104
104
|
# made into array for safer append
|
|
105
105
|
# will always read array (good for reader)
|
|
106
|
-
yaml.safe_dump([stream_entry.entry], file)
|
|
106
|
+
yaml.safe_dump([stream_entry.entry], file, allow_unicode=True)
|
|
107
107
|
|
|
108
108
|
return stream_entry
|
invenio_vocabularies/ext.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
#
|
|
3
|
-
# Copyright (C) 2020-
|
|
3
|
+
# Copyright (C) 2020-2024 CERN.
|
|
4
4
|
# Copyright (C) 2023 Graz University of Technology.
|
|
5
5
|
#
|
|
6
6
|
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
@@ -40,8 +40,14 @@ from .contrib.subjects import (
|
|
|
40
40
|
SubjectsService,
|
|
41
41
|
SubjectsServiceConfig,
|
|
42
42
|
)
|
|
43
|
-
from .resources
|
|
44
|
-
|
|
43
|
+
from .resources import (
|
|
44
|
+
VocabulariesAdminResource,
|
|
45
|
+
VocabulariesResource,
|
|
46
|
+
VocabulariesResourceConfig,
|
|
47
|
+
VocabularyTypeResourceConfig,
|
|
48
|
+
)
|
|
49
|
+
from .services.config import VocabularyTypesServiceConfig
|
|
50
|
+
from .services.service import VocabulariesService, VocabularyTypeService
|
|
45
51
|
|
|
46
52
|
|
|
47
53
|
class InvenioVocabularies(object):
|
|
@@ -76,6 +82,7 @@ class InvenioVocabularies(object):
|
|
|
76
82
|
funders = FundersServiceConfig
|
|
77
83
|
names = NamesServiceConfig
|
|
78
84
|
subjects = SubjectsServiceConfig
|
|
85
|
+
vocabulary_types = VocabularyTypesServiceConfig
|
|
79
86
|
|
|
80
87
|
return ServiceConfigs
|
|
81
88
|
|
|
@@ -93,9 +100,12 @@ class InvenioVocabularies(object):
|
|
|
93
100
|
self.funders_service = FundersService(config=service_configs.funders)
|
|
94
101
|
self.names_service = NamesService(config=service_configs.names)
|
|
95
102
|
self.subjects_service = SubjectsService(config=service_configs.subjects)
|
|
96
|
-
self.
|
|
103
|
+
self.vocabularies_service = VocabulariesService(
|
|
97
104
|
config=app.config["VOCABULARIES_SERVICE_CONFIG"],
|
|
98
105
|
)
|
|
106
|
+
self.vocabulary_types_service = VocabularyTypeService(
|
|
107
|
+
config=service_configs.vocabulary_types
|
|
108
|
+
)
|
|
99
109
|
|
|
100
110
|
def init_resource(self, app):
|
|
101
111
|
"""Initialize vocabulary resources."""
|
|
@@ -121,9 +131,13 @@ class InvenioVocabularies(object):
|
|
|
121
131
|
config=SubjectsResourceConfig,
|
|
122
132
|
)
|
|
123
133
|
self.resource = VocabulariesResource(
|
|
124
|
-
service=self.
|
|
134
|
+
service=self.vocabularies_service,
|
|
125
135
|
config=app.config["VOCABULARIES_RESOURCE_CONFIG"],
|
|
126
136
|
)
|
|
137
|
+
self.vocabulary_admin_resource = VocabulariesAdminResource(
|
|
138
|
+
service=self.vocabulary_types_service,
|
|
139
|
+
config=VocabularyTypeResourceConfig,
|
|
140
|
+
)
|
|
127
141
|
|
|
128
142
|
|
|
129
143
|
def finalize_app(app):
|
|
@@ -153,7 +167,8 @@ def init(app):
|
|
|
153
167
|
sregistry.register(ext.funders_service, service_id="funders")
|
|
154
168
|
sregistry.register(ext.names_service, service_id="names")
|
|
155
169
|
sregistry.register(ext.subjects_service, service_id="subjects")
|
|
156
|
-
sregistry.register(ext.
|
|
170
|
+
sregistry.register(ext.vocabularies_service, service_id="vocabularies")
|
|
171
|
+
sregistry.register(ext.vocabulary_types_service, service_id="vocabulary-types")
|
|
157
172
|
# Register indexers
|
|
158
173
|
iregistry = app.extensions["invenio-indexer"].registry
|
|
159
174
|
iregistry.register(ext.affiliations_service.indexer, indexer_id="affiliations")
|
|
@@ -161,4 +176,4 @@ def init(app):
|
|
|
161
176
|
iregistry.register(ext.funders_service.indexer, indexer_id="funders")
|
|
162
177
|
iregistry.register(ext.names_service.indexer, indexer_id="names")
|
|
163
178
|
iregistry.register(ext.subjects_service.indexer, indexer_id="subjects")
|
|
164
|
-
iregistry.register(ext.
|
|
179
|
+
iregistry.register(ext.vocabularies_service.indexer, indexer_id="vocabularies")
|
|
@@ -12,6 +12,9 @@ from copy import deepcopy
|
|
|
12
12
|
import yaml
|
|
13
13
|
from invenio_records_resources.proxies import current_service_registry
|
|
14
14
|
|
|
15
|
+
from .contrib.affiliations.datastreams import (
|
|
16
|
+
DATASTREAM_CONFIG as affiliations_ds_config,
|
|
17
|
+
)
|
|
15
18
|
from .contrib.awards.datastreams import DATASTREAM_CONFIG as awards_ds_config
|
|
16
19
|
from .contrib.funders.datastreams import DATASTREAM_CONFIG as funders_ds_config
|
|
17
20
|
from .contrib.names.datastreams import DATASTREAM_CONFIG as names_ds_config
|
|
@@ -68,11 +71,23 @@ class AwardsVocabularyConfig(VocabularyConfig):
|
|
|
68
71
|
raise NotImplementedError("Service not implemented for Awards")
|
|
69
72
|
|
|
70
73
|
|
|
74
|
+
class AffiliationsVocabularyConfig(VocabularyConfig):
|
|
75
|
+
"""Affiliations Vocabulary Config."""
|
|
76
|
+
|
|
77
|
+
config = affiliations_ds_config
|
|
78
|
+
vocabulary_name = "affiliations"
|
|
79
|
+
|
|
80
|
+
def get_service(self):
|
|
81
|
+
"""Get the service for the vocabulary."""
|
|
82
|
+
raise NotImplementedError("Service not implemented for Affiliations")
|
|
83
|
+
|
|
84
|
+
|
|
71
85
|
def get_vocabulary_config(vocabulary):
|
|
72
86
|
"""Factory function to get the appropriate Vocabulary Config."""
|
|
73
87
|
vocab_config = {
|
|
74
88
|
"names": NamesVocabularyConfig,
|
|
75
89
|
"funders": FundersVocabularyConfig,
|
|
76
90
|
"awards": AwardsVocabularyConfig,
|
|
91
|
+
"affiliations": AffiliationsVocabularyConfig,
|
|
77
92
|
}
|
|
78
93
|
return vocab_config.get(vocabulary, VocabularyConfig)()
|
invenio_vocabularies/proxies.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
#
|
|
3
|
-
# Copyright (C) 2021 CERN.
|
|
3
|
+
# Copyright (C) 2021-2024 CERN.
|
|
4
4
|
# Copyright (C) 2021 Northwestern University.
|
|
5
5
|
#
|
|
6
6
|
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
@@ -19,7 +19,7 @@ def _ext_proxy(attr):
|
|
|
19
19
|
)
|
|
20
20
|
|
|
21
21
|
|
|
22
|
-
current_service = _ext_proxy("
|
|
22
|
+
current_service = _ext_proxy("vocabularies_service")
|
|
23
23
|
"""Proxy to the instantiated vocabulary service."""
|
|
24
24
|
|
|
25
25
|
|