invenio-vocabularies 2.3.1__py2.py3-none-any.whl → 6.3.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of invenio-vocabularies might be problematic. Click here for more details.
- invenio_vocabularies/__init__.py +2 -2
- invenio_vocabularies/administration/__init__.py +10 -0
- invenio_vocabularies/administration/views/__init__.py +10 -0
- invenio_vocabularies/administration/views/vocabularies.py +45 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/package.json +1 -7
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/CustomAwardForm.js +80 -64
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingField.js +49 -41
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingModal.js +5 -7
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/NoAwardResults.js +3 -3
- invenio_vocabularies/cli.py +31 -44
- invenio_vocabularies/config.py +68 -4
- invenio_vocabularies/contrib/affiliations/affiliations.py +11 -0
- invenio_vocabularies/contrib/affiliations/api.py +1 -2
- invenio_vocabularies/contrib/affiliations/config.py +13 -2
- invenio_vocabularies/contrib/affiliations/datastreams.py +186 -0
- invenio_vocabularies/contrib/affiliations/jsonschemas/affiliations/affiliation-v1.0.0.json +38 -1
- invenio_vocabularies/contrib/affiliations/mappings/os-v1/affiliations/affiliation-v1.0.0.json +22 -1
- invenio_vocabularies/contrib/affiliations/mappings/os-v1/affiliations/affiliation-v2.0.0.json +171 -0
- invenio_vocabularies/contrib/affiliations/mappings/os-v2/affiliations/affiliation-v1.0.0.json +22 -1
- invenio_vocabularies/contrib/affiliations/mappings/os-v2/affiliations/affiliation-v2.0.0.json +171 -0
- invenio_vocabularies/contrib/affiliations/mappings/v7/affiliations/affiliation-v1.0.0.json +22 -1
- invenio_vocabularies/contrib/affiliations/schema.py +17 -3
- invenio_vocabularies/contrib/affiliations/services.py +1 -2
- invenio_vocabularies/contrib/awards/awards.py +17 -5
- invenio_vocabularies/contrib/awards/datastreams.py +241 -7
- invenio_vocabularies/contrib/awards/jsonschemas/awards/award-v1.0.0.json +38 -0
- invenio_vocabularies/contrib/awards/mappings/os-v1/awards/award-v1.0.0.json +51 -2
- invenio_vocabularies/contrib/awards/mappings/os-v2/awards/award-v1.0.0.json +51 -2
- invenio_vocabularies/contrib/awards/mappings/v7/awards/award-v1.0.0.json +51 -2
- invenio_vocabularies/contrib/awards/schema.py +16 -1
- invenio_vocabularies/contrib/awards/serializer.py +8 -1
- invenio_vocabularies/contrib/awards/services.py +1 -2
- invenio_vocabularies/contrib/common/__init__.py +9 -0
- invenio_vocabularies/contrib/common/openaire/__init__.py +9 -0
- invenio_vocabularies/contrib/common/openaire/datastreams.py +84 -0
- invenio_vocabularies/contrib/common/ror/__init__.py +9 -0
- invenio_vocabularies/contrib/common/ror/datastreams.py +220 -0
- invenio_vocabularies/contrib/funders/config.py +11 -2
- invenio_vocabularies/contrib/funders/datastreams.py +40 -62
- invenio_vocabularies/contrib/funders/funders.py +3 -1
- invenio_vocabularies/contrib/funders/jsonschemas/funders/funder-v1.0.0.json +36 -1
- invenio_vocabularies/contrib/funders/mappings/os-v1/funders/funder-v1.0.0.json +22 -1
- invenio_vocabularies/contrib/funders/mappings/os-v1/funders/funder-v2.0.0.json +156 -0
- invenio_vocabularies/contrib/funders/mappings/os-v2/funders/funder-v1.0.0.json +22 -1
- invenio_vocabularies/contrib/funders/mappings/os-v2/funders/funder-v2.0.0.json +156 -0
- invenio_vocabularies/contrib/funders/mappings/v7/funders/funder-v1.0.0.json +22 -1
- invenio_vocabularies/contrib/funders/schema.py +8 -0
- invenio_vocabularies/contrib/funders/serializer.py +2 -1
- invenio_vocabularies/contrib/names/config.py +5 -3
- invenio_vocabularies/contrib/names/datastreams.py +172 -4
- invenio_vocabularies/contrib/names/jsonschemas/names/name-v1.0.0.json +3 -0
- invenio_vocabularies/contrib/names/mappings/os-v1/names/name-v1.0.0.json +3 -0
- invenio_vocabularies/contrib/names/mappings/os-v1/names/name-v2.0.0.json +150 -0
- invenio_vocabularies/contrib/names/mappings/os-v2/names/name-v1.0.0.json +3 -0
- invenio_vocabularies/contrib/names/mappings/os-v2/names/name-v2.0.0.json +150 -0
- invenio_vocabularies/contrib/names/mappings/v7/names/name-v1.0.0.json +3 -0
- invenio_vocabularies/contrib/names/names.py +15 -3
- invenio_vocabularies/contrib/names/permissions.py +20 -0
- invenio_vocabularies/contrib/names/s3client.py +44 -0
- invenio_vocabularies/contrib/names/schema.py +14 -0
- invenio_vocabularies/contrib/subjects/config.py +9 -3
- invenio_vocabularies/contrib/subjects/datastreams.py +61 -0
- invenio_vocabularies/contrib/subjects/euroscivoc/__init__.py +9 -0
- invenio_vocabularies/contrib/subjects/euroscivoc/datastreams.py +171 -0
- invenio_vocabularies/contrib/subjects/jsonschemas/subjects/subject-v1.0.0.json +31 -0
- invenio_vocabularies/contrib/subjects/mappings/os-v1/subjects/subject-v1.0.0.json +35 -0
- invenio_vocabularies/contrib/subjects/mappings/os-v2/subjects/subject-v1.0.0.json +35 -0
- invenio_vocabularies/contrib/subjects/mappings/v7/subjects/subject-v1.0.0.json +35 -0
- invenio_vocabularies/contrib/subjects/mesh/__init__.py +9 -0
- invenio_vocabularies/contrib/subjects/mesh/datastreams.py +43 -0
- invenio_vocabularies/contrib/subjects/schema.py +47 -5
- invenio_vocabularies/contrib/subjects/subjects.py +10 -0
- invenio_vocabularies/datastreams/datastreams.py +61 -13
- invenio_vocabularies/datastreams/factories.py +1 -2
- invenio_vocabularies/datastreams/readers.py +138 -29
- invenio_vocabularies/datastreams/tasks.py +37 -0
- invenio_vocabularies/datastreams/transformers.py +17 -27
- invenio_vocabularies/datastreams/writers.py +116 -14
- invenio_vocabularies/datastreams/xml.py +34 -0
- invenio_vocabularies/ext.py +59 -5
- invenio_vocabularies/factories.py +137 -0
- invenio_vocabularies/jobs.py +133 -0
- invenio_vocabularies/proxies.py +2 -2
- invenio_vocabularies/records/jsonschemas/vocabularies/definitions-v1.0.0.json +7 -0
- invenio_vocabularies/records/jsonschemas/vocabularies/vocabulary-v1.0.0.json +1 -4
- invenio_vocabularies/records/mappings/os-v1/vocabularies/vocabulary-v1.0.0.json +3 -3
- invenio_vocabularies/records/mappings/os-v2/vocabularies/vocabulary-v1.0.0.json +3 -3
- invenio_vocabularies/records/mappings/v7/vocabularies/vocabulary-v1.0.0.json +3 -3
- invenio_vocabularies/records/models.py +2 -4
- invenio_vocabularies/records/pidprovider.py +1 -2
- invenio_vocabularies/records/systemfields/relations.py +2 -2
- invenio_vocabularies/resources/__init__.py +9 -1
- invenio_vocabularies/resources/config.py +105 -0
- invenio_vocabularies/resources/resource.py +31 -41
- invenio_vocabularies/resources/schema.py +2 -1
- invenio_vocabularies/services/__init__.py +5 -2
- invenio_vocabularies/services/config.py +179 -0
- invenio_vocabularies/services/custom_fields/__init__.py +6 -2
- invenio_vocabularies/services/custom_fields/subject.py +82 -0
- invenio_vocabularies/services/custom_fields/vocabulary.py +5 -3
- invenio_vocabularies/services/permissions.py +3 -1
- invenio_vocabularies/services/results.py +110 -0
- invenio_vocabularies/services/schema.py +11 -2
- invenio_vocabularies/services/service.py +46 -94
- invenio_vocabularies/services/tasks.py +1 -1
- invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/subjects.html +23 -0
- invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/vocabularies-list.html +12 -0
- invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/vocabulary-details.html +71 -0
- invenio_vocabularies/translations/af/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ar/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/bg/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ca/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/cs/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/da/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/de/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/de_AT/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/de_DE/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/el/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/en/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/en_AT/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/en_HU/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/es/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/es_CU/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/es_MX/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/et/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/et_EE/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/fa/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/fa_IR/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/fr/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/fr_CI/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/fr_FR/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/gl/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/hi_IN/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/hr/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/hu/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/hu_HU/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/it/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ja/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ka/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/lt/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/messages.pot +95 -48
- invenio_vocabularies/translations/ne/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/no/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/pl/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/pt/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ro/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ru/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/rw/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/sk/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/sv/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/sv_SE/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/tr/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/uk/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/uk_UA/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/zh_CN/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/zh_TW/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/views.py +12 -26
- invenio_vocabularies/webpack.py +3 -3
- {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/METADATA +150 -6
- {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/RECORD +165 -132
- {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/WHEEL +1 -1
- {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/entry_points.txt +17 -0
- {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/AUTHORS.rst +0 -0
- {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/LICENSE +0 -0
- {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
{
|
|
2
|
+
"settings": {
|
|
3
|
+
"analysis": {
|
|
4
|
+
"char_filter": {
|
|
5
|
+
"strip_special_chars": {
|
|
6
|
+
"type": "pattern_replace",
|
|
7
|
+
"pattern": "[\\p{Punct}\\p{S}]",
|
|
8
|
+
"replacement": ""
|
|
9
|
+
}
|
|
10
|
+
},
|
|
11
|
+
"analyzer": {
|
|
12
|
+
"accent_edge_analyzer": {
|
|
13
|
+
"tokenizer": "standard",
|
|
14
|
+
"type": "custom",
|
|
15
|
+
"char_filter": ["strip_special_chars"],
|
|
16
|
+
"filter": [
|
|
17
|
+
"lowercasepreserveoriginal",
|
|
18
|
+
"asciifoldingpreserveoriginal",
|
|
19
|
+
"edgegrams"
|
|
20
|
+
]
|
|
21
|
+
},
|
|
22
|
+
"accent_analyzer": {
|
|
23
|
+
"tokenizer": "standard",
|
|
24
|
+
"type": "custom",
|
|
25
|
+
"char_filter": ["strip_special_chars"],
|
|
26
|
+
"filter": [
|
|
27
|
+
"lowercasepreserveoriginal",
|
|
28
|
+
"asciifoldingpreserveoriginal"
|
|
29
|
+
]
|
|
30
|
+
}
|
|
31
|
+
},
|
|
32
|
+
"normalizer": {
|
|
33
|
+
"accent_normalizer": {
|
|
34
|
+
"type": "custom",
|
|
35
|
+
"char_filter": ["strip_special_chars"],
|
|
36
|
+
"filter": [
|
|
37
|
+
"lowercase",
|
|
38
|
+
"asciifolding"
|
|
39
|
+
]
|
|
40
|
+
}
|
|
41
|
+
},
|
|
42
|
+
"filter": {
|
|
43
|
+
"lowercasepreserveoriginal": {
|
|
44
|
+
"type": "lowercase",
|
|
45
|
+
"preserve_original": true
|
|
46
|
+
},
|
|
47
|
+
"asciifoldingpreserveoriginal": {
|
|
48
|
+
"type": "asciifolding",
|
|
49
|
+
"preserve_original": true
|
|
50
|
+
},
|
|
51
|
+
"edgegrams": {
|
|
52
|
+
"type": "edge_ngram",
|
|
53
|
+
"min_gram": 2,
|
|
54
|
+
"max_gram": 20
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
},
|
|
59
|
+
"mappings": {
|
|
60
|
+
"dynamic_templates": [
|
|
61
|
+
{
|
|
62
|
+
"i18n_title": {
|
|
63
|
+
"path_match": "title.*",
|
|
64
|
+
"match_mapping_type": "string",
|
|
65
|
+
"mapping": {
|
|
66
|
+
"type": "text",
|
|
67
|
+
"analyzer": "accent_edge_analyzer",
|
|
68
|
+
"search_analyzer": "accent_analyzer"
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
],
|
|
73
|
+
"dynamic": "strict",
|
|
74
|
+
"properties": {
|
|
75
|
+
"$schema": {
|
|
76
|
+
"type": "keyword",
|
|
77
|
+
"index": "false"
|
|
78
|
+
},
|
|
79
|
+
"created": {
|
|
80
|
+
"type": "date"
|
|
81
|
+
},
|
|
82
|
+
"updated": {
|
|
83
|
+
"type": "date"
|
|
84
|
+
},
|
|
85
|
+
"indexed_at": {
|
|
86
|
+
"type": "date"
|
|
87
|
+
},
|
|
88
|
+
"uuid": {
|
|
89
|
+
"type": "keyword"
|
|
90
|
+
},
|
|
91
|
+
"version_id": {
|
|
92
|
+
"type": "integer"
|
|
93
|
+
},
|
|
94
|
+
"id": {
|
|
95
|
+
"type": "keyword"
|
|
96
|
+
},
|
|
97
|
+
"name_sort": {
|
|
98
|
+
"type": "keyword"
|
|
99
|
+
},
|
|
100
|
+
"name": {
|
|
101
|
+
"type": "text",
|
|
102
|
+
"analyzer": "accent_edge_analyzer",
|
|
103
|
+
"search_analyzer": "accent_analyzer",
|
|
104
|
+
"copy_to": "name_sort"
|
|
105
|
+
},
|
|
106
|
+
"acronym": {
|
|
107
|
+
"type": "text",
|
|
108
|
+
"analyzer": "accent_edge_analyzer",
|
|
109
|
+
"search_analyzer": "accent_analyzer",
|
|
110
|
+
"fields": {
|
|
111
|
+
"keyword": {
|
|
112
|
+
"type": "keyword",
|
|
113
|
+
"normalizer": "accent_normalizer"
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
},
|
|
117
|
+
"identifiers": {
|
|
118
|
+
"properties": {
|
|
119
|
+
"identifier": {
|
|
120
|
+
"type": "keyword"
|
|
121
|
+
},
|
|
122
|
+
"scheme": {
|
|
123
|
+
"type": "keyword"
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
},
|
|
127
|
+
"pid": {
|
|
128
|
+
"type": "object",
|
|
129
|
+
"properties": {
|
|
130
|
+
"pk": {
|
|
131
|
+
"type": "integer"
|
|
132
|
+
},
|
|
133
|
+
"pid_type": {
|
|
134
|
+
"type": "keyword"
|
|
135
|
+
},
|
|
136
|
+
"obj_type": {
|
|
137
|
+
"type": "keyword"
|
|
138
|
+
},
|
|
139
|
+
"status": {
|
|
140
|
+
"type": "keyword"
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
},
|
|
144
|
+
"title": {
|
|
145
|
+
"type": "object",
|
|
146
|
+
"dynamic": "true"
|
|
147
|
+
},
|
|
148
|
+
"tags": {
|
|
149
|
+
"type": "keyword"
|
|
150
|
+
},
|
|
151
|
+
"country": {
|
|
152
|
+
"type": "text"
|
|
153
|
+
},
|
|
154
|
+
"country_name": {
|
|
155
|
+
"type": "text"
|
|
156
|
+
},
|
|
157
|
+
"location_name": {
|
|
158
|
+
"type": "text"
|
|
159
|
+
},
|
|
160
|
+
"status": {
|
|
161
|
+
"type": "keyword"
|
|
162
|
+
},
|
|
163
|
+
"aliases": {
|
|
164
|
+
"type": "text"
|
|
165
|
+
},
|
|
166
|
+
"types": {
|
|
167
|
+
"type": "keyword"
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
}
|
|
@@ -84,7 +84,28 @@
|
|
|
84
84
|
},
|
|
85
85
|
"title": {
|
|
86
86
|
"type": "object",
|
|
87
|
-
"dynamic": true
|
|
87
|
+
"dynamic": "true"
|
|
88
|
+
},
|
|
89
|
+
"tags": {
|
|
90
|
+
"type": "keyword"
|
|
91
|
+
},
|
|
92
|
+
"country": {
|
|
93
|
+
"type": "text"
|
|
94
|
+
},
|
|
95
|
+
"country_name": {
|
|
96
|
+
"type": "text"
|
|
97
|
+
},
|
|
98
|
+
"location_name": {
|
|
99
|
+
"type": "text"
|
|
100
|
+
},
|
|
101
|
+
"status": {
|
|
102
|
+
"type": "keyword"
|
|
103
|
+
},
|
|
104
|
+
"aliases": {
|
|
105
|
+
"type": "text"
|
|
106
|
+
},
|
|
107
|
+
"types": {
|
|
108
|
+
"type": "keyword"
|
|
88
109
|
}
|
|
89
110
|
}
|
|
90
111
|
}
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
#
|
|
3
|
-
# Copyright (C) 2020-
|
|
3
|
+
# Copyright (C) 2020-2024 CERN.
|
|
4
|
+
# Copyright (C) 2024 California Institute of Technology.
|
|
4
5
|
#
|
|
5
6
|
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
6
7
|
# modify it under the terms of the MIT License; see LICENSE file for more
|
|
@@ -11,7 +12,7 @@
|
|
|
11
12
|
from functools import partial
|
|
12
13
|
|
|
13
14
|
from invenio_i18n import lazy_gettext as _
|
|
14
|
-
from marshmallow import fields
|
|
15
|
+
from marshmallow import fields, validate
|
|
15
16
|
from marshmallow_utils.fields import IdentifierSet, SanitizedUnicode
|
|
16
17
|
from marshmallow_utils.schemas import IdentifierSchema
|
|
17
18
|
|
|
@@ -36,7 +37,20 @@ class AffiliationSchema(BaseVocabularySchema, ModePIDFieldVocabularyMixin):
|
|
|
36
37
|
)
|
|
37
38
|
)
|
|
38
39
|
)
|
|
39
|
-
name = SanitizedUnicode(
|
|
40
|
+
name = SanitizedUnicode(
|
|
41
|
+
required=True, validate=validate.Length(min=1, error=_("Name cannot be blank."))
|
|
42
|
+
)
|
|
43
|
+
country = SanitizedUnicode()
|
|
44
|
+
country_name = SanitizedUnicode()
|
|
45
|
+
location_name = SanitizedUnicode()
|
|
46
|
+
id = SanitizedUnicode(
|
|
47
|
+
validate=validate.Length(min=1, error=_("PID cannot be blank."))
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
acronym = SanitizedUnicode()
|
|
51
|
+
aliases = fields.List(SanitizedUnicode())
|
|
52
|
+
status = SanitizedUnicode()
|
|
53
|
+
types = fields.List(SanitizedUnicode())
|
|
40
54
|
|
|
41
55
|
|
|
42
56
|
class AffiliationRelationSchema(ContribVocabularyRelationSchema):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
#
|
|
3
|
-
# Copyright (C) 2021 CERN.
|
|
3
|
+
# Copyright (C) 2021-2024 CERN.
|
|
4
4
|
#
|
|
5
5
|
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
6
6
|
# modify it under the terms of the MIT License; see LICENSE file for more
|
|
@@ -8,7 +8,6 @@
|
|
|
8
8
|
|
|
9
9
|
"""Vocabulary affiliations."""
|
|
10
10
|
|
|
11
|
-
|
|
12
11
|
from .affiliations import record_type
|
|
13
12
|
|
|
14
13
|
AffiliationsServiceConfig = record_type.service_config_cls
|
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
#
|
|
3
|
-
# Copyright (C) 2022 CERN.
|
|
3
|
+
# Copyright (C) 2022-2024 CERN.
|
|
4
4
|
#
|
|
5
5
|
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
6
6
|
# modify it under the terms of the MIT License; see LICENSE file for more
|
|
7
7
|
# details.
|
|
8
8
|
|
|
9
9
|
"""Vocabulary awards."""
|
|
10
|
+
|
|
10
11
|
from flask_resources import (
|
|
11
12
|
BaseListSchema,
|
|
12
13
|
JSONSerializer,
|
|
@@ -17,24 +18,35 @@ from invenio_db import db
|
|
|
17
18
|
from invenio_records.dumpers import SearchDumper
|
|
18
19
|
from invenio_records.dumpers.indexedat import IndexedAtDumperExt
|
|
19
20
|
from invenio_records.dumpers.relations import RelationDumperExt
|
|
20
|
-
from invenio_records.systemfields import
|
|
21
|
+
from invenio_records.systemfields import MultiRelationsField
|
|
21
22
|
from invenio_records_resources.factories.factory import RecordTypeFactory
|
|
22
|
-
from invenio_records_resources.records.systemfields import
|
|
23
|
+
from invenio_records_resources.records.systemfields import (
|
|
24
|
+
ModelPIDField,
|
|
25
|
+
PIDListRelation,
|
|
26
|
+
PIDRelation,
|
|
27
|
+
)
|
|
23
28
|
from invenio_records_resources.resources.records.headers import etag_headers
|
|
24
29
|
|
|
25
30
|
from ...services.permissions import PermissionPolicy
|
|
26
31
|
from ..funders.api import Funder
|
|
32
|
+
from ..subjects.api import Subject
|
|
27
33
|
from .config import AwardsSearchOptions, service_components
|
|
28
34
|
from .schema import AwardSchema
|
|
29
35
|
from .serializer import AwardL10NItemSchema
|
|
30
36
|
|
|
31
|
-
award_relations =
|
|
37
|
+
award_relations = MultiRelationsField(
|
|
32
38
|
funders=PIDRelation(
|
|
33
39
|
"funder",
|
|
34
40
|
keys=["name"],
|
|
35
41
|
pid_field=Funder.pid,
|
|
36
42
|
cache_key="funder",
|
|
37
|
-
)
|
|
43
|
+
),
|
|
44
|
+
subjects=PIDListRelation(
|
|
45
|
+
"subjects",
|
|
46
|
+
keys=["subject", "scheme", "identifiers", "props"],
|
|
47
|
+
pid_field=Subject.pid,
|
|
48
|
+
cache_key="subjects",
|
|
49
|
+
),
|
|
38
50
|
)
|
|
39
51
|
|
|
40
52
|
record_type = RecordTypeFactory(
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
#
|
|
3
|
-
# Copyright (C) 2022 CERN.
|
|
3
|
+
# Copyright (C) 2022-2024 CERN.
|
|
4
4
|
#
|
|
5
5
|
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
6
6
|
# modify it under the terms of the MIT License; see LICENSE file for more
|
|
@@ -8,10 +8,17 @@
|
|
|
8
8
|
|
|
9
9
|
"""Awards datastreams, transformers, writers and readers."""
|
|
10
10
|
|
|
11
|
+
import io
|
|
12
|
+
|
|
13
|
+
import requests
|
|
14
|
+
from flask import current_app
|
|
11
15
|
from invenio_access.permissions import system_identity
|
|
12
16
|
from invenio_i18n import lazy_gettext as _
|
|
13
17
|
|
|
18
|
+
from invenio_vocabularies.datastreams.errors import ReaderError
|
|
19
|
+
|
|
14
20
|
from ...datastreams.errors import TransformerError
|
|
21
|
+
from ...datastreams.readers import BaseReader
|
|
15
22
|
from ...datastreams.transformers import BaseTransformer
|
|
16
23
|
from ...datastreams.writers import ServiceWriter
|
|
17
24
|
from .config import awards_ec_ror_id, awards_openaire_funders_mapping
|
|
@@ -39,7 +46,20 @@ class OpenAIREProjectTransformer(BaseTransformer):
|
|
|
39
46
|
award = {}
|
|
40
47
|
|
|
41
48
|
code = record["code"]
|
|
42
|
-
|
|
49
|
+
|
|
50
|
+
# The `id` should follow the format `sourcePrefix::md5(localId)` where `sourcePrefix` is 12 characters long.
|
|
51
|
+
# See: https://graph.openaire.eu/docs/data-model/pids-and-identifiers#identifiers-in-the-graph
|
|
52
|
+
#
|
|
53
|
+
# The format of `id` in the full OpenAIRE Graph Dataset (https://doi.org/10.5281/zenodo.3516917)
|
|
54
|
+
# follows this format (e.g. 'abc_________::0123456789abcdef0123456789abcdef').
|
|
55
|
+
# However, the format of `id` in the new collected projects dataset (https://doi.org/10.5281/zenodo.6419021)
|
|
56
|
+
# does not follow this format, and has a `40|` prefix (e.g. '40|abc_________::0123456789abcdef0123456789abcdef').
|
|
57
|
+
#
|
|
58
|
+
# The number '40' corresponds to the entity type 'Project'.
|
|
59
|
+
# See: https://ec.europa.eu/research/participants/documents/downloadPublic?documentIds=080166e5a3a1a213&appId=PPGMS
|
|
60
|
+
# See: https://graph.openaire.eu/docs/5.0.0/data-model/entities/project#id
|
|
61
|
+
openaire_funder_prefix = record["id"].split("::", 1)[0].split("|", 1)[-1]
|
|
62
|
+
|
|
43
63
|
funder_id = awards_openaire_funders_mapping.get(openaire_funder_prefix)
|
|
44
64
|
if funder_id is None:
|
|
45
65
|
raise TransformerError(
|
|
@@ -54,10 +74,7 @@ class OpenAIREProjectTransformer(BaseTransformer):
|
|
|
54
74
|
|
|
55
75
|
funding = next(iter(record.get("funding", [])), None)
|
|
56
76
|
if funding:
|
|
57
|
-
|
|
58
|
-
# Example funding stream ID: `EC::HE::HORIZON-AG-UN`. We need the `EC`
|
|
59
|
-
# string, i.e. the second "part" of the identifier.
|
|
60
|
-
program = next(iter(funding_stream_id.split("::")[1:2]), "")
|
|
77
|
+
program = funding.get("fundingStream", {}).get("id", "")
|
|
61
78
|
if program:
|
|
62
79
|
award["program"] = program
|
|
63
80
|
|
|
@@ -78,7 +95,20 @@ class OpenAIREProjectTransformer(BaseTransformer):
|
|
|
78
95
|
award["identifiers"] = identifiers
|
|
79
96
|
|
|
80
97
|
award["number"] = code
|
|
98
|
+
|
|
99
|
+
# `title` is a mandatory attribute of the `Project` object in the OpenAIRE Graph Data Model.
|
|
100
|
+
# See: https://graph.openaire.eu/docs/data-model/entities/project#title
|
|
101
|
+
# However, 15'000+ awards for the FCT funder (and 1 record for the NIH funder) are missing a title attribute.
|
|
102
|
+
if "title" not in record:
|
|
103
|
+
raise TransformerError(
|
|
104
|
+
_(
|
|
105
|
+
"Missing title attribute for award {award_id}".format(
|
|
106
|
+
award_id=award["id"]
|
|
107
|
+
)
|
|
108
|
+
)
|
|
109
|
+
)
|
|
81
110
|
award["title"] = {"en": record["title"]}
|
|
111
|
+
|
|
82
112
|
award["funder"] = {"id": funder_id}
|
|
83
113
|
acronym = record.get("acronym")
|
|
84
114
|
if acronym:
|
|
@@ -88,22 +118,226 @@ class OpenAIREProjectTransformer(BaseTransformer):
|
|
|
88
118
|
return stream_entry
|
|
89
119
|
|
|
90
120
|
|
|
121
|
+
class CORDISProjectHTTPReader(BaseReader):
    """CORDIS project HTTP reader.

    Downloads the CORDIS project ZIP archive selected by the reader's origin
    (``HE``, ``H2020`` or ``FP7``) and yields it as an in-memory binary stream.
    """

    # Download URL of the project archive for each supported origin.
    _FILE_URLS = {
        "HE": "https://cordis.europa.eu/data/cordis-HORIZONprojects-xml.zip",
        "H2020": "https://cordis.europa.eu/data/cordis-h2020projects-xml.zip",
        "FP7": "https://cordis.europa.eu/data/cordis-fp7projects-xml.zip",
    }

    # (connect, read) timeouts in seconds. `requests` waits indefinitely when
    # no timeout is given, which would hang the whole datastream on a stalled
    # connection. The read timeout bounds the wait between received bytes, not
    # the total download time, so large archives are still downloadable.
    _REQUEST_TIMEOUT = (10, 300)

    def _iter(self, fp, *args, **kwargs):
        """Not supported: this reader yields one archive, not individual items."""
        raise NotImplementedError(
            "CORDISProjectHTTPReader downloads one file and therefore does not iterate through items"
        )

    def read(self, item=None, *args, **kwargs):
        """Download the CORDIS project ZIP file and yield it as a binary stream.

        :param item: must be falsy; this reader cannot be chained after
            another reader.
        :returns: a generator yielding a single ``io.BytesIO`` holding the
            downloaded archive.
        :raises NotImplementedError: if ``item`` is given.
        :raises ReaderError: if the origin is not ``HE``, ``H2020`` or ``FP7``.
        """
        if item:
            raise NotImplementedError(
                "CORDISProjectHTTPReader does not support being chained after another reader"
            )

        file_url = self._FILE_URLS.get(self._origin)
        if file_url is None:
            raise ReaderError(
                "The --origin option should be either 'HE' (for Horizon Europe) or 'H2020' (for Horizon 2020) or 'FP7'"
            )

        # Download the ZIP file and fully load the response bytes in memory.
        # The bytes are wrapped in a BytesIO because `zipfile.ZipFile` needs a
        # seekable file-like object, which `file_resp.raw` is not.
        file_resp = requests.get(file_url, timeout=self._REQUEST_TIMEOUT)
        file_resp.raise_for_status()
        yield io.BytesIO(file_resp.content)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
class CORDISProjectTransformer(BaseTransformer):
    """Transforms a CORDIS project record into an award record."""

    def apply(self, stream_entry, **kwargs):
        """Apply the transformation to the stream entry.

        Replaces ``stream_entry.entry`` (the CORDIS project dict) with a
        partial award holding its ``id`` and, when the project provides them,
        ``subjects``, ``organizations`` and ``program``.

        :param stream_entry: entry whose ``entry`` attribute is the CORDIS
            project dict.
        :returns: the same ``stream_entry`` with ``entry`` replaced.
        :raises TransformerError: if programmes are listed but no main
            related legal basis programme can be determined.
        """
        record = stream_entry.entry
        # NOTE(review): `or {}` also tolerates keys that are present with a
        # None value — presumably how the XML reader represents empty
        # elements; confirm against the reader output.
        relations = record.get("relations") or {}
        associations = relations.get("associations") or {}

        # Here `id` is the project ID, which will be used to attach the
        # update to the existing project.
        award = {
            "id": f"{current_app.config['VOCABULARIES_AWARDS_EC_ROR_ID']}::{record['id']}"
        }

        categories = (relations.get("categories") or {}).get("category")
        if categories:
            award["subjects"] = self._subjects(categories)

        organizations = associations.get("organization")
        if organizations:
            award["organizations"] = self._organizations(organizations)

        programmes = associations.get("programme")
        if programmes:
            award["program"] = self._program(programmes, record["id"])

        stream_entry.entry = award
        return stream_entry

    @staticmethod
    def _as_list(value):
        """Wrap a lone item in a list; lists are returned unchanged."""
        # Projects with a single category/organization/programme are not
        # wrapped in a list, so normalise before iterating.
        return value if isinstance(value, list) else [value]

    def _subjects(self, categories):
        """Build subject references from the euroSciVoc categories."""
        subjects = []
        for category in self._as_list(categories):
            if category.get("@classification") != "euroSciVoc":
                continue
            # Only the last path segment of the code is used, and only when
            # it is numeric.
            vocab_id = category["code"].split("/")[-1]
            if vocab_id.isdigit():
                subjects.append({"id": f"euroscivoc:{vocab_id}"})
        return subjects

    def _organizations(self, organizations):
        """Build the award organizations from the project organizations."""
        result = []
        for organization in self._as_list(organizations):
            # Some organizations in FP7 projects do not have a "legalname" key,
            # for instance the 14th participant in "SAGE" https://cordis.europa.eu/project/id/999902.
            # In this case, fully skip the organization entry.
            if "legalname" not in organization:
                continue

            organization_data = {"organization": organization["legalname"]}

            # Some organizations in FP7 projects do not have an "id" key (the PIC identifier),
            # for instance "AIlGreenVehicles" in "MOTORBRAIN" https://cordis.europa.eu/project/id/270693.
            # In this case, still store the name but skip the identifier part.
            if "id" in organization:
                organization_data["scheme"] = "pic"
                organization_data["id"] = organization["id"]

            result.append(organization_data)
        return result

    def _program(self, programmes, project_id):
        """Return the code of the main related legal basis programme.

        For instance the code "HORIZON.1.2" which means
        "Marie Skłodowska-Curie Actions (MSCA)".
        See https://cordis.europa.eu/programme/id/HORIZON.1.2

        :raises TransformerError: if no main programme can be determined.
        """
        legal_bases = [
            programme
            for programme in self._as_list(programmes)
            if programme.get("@type") == "relatedLegalBasis"
        ]

        if not legal_bases:
            # Format after the translation lookup so the catalogue is queried
            # with the constant message template.
            raise TransformerError(
                _(
                    "No related legal basis programme found for project {project_id}"
                ).format(project_id=project_id)
            )

        if len(legal_bases) == 1:
            # FP7 projects have only one related legal basis programme and do
            # not have a 'uniqueprogrammepart' field.
            return legal_bases[0]["code"]

        # The entry with the field 'uniqueprogrammepart' == 'true' is the high
        # level programme code, while the other entry is a more specific
        # sub-programme.
        main_programmes = [
            programme
            for programme in legal_bases
            if programme.get("@uniqueprogrammepart") == "true"
        ]
        if not main_programmes:
            # Report a per-entry transformation error instead of failing with
            # a bare IndexError.
            raise TransformerError(
                _(
                    "No unique related legal basis programme found for project {project_id}"
                ).format(project_id=project_id)
            )

        # A few H2020 projects have more than one 'uniqueprogrammepart' == 'true',
        # for instance https://cordis.europa.eu/project/id/825673 which has one
        # entry with the code 'H2020-EU.1.2.' and one with 'H2020-EU.1.2.3.'.
        # Take the shortest code (first one on ties) so that it conforms with
        # other projects, which all have the shortest code as the main one.
        return min(main_programmes, key=lambda programme: len(programme["code"]))[
            "code"
        ]
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
class CORDISAwardsServiceWriter(ServiceWriter):
    """CORDIS Awards service writer.

    Writes CORDIS entries through the awards service in update-only mode:
    CORDIS data augments awards that already exist (with subjects,
    organizations, and program information) and is not used to create new
    awards.
    """

    def __init__(self, *args, **kwargs):
        """Constructor.

        :param args: positional arguments forwarded to ``ServiceWriter``.
        :param kwargs: keyword arguments forwarded to ``ServiceWriter``.
            ``service_or_name`` defaults to ``"awards"`` when not given.
        """
        service_or_name = kwargs.pop("service_or_name", "awards")
        # Here we only update and we do not insert, since CORDIS data is used
        # to augment existing awards and is not used to create new awards.
        # Star-unpacking goes first so the call reads in the order Python
        # actually binds the arguments.
        super().__init__(
            *args,
            service_or_name=service_or_name,
            insert=False,
            update=True,
            **kwargs
        )

    def _entry_id(self, entry):
        """Get the id from an entry.

        :param entry: mapping holding the award data; must contain ``"id"``.
        :returns: the value stored under ``entry["id"]``.
        """
        return entry["id"]
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
# Reader classes contributed by this module, keyed by the "type" name that a
# datastream configuration uses to select them.
VOCABULARIES_DATASTREAM_READERS = {"cordis-project-http": CORDISProjectHTTPReader}
|
|
288
|
+
|
|
91
289
|
VOCABULARIES_DATASTREAM_TRANSFORMERS = {
    "openaire-award": OpenAIREProjectTransformer,
    "cordis-award": CORDISProjectTransformer,
}
"""Awards Data Streams transformers."""
|
|
95
294
|
|
|
96
295
|
VOCABULARIES_DATASTREAM_WRITERS = {
    "awards-service": AwardsServiceWriter,
    "cordis-awards-service": CORDISAwardsServiceWriter,
}
"""Awards Data Streams writers."""
|
|
100
300
|
|
|
301
|
+
DATASTREAM_CONFIG_CORDIS = {
    # Readers are listed in order: the CORDIS HTTP reader, then a zip reader
    # restricted to members ending in ".xml", then an XML reader rooted at
    # the "project" element.
    "readers": [
        {"type": "cordis-project-http"},
        {"type": "zip", "args": {"regex": "\\.xml$", "mode": "r"}},
        {"type": "xml", "args": {"root_element": "project"}},
    ],
    "transformers": [{"type": "cordis-award"}],
    # The writer is handed the system identity.
    "writers": [
        {"type": "cordis-awards-service", "args": {"identity": system_identity}},
    ],
}
"""Data Stream configuration.

An origin is required for the reader.
"""
|
|
334
|
+
|
|
101
335
|
DATASTREAM_CONFIG = {
|
|
102
336
|
"readers": [
|
|
103
337
|
{
|
|
104
338
|
"type": "tar",
|
|
105
339
|
"args": {
|
|
106
|
-
"regex": "
|
|
340
|
+
"regex": "\\.json.gz$",
|
|
107
341
|
"mode": "r",
|
|
108
342
|
},
|
|
109
343
|
},
|
|
@@ -7,6 +7,9 @@
|
|
|
7
7
|
"$schema": {
|
|
8
8
|
"$ref": "local://definitions-v1.0.0.json#/$schema"
|
|
9
9
|
},
|
|
10
|
+
"tags": {
|
|
11
|
+
"$ref": "local://vocabularies/definitions-v1.0.0.json#/tags"
|
|
12
|
+
},
|
|
10
13
|
"identifiers": {
|
|
11
14
|
"description": "Alternate identifiers for the award.",
|
|
12
15
|
"type": "array",
|
|
@@ -39,6 +42,41 @@
|
|
|
39
42
|
},
|
|
40
43
|
"program": {
|
|
41
44
|
"type": "string"
|
|
45
|
+
},
|
|
46
|
+
"subjects": {
  "description": "Award's subjects.",
  "type": "array",
  "items": {
    "type": "object",
    "properties": {
      "id": {
        "$ref": "local://definitions-v1.0.0.json#/identifier"
      },
      "subject": {
        "description": "Human readable label.",
        "type": "string"
      }
    }
  }
},
|
|
59
|
+
"organizations": {
|
|
60
|
+
"description": "Award's organizations.",
|
|
61
|
+
"type": "array",
|
|
62
|
+
"items": {
|
|
63
|
+
"type": "object",
|
|
64
|
+
"additionalProperties": false,
|
|
65
|
+
"properties": {
|
|
66
|
+
"scheme": {
|
|
67
|
+
"description": "Identifier of the organization scheme.",
|
|
68
|
+
"$ref": "local://definitions-v1.0.0.json#/identifier"
|
|
69
|
+
},
|
|
70
|
+
"id": {
|
|
71
|
+
"description": "Identifier of the organization for the given scheme.",
|
|
72
|
+
"$ref": "local://definitions-v1.0.0.json#/identifier"
|
|
73
|
+
},
|
|
74
|
+
"organization": {
|
|
75
|
+
"description": "Human readable label.",
|
|
76
|
+
"type": "string"
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
}
|
|
42
80
|
}
|
|
43
81
|
}
|
|
44
82
|
}
|