invenio-vocabularies 2.3.1__py2.py3-none-any.whl → 6.3.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of invenio-vocabularies might be problematic. Click here for more details.

Files changed (165)
  1. invenio_vocabularies/__init__.py +2 -2
  2. invenio_vocabularies/administration/__init__.py +10 -0
  3. invenio_vocabularies/administration/views/__init__.py +10 -0
  4. invenio_vocabularies/administration/views/vocabularies.py +45 -0
  5. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/package.json +1 -7
  6. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/CustomAwardForm.js +80 -64
  7. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingField.js +49 -41
  8. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingModal.js +5 -7
  9. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/NoAwardResults.js +3 -3
  10. invenio_vocabularies/cli.py +31 -44
  11. invenio_vocabularies/config.py +68 -4
  12. invenio_vocabularies/contrib/affiliations/affiliations.py +11 -0
  13. invenio_vocabularies/contrib/affiliations/api.py +1 -2
  14. invenio_vocabularies/contrib/affiliations/config.py +13 -2
  15. invenio_vocabularies/contrib/affiliations/datastreams.py +186 -0
  16. invenio_vocabularies/contrib/affiliations/jsonschemas/affiliations/affiliation-v1.0.0.json +38 -1
  17. invenio_vocabularies/contrib/affiliations/mappings/os-v1/affiliations/affiliation-v1.0.0.json +22 -1
  18. invenio_vocabularies/contrib/affiliations/mappings/os-v1/affiliations/affiliation-v2.0.0.json +171 -0
  19. invenio_vocabularies/contrib/affiliations/mappings/os-v2/affiliations/affiliation-v1.0.0.json +22 -1
  20. invenio_vocabularies/contrib/affiliations/mappings/os-v2/affiliations/affiliation-v2.0.0.json +171 -0
  21. invenio_vocabularies/contrib/affiliations/mappings/v7/affiliations/affiliation-v1.0.0.json +22 -1
  22. invenio_vocabularies/contrib/affiliations/schema.py +17 -3
  23. invenio_vocabularies/contrib/affiliations/services.py +1 -2
  24. invenio_vocabularies/contrib/awards/awards.py +17 -5
  25. invenio_vocabularies/contrib/awards/datastreams.py +241 -7
  26. invenio_vocabularies/contrib/awards/jsonschemas/awards/award-v1.0.0.json +38 -0
  27. invenio_vocabularies/contrib/awards/mappings/os-v1/awards/award-v1.0.0.json +51 -2
  28. invenio_vocabularies/contrib/awards/mappings/os-v2/awards/award-v1.0.0.json +51 -2
  29. invenio_vocabularies/contrib/awards/mappings/v7/awards/award-v1.0.0.json +51 -2
  30. invenio_vocabularies/contrib/awards/schema.py +16 -1
  31. invenio_vocabularies/contrib/awards/serializer.py +8 -1
  32. invenio_vocabularies/contrib/awards/services.py +1 -2
  33. invenio_vocabularies/contrib/common/__init__.py +9 -0
  34. invenio_vocabularies/contrib/common/openaire/__init__.py +9 -0
  35. invenio_vocabularies/contrib/common/openaire/datastreams.py +84 -0
  36. invenio_vocabularies/contrib/common/ror/__init__.py +9 -0
  37. invenio_vocabularies/contrib/common/ror/datastreams.py +220 -0
  38. invenio_vocabularies/contrib/funders/config.py +11 -2
  39. invenio_vocabularies/contrib/funders/datastreams.py +40 -62
  40. invenio_vocabularies/contrib/funders/funders.py +3 -1
  41. invenio_vocabularies/contrib/funders/jsonschemas/funders/funder-v1.0.0.json +36 -1
  42. invenio_vocabularies/contrib/funders/mappings/os-v1/funders/funder-v1.0.0.json +22 -1
  43. invenio_vocabularies/contrib/funders/mappings/os-v1/funders/funder-v2.0.0.json +156 -0
  44. invenio_vocabularies/contrib/funders/mappings/os-v2/funders/funder-v1.0.0.json +22 -1
  45. invenio_vocabularies/contrib/funders/mappings/os-v2/funders/funder-v2.0.0.json +156 -0
  46. invenio_vocabularies/contrib/funders/mappings/v7/funders/funder-v1.0.0.json +22 -1
  47. invenio_vocabularies/contrib/funders/schema.py +8 -0
  48. invenio_vocabularies/contrib/funders/serializer.py +2 -1
  49. invenio_vocabularies/contrib/names/config.py +5 -3
  50. invenio_vocabularies/contrib/names/datastreams.py +172 -4
  51. invenio_vocabularies/contrib/names/jsonschemas/names/name-v1.0.0.json +3 -0
  52. invenio_vocabularies/contrib/names/mappings/os-v1/names/name-v1.0.0.json +3 -0
  53. invenio_vocabularies/contrib/names/mappings/os-v1/names/name-v2.0.0.json +150 -0
  54. invenio_vocabularies/contrib/names/mappings/os-v2/names/name-v1.0.0.json +3 -0
  55. invenio_vocabularies/contrib/names/mappings/os-v2/names/name-v2.0.0.json +150 -0
  56. invenio_vocabularies/contrib/names/mappings/v7/names/name-v1.0.0.json +3 -0
  57. invenio_vocabularies/contrib/names/names.py +15 -3
  58. invenio_vocabularies/contrib/names/permissions.py +20 -0
  59. invenio_vocabularies/contrib/names/s3client.py +44 -0
  60. invenio_vocabularies/contrib/names/schema.py +14 -0
  61. invenio_vocabularies/contrib/subjects/config.py +9 -3
  62. invenio_vocabularies/contrib/subjects/datastreams.py +61 -0
  63. invenio_vocabularies/contrib/subjects/euroscivoc/__init__.py +9 -0
  64. invenio_vocabularies/contrib/subjects/euroscivoc/datastreams.py +171 -0
  65. invenio_vocabularies/contrib/subjects/jsonschemas/subjects/subject-v1.0.0.json +31 -0
  66. invenio_vocabularies/contrib/subjects/mappings/os-v1/subjects/subject-v1.0.0.json +35 -0
  67. invenio_vocabularies/contrib/subjects/mappings/os-v2/subjects/subject-v1.0.0.json +35 -0
  68. invenio_vocabularies/contrib/subjects/mappings/v7/subjects/subject-v1.0.0.json +35 -0
  69. invenio_vocabularies/contrib/subjects/mesh/__init__.py +9 -0
  70. invenio_vocabularies/contrib/subjects/mesh/datastreams.py +43 -0
  71. invenio_vocabularies/contrib/subjects/schema.py +47 -5
  72. invenio_vocabularies/contrib/subjects/subjects.py +10 -0
  73. invenio_vocabularies/datastreams/datastreams.py +61 -13
  74. invenio_vocabularies/datastreams/factories.py +1 -2
  75. invenio_vocabularies/datastreams/readers.py +138 -29
  76. invenio_vocabularies/datastreams/tasks.py +37 -0
  77. invenio_vocabularies/datastreams/transformers.py +17 -27
  78. invenio_vocabularies/datastreams/writers.py +116 -14
  79. invenio_vocabularies/datastreams/xml.py +34 -0
  80. invenio_vocabularies/ext.py +59 -5
  81. invenio_vocabularies/factories.py +137 -0
  82. invenio_vocabularies/jobs.py +133 -0
  83. invenio_vocabularies/proxies.py +2 -2
  84. invenio_vocabularies/records/jsonschemas/vocabularies/definitions-v1.0.0.json +7 -0
  85. invenio_vocabularies/records/jsonschemas/vocabularies/vocabulary-v1.0.0.json +1 -4
  86. invenio_vocabularies/records/mappings/os-v1/vocabularies/vocabulary-v1.0.0.json +3 -3
  87. invenio_vocabularies/records/mappings/os-v2/vocabularies/vocabulary-v1.0.0.json +3 -3
  88. invenio_vocabularies/records/mappings/v7/vocabularies/vocabulary-v1.0.0.json +3 -3
  89. invenio_vocabularies/records/models.py +2 -4
  90. invenio_vocabularies/records/pidprovider.py +1 -2
  91. invenio_vocabularies/records/systemfields/relations.py +2 -2
  92. invenio_vocabularies/resources/__init__.py +9 -1
  93. invenio_vocabularies/resources/config.py +105 -0
  94. invenio_vocabularies/resources/resource.py +31 -41
  95. invenio_vocabularies/resources/schema.py +2 -1
  96. invenio_vocabularies/services/__init__.py +5 -2
  97. invenio_vocabularies/services/config.py +179 -0
  98. invenio_vocabularies/services/custom_fields/__init__.py +6 -2
  99. invenio_vocabularies/services/custom_fields/subject.py +82 -0
  100. invenio_vocabularies/services/custom_fields/vocabulary.py +5 -3
  101. invenio_vocabularies/services/permissions.py +3 -1
  102. invenio_vocabularies/services/results.py +110 -0
  103. invenio_vocabularies/services/schema.py +11 -2
  104. invenio_vocabularies/services/service.py +46 -94
  105. invenio_vocabularies/services/tasks.py +1 -1
  106. invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/subjects.html +23 -0
  107. invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/vocabularies-list.html +12 -0
  108. invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/vocabulary-details.html +71 -0
  109. invenio_vocabularies/translations/af/LC_MESSAGES/messages.mo +0 -0
  110. invenio_vocabularies/translations/ar/LC_MESSAGES/messages.mo +0 -0
  111. invenio_vocabularies/translations/bg/LC_MESSAGES/messages.mo +0 -0
  112. invenio_vocabularies/translations/ca/LC_MESSAGES/messages.mo +0 -0
  113. invenio_vocabularies/translations/cs/LC_MESSAGES/messages.mo +0 -0
  114. invenio_vocabularies/translations/da/LC_MESSAGES/messages.mo +0 -0
  115. invenio_vocabularies/translations/de/LC_MESSAGES/messages.mo +0 -0
  116. invenio_vocabularies/translations/de_AT/LC_MESSAGES/messages.mo +0 -0
  117. invenio_vocabularies/translations/de_DE/LC_MESSAGES/messages.mo +0 -0
  118. invenio_vocabularies/translations/el/LC_MESSAGES/messages.mo +0 -0
  119. invenio_vocabularies/translations/en/LC_MESSAGES/messages.mo +0 -0
  120. invenio_vocabularies/translations/en_AT/LC_MESSAGES/messages.mo +0 -0
  121. invenio_vocabularies/translations/en_HU/LC_MESSAGES/messages.mo +0 -0
  122. invenio_vocabularies/translations/es/LC_MESSAGES/messages.mo +0 -0
  123. invenio_vocabularies/translations/es_CU/LC_MESSAGES/messages.mo +0 -0
  124. invenio_vocabularies/translations/es_MX/LC_MESSAGES/messages.mo +0 -0
  125. invenio_vocabularies/translations/et/LC_MESSAGES/messages.mo +0 -0
  126. invenio_vocabularies/translations/et_EE/LC_MESSAGES/messages.mo +0 -0
  127. invenio_vocabularies/translations/fa/LC_MESSAGES/messages.mo +0 -0
  128. invenio_vocabularies/translations/fa_IR/LC_MESSAGES/messages.mo +0 -0
  129. invenio_vocabularies/translations/fr/LC_MESSAGES/messages.mo +0 -0
  130. invenio_vocabularies/translations/fr_CI/LC_MESSAGES/messages.mo +0 -0
  131. invenio_vocabularies/translations/fr_FR/LC_MESSAGES/messages.mo +0 -0
  132. invenio_vocabularies/translations/gl/LC_MESSAGES/messages.mo +0 -0
  133. invenio_vocabularies/translations/hi_IN/LC_MESSAGES/messages.mo +0 -0
  134. invenio_vocabularies/translations/hr/LC_MESSAGES/messages.mo +0 -0
  135. invenio_vocabularies/translations/hu/LC_MESSAGES/messages.mo +0 -0
  136. invenio_vocabularies/translations/hu_HU/LC_MESSAGES/messages.mo +0 -0
  137. invenio_vocabularies/translations/it/LC_MESSAGES/messages.mo +0 -0
  138. invenio_vocabularies/translations/ja/LC_MESSAGES/messages.mo +0 -0
  139. invenio_vocabularies/translations/ka/LC_MESSAGES/messages.mo +0 -0
  140. invenio_vocabularies/translations/lt/LC_MESSAGES/messages.mo +0 -0
  141. invenio_vocabularies/translations/messages.pot +95 -48
  142. invenio_vocabularies/translations/ne/LC_MESSAGES/messages.mo +0 -0
  143. invenio_vocabularies/translations/no/LC_MESSAGES/messages.mo +0 -0
  144. invenio_vocabularies/translations/pl/LC_MESSAGES/messages.mo +0 -0
  145. invenio_vocabularies/translations/pt/LC_MESSAGES/messages.mo +0 -0
  146. invenio_vocabularies/translations/ro/LC_MESSAGES/messages.mo +0 -0
  147. invenio_vocabularies/translations/ru/LC_MESSAGES/messages.mo +0 -0
  148. invenio_vocabularies/translations/rw/LC_MESSAGES/messages.mo +0 -0
  149. invenio_vocabularies/translations/sk/LC_MESSAGES/messages.mo +0 -0
  150. invenio_vocabularies/translations/sv/LC_MESSAGES/messages.mo +0 -0
  151. invenio_vocabularies/translations/sv_SE/LC_MESSAGES/messages.mo +0 -0
  152. invenio_vocabularies/translations/tr/LC_MESSAGES/messages.mo +0 -0
  153. invenio_vocabularies/translations/uk/LC_MESSAGES/messages.mo +0 -0
  154. invenio_vocabularies/translations/uk_UA/LC_MESSAGES/messages.mo +0 -0
  155. invenio_vocabularies/translations/zh_CN/LC_MESSAGES/messages.mo +0 -0
  156. invenio_vocabularies/translations/zh_TW/LC_MESSAGES/messages.mo +0 -0
  157. invenio_vocabularies/views.py +12 -26
  158. invenio_vocabularies/webpack.py +3 -3
  159. {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/METADATA +150 -6
  160. {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/RECORD +165 -132
  161. {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/WHEEL +1 -1
  162. {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/entry_points.txt +17 -0
  163. {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/AUTHORS.rst +0 -0
  164. {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/LICENSE +0 -0
  165. {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,171 @@
1
+ {
2
+ "settings": {
3
+ "analysis": {
4
+ "char_filter": {
5
+ "strip_special_chars": {
6
+ "type": "pattern_replace",
7
+ "pattern": "[\\p{Punct}\\p{S}]",
8
+ "replacement": ""
9
+ }
10
+ },
11
+ "analyzer": {
12
+ "accent_edge_analyzer": {
13
+ "tokenizer": "standard",
14
+ "type": "custom",
15
+ "char_filter": ["strip_special_chars"],
16
+ "filter": [
17
+ "lowercasepreserveoriginal",
18
+ "asciifoldingpreserveoriginal",
19
+ "edgegrams"
20
+ ]
21
+ },
22
+ "accent_analyzer": {
23
+ "tokenizer": "standard",
24
+ "type": "custom",
25
+ "char_filter": ["strip_special_chars"],
26
+ "filter": [
27
+ "lowercasepreserveoriginal",
28
+ "asciifoldingpreserveoriginal"
29
+ ]
30
+ }
31
+ },
32
+ "normalizer": {
33
+ "accent_normalizer": {
34
+ "type": "custom",
35
+ "char_filter": ["strip_special_chars"],
36
+ "filter": [
37
+ "lowercase",
38
+ "asciifolding"
39
+ ]
40
+ }
41
+ },
42
+ "filter": {
43
+ "lowercasepreserveoriginal": {
44
+ "type": "lowercase",
45
+ "preserve_original": true
46
+ },
47
+ "asciifoldingpreserveoriginal": {
48
+ "type": "asciifolding",
49
+ "preserve_original": true
50
+ },
51
+ "edgegrams": {
52
+ "type": "edge_ngram",
53
+ "min_gram": 2,
54
+ "max_gram": 20
55
+ }
56
+ }
57
+ }
58
+ },
59
+ "mappings": {
60
+ "dynamic_templates": [
61
+ {
62
+ "i18n_title": {
63
+ "path_match": "title.*",
64
+ "match_mapping_type": "string",
65
+ "mapping": {
66
+ "type": "text",
67
+ "analyzer": "accent_edge_analyzer",
68
+ "search_analyzer": "accent_analyzer"
69
+ }
70
+ }
71
+ }
72
+ ],
73
+ "dynamic": "strict",
74
+ "properties": {
75
+ "$schema": {
76
+ "type": "keyword",
77
+ "index": "false"
78
+ },
79
+ "created": {
80
+ "type": "date"
81
+ },
82
+ "updated": {
83
+ "type": "date"
84
+ },
85
+ "indexed_at": {
86
+ "type": "date"
87
+ },
88
+ "uuid": {
89
+ "type": "keyword"
90
+ },
91
+ "version_id": {
92
+ "type": "integer"
93
+ },
94
+ "id": {
95
+ "type": "keyword"
96
+ },
97
+ "name_sort": {
98
+ "type": "keyword"
99
+ },
100
+ "name": {
101
+ "type": "text",
102
+ "analyzer": "accent_edge_analyzer",
103
+ "search_analyzer": "accent_analyzer",
104
+ "copy_to": "name_sort"
105
+ },
106
+ "acronym": {
107
+ "type": "text",
108
+ "analyzer": "accent_edge_analyzer",
109
+ "search_analyzer": "accent_analyzer",
110
+ "fields": {
111
+ "keyword": {
112
+ "type": "keyword",
113
+ "normalizer": "accent_normalizer"
114
+ }
115
+ }
116
+ },
117
+ "identifiers": {
118
+ "properties": {
119
+ "identifier": {
120
+ "type": "keyword"
121
+ },
122
+ "scheme": {
123
+ "type": "keyword"
124
+ }
125
+ }
126
+ },
127
+ "pid": {
128
+ "type": "object",
129
+ "properties": {
130
+ "pk": {
131
+ "type": "integer"
132
+ },
133
+ "pid_type": {
134
+ "type": "keyword"
135
+ },
136
+ "obj_type": {
137
+ "type": "keyword"
138
+ },
139
+ "status": {
140
+ "type": "keyword"
141
+ }
142
+ }
143
+ },
144
+ "title": {
145
+ "type": "object",
146
+ "dynamic": "true"
147
+ },
148
+ "tags": {
149
+ "type": "keyword"
150
+ },
151
+ "country": {
152
+ "type": "text"
153
+ },
154
+ "country_name": {
155
+ "type": "text"
156
+ },
157
+ "location_name": {
158
+ "type": "text"
159
+ },
160
+ "status": {
161
+ "type": "keyword"
162
+ },
163
+ "aliases": {
164
+ "type": "text"
165
+ },
166
+ "types": {
167
+ "type": "keyword"
168
+ }
169
+ }
170
+ }
171
+ }
@@ -84,7 +84,28 @@
84
84
  },
85
85
  "title": {
86
86
  "type": "object",
87
- "dynamic": true
87
+ "dynamic": "true"
88
+ },
89
+ "tags": {
90
+ "type": "keyword"
91
+ },
92
+ "country": {
93
+ "type": "text"
94
+ },
95
+ "country_name": {
96
+ "type": "text"
97
+ },
98
+ "location_name": {
99
+ "type": "text"
100
+ },
101
+ "status": {
102
+ "type": "keyword"
103
+ },
104
+ "aliases": {
105
+ "type": "text"
106
+ },
107
+ "types": {
108
+ "type": "keyword"
88
109
  }
89
110
  }
90
111
  }
@@ -1,6 +1,7 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
- # Copyright (C) 2020-2022 CERN.
3
+ # Copyright (C) 2020-2024 CERN.
4
+ # Copyright (C) 2024 California Institute of Technology.
4
5
  #
5
6
  # Invenio-Vocabularies is free software; you can redistribute it and/or
6
7
  # modify it under the terms of the MIT License; see LICENSE file for more
@@ -11,7 +12,7 @@
11
12
  from functools import partial
12
13
 
13
14
  from invenio_i18n import lazy_gettext as _
14
- from marshmallow import fields
15
+ from marshmallow import fields, validate
15
16
  from marshmallow_utils.fields import IdentifierSet, SanitizedUnicode
16
17
  from marshmallow_utils.schemas import IdentifierSchema
17
18
 
@@ -36,7 +37,20 @@ class AffiliationSchema(BaseVocabularySchema, ModePIDFieldVocabularyMixin):
36
37
  )
37
38
  )
38
39
  )
39
- name = SanitizedUnicode(required=True)
40
+ name = SanitizedUnicode(
41
+ required=True, validate=validate.Length(min=1, error=_("Name cannot be blank."))
42
+ )
43
+ country = SanitizedUnicode()
44
+ country_name = SanitizedUnicode()
45
+ location_name = SanitizedUnicode()
46
+ id = SanitizedUnicode(
47
+ validate=validate.Length(min=1, error=_("PID cannot be blank."))
48
+ )
49
+
50
+ acronym = SanitizedUnicode()
51
+ aliases = fields.List(SanitizedUnicode())
52
+ status = SanitizedUnicode()
53
+ types = fields.List(SanitizedUnicode())
40
54
 
41
55
 
42
56
  class AffiliationRelationSchema(ContribVocabularyRelationSchema):
@@ -1,6 +1,6 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
- # Copyright (C) 2021 CERN.
3
+ # Copyright (C) 2021-2024 CERN.
4
4
  #
5
5
  # Invenio-Vocabularies is free software; you can redistribute it and/or
6
6
  # modify it under the terms of the MIT License; see LICENSE file for more
@@ -8,7 +8,6 @@
8
8
 
9
9
  """Vocabulary affiliations."""
10
10
 
11
-
12
11
  from .affiliations import record_type
13
12
 
14
13
  AffiliationsServiceConfig = record_type.service_config_cls
@@ -1,12 +1,13 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
- # Copyright (C) 2022 CERN.
3
+ # Copyright (C) 2022-2024 CERN.
4
4
  #
5
5
  # Invenio-Vocabularies is free software; you can redistribute it and/or
6
6
  # modify it under the terms of the MIT License; see LICENSE file for more
7
7
  # details.
8
8
 
9
9
  """Vocabulary awards."""
10
+
10
11
  from flask_resources import (
11
12
  BaseListSchema,
12
13
  JSONSerializer,
@@ -17,24 +18,35 @@ from invenio_db import db
17
18
  from invenio_records.dumpers import SearchDumper
18
19
  from invenio_records.dumpers.indexedat import IndexedAtDumperExt
19
20
  from invenio_records.dumpers.relations import RelationDumperExt
20
- from invenio_records.systemfields import RelationsField
21
+ from invenio_records.systemfields import MultiRelationsField
21
22
  from invenio_records_resources.factories.factory import RecordTypeFactory
22
- from invenio_records_resources.records.systemfields import ModelPIDField, PIDRelation
23
+ from invenio_records_resources.records.systemfields import (
24
+ ModelPIDField,
25
+ PIDListRelation,
26
+ PIDRelation,
27
+ )
23
28
  from invenio_records_resources.resources.records.headers import etag_headers
24
29
 
25
30
  from ...services.permissions import PermissionPolicy
26
31
  from ..funders.api import Funder
32
+ from ..subjects.api import Subject
27
33
  from .config import AwardsSearchOptions, service_components
28
34
  from .schema import AwardSchema
29
35
  from .serializer import AwardL10NItemSchema
30
36
 
31
- award_relations = RelationsField(
37
+ award_relations = MultiRelationsField(
32
38
  funders=PIDRelation(
33
39
  "funder",
34
40
  keys=["name"],
35
41
  pid_field=Funder.pid,
36
42
  cache_key="funder",
37
- )
43
+ ),
44
+ subjects=PIDListRelation(
45
+ "subjects",
46
+ keys=["subject", "scheme", "identifiers", "props"],
47
+ pid_field=Subject.pid,
48
+ cache_key="subjects",
49
+ ),
38
50
  )
39
51
 
40
52
  record_type = RecordTypeFactory(
@@ -1,6 +1,6 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
- # Copyright (C) 2022 CERN.
3
+ # Copyright (C) 2022-2024 CERN.
4
4
  #
5
5
  # Invenio-Vocabularies is free software; you can redistribute it and/or
6
6
  # modify it under the terms of the MIT License; see LICENSE file for more
@@ -8,10 +8,17 @@
8
8
 
9
9
  """Awards datastreams, transformers, writers and readers."""
10
10
 
11
+ import io
12
+
13
+ import requests
14
+ from flask import current_app
11
15
  from invenio_access.permissions import system_identity
12
16
  from invenio_i18n import lazy_gettext as _
13
17
 
18
+ from invenio_vocabularies.datastreams.errors import ReaderError
19
+
14
20
  from ...datastreams.errors import TransformerError
21
+ from ...datastreams.readers import BaseReader
15
22
  from ...datastreams.transformers import BaseTransformer
16
23
  from ...datastreams.writers import ServiceWriter
17
24
  from .config import awards_ec_ror_id, awards_openaire_funders_mapping
@@ -39,7 +46,20 @@ class OpenAIREProjectTransformer(BaseTransformer):
39
46
  award = {}
40
47
 
41
48
  code = record["code"]
42
- openaire_funder_prefix = record["id"].split("::")[0].split("|")[1]
49
+
50
+ # The `id` should follow the format `sourcePrefix::md5(localId)` where `sourcePrefix` is 12 characters long.
51
+ # See: https://graph.openaire.eu/docs/data-model/pids-and-identifiers#identifiers-in-the-graph
52
+ #
53
+ # The format of `id` in the full OpenAIRE Graph Dataset (https://doi.org/10.5281/zenodo.3516917)
54
+ # follows this format (e.g. 'abc_________::0123456789abcdef0123456789abcdef').
55
+ # However, the format of `id` in the new collected projects dataset (https://doi.org/10.5281/zenodo.6419021)
56
+ # does not follow this format, and has a `40|` prefix (e.g. '40|abc_________::0123456789abcdef0123456789abcdef').
57
+ #
58
+ # The number '40' corresponds to the entity types 'Project'.
59
+ # See: https://ec.europa.eu/research/participants/documents/downloadPublic?documentIds=080166e5a3a1a213&appId=PPGMS
60
+ # See: https://graph.openaire.eu/docs/5.0.0/data-model/entities/project#id
61
+ openaire_funder_prefix = record["id"].split("::", 1)[0].split("|", 1)[-1]
62
+
43
63
  funder_id = awards_openaire_funders_mapping.get(openaire_funder_prefix)
44
64
  if funder_id is None:
45
65
  raise TransformerError(
@@ -54,10 +74,7 @@ class OpenAIREProjectTransformer(BaseTransformer):
54
74
 
55
75
  funding = next(iter(record.get("funding", [])), None)
56
76
  if funding:
57
- funding_stream_id = funding.get("funding_stream", {}).get("id", "")
58
- # Example funding stream ID: `EC::HE::HORIZON-AG-UN`. We need the `EC`
59
- # string, i.e. the second "part" of the identifier.
60
- program = next(iter(funding_stream_id.split("::")[1:2]), "")
77
+ program = funding.get("fundingStream", {}).get("id", "")
61
78
  if program:
62
79
  award["program"] = program
63
80
 
@@ -78,7 +95,20 @@ class OpenAIREProjectTransformer(BaseTransformer):
78
95
  award["identifiers"] = identifiers
79
96
 
80
97
  award["number"] = code
98
+
99
+ # `title` is a mandatory attribute of the `Project` object in the OpenAIRE Graph Data Model.
100
+ # See: https://graph.openaire.eu/docs/data-model/entities/project#title
101
+ # However, 15'000+ awards for the FCT funder (and 1 record for the NIH funder) are missing a title attribute.
102
+ if "title" not in record:
103
+ raise TransformerError(
104
+ _(
105
+ "Missing title attribute for award {award_id}".format(
106
+ award_id=award["id"]
107
+ )
108
+ )
109
+ )
81
110
  award["title"] = {"en": record["title"]}
111
+
82
112
  award["funder"] = {"id": funder_id}
83
113
  acronym = record.get("acronym")
84
114
  if acronym:
@@ -88,22 +118,226 @@ class OpenAIREProjectTransformer(BaseTransformer):
88
118
  return stream_entry
89
119
 
90
120
 
121
+ class CORDISProjectHTTPReader(BaseReader):
122
+ """CORDIS Project HTTP Reader returning an in-memory binary stream of the latest CORDIS Horizon Europe project zip file."""
123
+
124
+ def _iter(self, fp, *args, **kwargs):
125
+ raise NotImplementedError(
126
+ "CORDISProjectHTTPReader downloads one file and therefore does not iterate through items"
127
+ )
128
+
129
+ def read(self, item=None, *args, **kwargs):
130
+ """Reads the latest CORDIS Horizon Europe project zip file and yields an in-memory binary stream of it."""
131
+ if item:
132
+ raise NotImplementedError(
133
+ "CORDISProjectHTTPReader does not support being chained after another reader"
134
+ )
135
+
136
+ if self._origin == "HE":
137
+ file_url = "https://cordis.europa.eu/data/cordis-HORIZONprojects-xml.zip"
138
+ elif self._origin == "H2020":
139
+ file_url = "https://cordis.europa.eu/data/cordis-h2020projects-xml.zip"
140
+ elif self._origin == "FP7":
141
+ file_url = "https://cordis.europa.eu/data/cordis-fp7projects-xml.zip"
142
+ else:
143
+ raise ReaderError(
144
+ "The --origin option should be either 'HE' (for Horizon Europe) or 'H2020' (for Horizon 2020) or 'FP7'"
145
+ )
146
+
147
+ # Download the ZIP file and fully load the response bytes content in memory.
148
+ # The bytes content are then wrapped by a BytesIO to be file-like object (as required by `zipfile.ZipFile`).
149
+ # Using directly `file_resp.raw` is not possible since `zipfile.ZipFile` requires the file-like object to be seekable.
150
+ file_resp = requests.get(file_url)
151
+ file_resp.raise_for_status()
152
+ yield io.BytesIO(file_resp.content)
153
+
154
+
155
+ class CORDISProjectTransformer(BaseTransformer):
156
+ """Transforms a CORDIS project record into an award record."""
157
+
158
+ def apply(self, stream_entry, **kwargs):
159
+ """Applies the transformation to the stream entry."""
160
+ record = stream_entry.entry
161
+ award = {}
162
+
163
+ # Here `id` is the project ID, which will be used to attach the update to the existing project.
164
+ award["id"] = (
165
+ f"{current_app.config['VOCABULARIES_AWARDS_EC_ROR_ID']}::{record['id']}"
166
+ )
167
+
168
+ categories = record.get("relations", {}).get("categories", {}).get("category")
169
+ if categories:
170
+ if isinstance(categories, dict):
171
+ categories = [categories]
172
+
173
+ award["subjects"] = [
174
+ {"id": f"euroscivoc:{vocab_id}"}
175
+ for category in categories
176
+ if category.get("@classification") == "euroSciVoc"
177
+ and (vocab_id := category["code"].split("/")[-1]).isdigit()
178
+ ]
179
+
180
+ organizations = (
181
+ record.get("relations", {}).get("associations", {}).get("organization")
182
+ )
183
+ if organizations:
184
+ # Projects with a single organization are not wrapped in a list,
185
+ # so we do this here to be able to iterate over it.
186
+ organizations = (
187
+ organizations if isinstance(organizations, list) else [organizations]
188
+ )
189
+ award["organizations"] = []
190
+ for organization in organizations:
191
+ # Some organizations in FP7 projects do not have a "legalname" key,
192
+ # for instance the 14th participant in "SAGE" https://cordis.europa.eu/project/id/999902.
193
+ # In this case, fully skip the organization entry.
194
+ if "legalname" not in organization:
195
+ continue
196
+
197
+ organization_data = {
198
+ "organization": organization["legalname"],
199
+ }
200
+
201
+ # Some organizations in FP7 projects do not have an "id" key (the PIC identifier),
202
+ # for instance "AIlGreenVehicles" in "MOTORBRAIN" https://cordis.europa.eu/project/id/270693.
203
+ # In this case, still store the name but skip the identifier part.
204
+ if "id" in organization:
205
+ organization_data.update(
206
+ {
207
+ "scheme": "pic",
208
+ "id": organization["id"],
209
+ }
210
+ )
211
+
212
+ award["organizations"].append(organization_data)
213
+
214
+ programmes = (
215
+ record.get("relations", {}).get("associations", {}).get("programme", {})
216
+ )
217
+ if programmes:
218
+ # Projects with a single programme (this is the case of some projects in FP7) are not wrapped in a list,
219
+ # so we do this here to be able to iterate over it.
220
+ programmes = programmes if isinstance(programmes, list) else [programmes]
221
+
222
+ programmes_related_legal_basis = [
223
+ {
224
+ "code": programme["code"],
225
+ "uniqueprogrammepart": programme.get("@uniqueprogrammepart"),
226
+ }
227
+ for programme in programmes
228
+ if programme.get("@type") == "relatedLegalBasis"
229
+ ]
230
+
231
+ if len(programmes_related_legal_basis) == 0:
232
+ raise TransformerError(
233
+ _(
234
+ "No related legal basis programme found for project {project_id}".format(
235
+ project_id=record["id"]
236
+ )
237
+ )
238
+ )
239
+ elif len(programmes_related_legal_basis) == 1:
240
+ # FP7 projects have only one related legal basis programme and do not have a 'uniqueprogrammepart' field.
241
+ unique_programme_related_legal_basis = programmes_related_legal_basis[0]
242
+ elif len(programmes_related_legal_basis) >= 1:
243
+ # The entry with the field 'uniqueprogrammepart' == 'true' is the high level programme code,
244
+ # while the other entry is a more specific sub-programme.
245
+ unique_programme_related_legal_basis = [
246
+ programme_related_legal_basis
247
+ # A few H2020 projects have more than one 'uniqueprogrammepart' == 'true',
248
+ # for instance https://cordis.europa.eu/project/id/825673 (showing as "main programme" in the page)
249
+ # which has one entry with the code 'H2020-EU.1.2.',
250
+ # and one with the code 'H2020-EU.1.2.3.'.
251
+ # We sort them from the shortest code to the longest code, and take the first item,
252
+ # so that it conforms more with other projects which all have the shortest code as the main one.
253
+ for programme_related_legal_basis in sorted(
254
+ programmes_related_legal_basis, key=lambda d: len(d["code"])
255
+ )
256
+ if programme_related_legal_basis["uniqueprogrammepart"] == "true"
257
+ ][0]
258
+
259
+ # Store the code of the programme.
260
+ # For instance the code "HORIZON.1.2" which means "Marie Skłodowska-Curie Actions (MSCA)"
261
+ # See https://cordis.europa.eu/programme/id/HORIZON.1.2
262
+ award["program"] = unique_programme_related_legal_basis["code"]
263
+
264
+ stream_entry.entry = award
265
+ return stream_entry
266
+
267
+
268
+ class CORDISAwardsServiceWriter(ServiceWriter):
269
+ """CORDIS Awards service writer."""
270
+
271
+ def __init__(self, *args, **kwargs):
272
+ """Constructor."""
273
+ service_or_name = kwargs.pop("service_or_name", "awards")
274
+ # Here we only update and we do not insert, since CORDIS data is used to augment existing awards
275
+ # (with subjects, organizations, and program information) and is not used to create new awards.
276
+ super().__init__(
277
+ service_or_name=service_or_name, insert=False, update=True, *args, **kwargs
278
+ )
279
+
280
+ def _entry_id(self, entry):
281
+ """Get the id from an entry."""
282
+ return entry["id"]
283
+
284
+
285
+ VOCABULARIES_DATASTREAM_READERS = {
286
+ "cordis-project-http": CORDISProjectHTTPReader,
287
+ }
288
+
91
289
  VOCABULARIES_DATASTREAM_TRANSFORMERS = {
92
290
  "openaire-award": OpenAIREProjectTransformer,
291
+ "cordis-award": CORDISProjectTransformer,
93
292
  }
94
293
  """ORCiD Data Streams transformers."""
95
294
 
96
295
  VOCABULARIES_DATASTREAM_WRITERS = {
97
296
  "awards-service": AwardsServiceWriter,
297
+ "cordis-awards-service": CORDISAwardsServiceWriter,
98
298
  }
99
299
  """ORCiD Data Streams transformers."""
100
300
 
301
+ DATASTREAM_CONFIG_CORDIS = {
302
+ "readers": [
303
+ {"type": "cordis-project-http"},
304
+ {
305
+ "type": "zip",
306
+ "args": {
307
+ "regex": "\\.xml$",
308
+ "mode": "r",
309
+ },
310
+ },
311
+ {
312
+ "type": "xml",
313
+ "args": {
314
+ "root_element": "project",
315
+ },
316
+ },
317
+ ],
318
+ "transformers": [
319
+ {"type": "cordis-award"},
320
+ ],
321
+ "writers": [
322
+ {
323
+ "type": "cordis-awards-service",
324
+ "args": {
325
+ "identity": system_identity,
326
+ },
327
+ }
328
+ ],
329
+ }
330
+ """Data Stream configuration.
331
+
332
+ An origin is required for the reader.
333
+ """
334
+
101
335
  DATASTREAM_CONFIG = {
102
336
  "readers": [
103
337
  {
104
338
  "type": "tar",
105
339
  "args": {
106
- "regex": ".json.gz$",
340
+ "regex": "\\.json.gz$",
107
341
  "mode": "r",
108
342
  },
109
343
  },
@@ -7,6 +7,9 @@
7
7
  "$schema": {
8
8
  "$ref": "local://definitions-v1.0.0.json#/$schema"
9
9
  },
10
+ "tags": {
11
+ "$ref": "local://vocabularies/definitions-v1.0.0.json#/tags"
12
+ },
10
13
  "identifiers": {
11
14
  "description": "Alternate identifiers for the award.",
12
15
  "type": "array",
@@ -39,6 +42,41 @@
39
42
  },
40
43
  "program": {
41
44
  "type": "string"
45
+ },
46
+ "subjects": {
47
+ "description": "Award's subjects.",
48
+ "type": "array",
49
+ "properties": {
50
+ "id": {
51
+ "$ref": "local://definitions-v1.0.0.json#/identifier"
52
+ },
53
+ "subject": {
54
+ "description": "Human readable label.",
55
+ "type": "string"
56
+ }
57
+ }
58
+ },
59
+ "organizations": {
60
+ "description": "Award's organizations.",
61
+ "type": "array",
62
+ "items": {
63
+ "type": "object",
64
+ "additionalProperties": false,
65
+ "properties": {
66
+ "scheme": {
67
+ "description": "Identifier of the organization scheme.",
68
+ "$ref": "local://definitions-v1.0.0.json#/identifier"
69
+ },
70
+ "id": {
71
+ "description": "Identifier of the organization for the given scheme.",
72
+ "$ref": "local://definitions-v1.0.0.json#/identifier"
73
+ },
74
+ "organization": {
75
+ "description": "Human readable label.",
76
+ "type": "string"
77
+ }
78
+ }
79
+ }
42
80
  }
43
81
  }
44
82
  }