invenio-vocabularies 2.3.1__py2.py3-none-any.whl → 6.3.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of invenio-vocabularies might be problematic. Click here for more details.

Files changed (165) hide show
  1. invenio_vocabularies/__init__.py +2 -2
  2. invenio_vocabularies/administration/__init__.py +10 -0
  3. invenio_vocabularies/administration/views/__init__.py +10 -0
  4. invenio_vocabularies/administration/views/vocabularies.py +45 -0
  5. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/package.json +1 -7
  6. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/CustomAwardForm.js +80 -64
  7. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingField.js +49 -41
  8. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingModal.js +5 -7
  9. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/NoAwardResults.js +3 -3
  10. invenio_vocabularies/cli.py +31 -44
  11. invenio_vocabularies/config.py +68 -4
  12. invenio_vocabularies/contrib/affiliations/affiliations.py +11 -0
  13. invenio_vocabularies/contrib/affiliations/api.py +1 -2
  14. invenio_vocabularies/contrib/affiliations/config.py +13 -2
  15. invenio_vocabularies/contrib/affiliations/datastreams.py +186 -0
  16. invenio_vocabularies/contrib/affiliations/jsonschemas/affiliations/affiliation-v1.0.0.json +38 -1
  17. invenio_vocabularies/contrib/affiliations/mappings/os-v1/affiliations/affiliation-v1.0.0.json +22 -1
  18. invenio_vocabularies/contrib/affiliations/mappings/os-v1/affiliations/affiliation-v2.0.0.json +171 -0
  19. invenio_vocabularies/contrib/affiliations/mappings/os-v2/affiliations/affiliation-v1.0.0.json +22 -1
  20. invenio_vocabularies/contrib/affiliations/mappings/os-v2/affiliations/affiliation-v2.0.0.json +171 -0
  21. invenio_vocabularies/contrib/affiliations/mappings/v7/affiliations/affiliation-v1.0.0.json +22 -1
  22. invenio_vocabularies/contrib/affiliations/schema.py +17 -3
  23. invenio_vocabularies/contrib/affiliations/services.py +1 -2
  24. invenio_vocabularies/contrib/awards/awards.py +17 -5
  25. invenio_vocabularies/contrib/awards/datastreams.py +241 -7
  26. invenio_vocabularies/contrib/awards/jsonschemas/awards/award-v1.0.0.json +38 -0
  27. invenio_vocabularies/contrib/awards/mappings/os-v1/awards/award-v1.0.0.json +51 -2
  28. invenio_vocabularies/contrib/awards/mappings/os-v2/awards/award-v1.0.0.json +51 -2
  29. invenio_vocabularies/contrib/awards/mappings/v7/awards/award-v1.0.0.json +51 -2
  30. invenio_vocabularies/contrib/awards/schema.py +16 -1
  31. invenio_vocabularies/contrib/awards/serializer.py +8 -1
  32. invenio_vocabularies/contrib/awards/services.py +1 -2
  33. invenio_vocabularies/contrib/common/__init__.py +9 -0
  34. invenio_vocabularies/contrib/common/openaire/__init__.py +9 -0
  35. invenio_vocabularies/contrib/common/openaire/datastreams.py +84 -0
  36. invenio_vocabularies/contrib/common/ror/__init__.py +9 -0
  37. invenio_vocabularies/contrib/common/ror/datastreams.py +220 -0
  38. invenio_vocabularies/contrib/funders/config.py +11 -2
  39. invenio_vocabularies/contrib/funders/datastreams.py +40 -62
  40. invenio_vocabularies/contrib/funders/funders.py +3 -1
  41. invenio_vocabularies/contrib/funders/jsonschemas/funders/funder-v1.0.0.json +36 -1
  42. invenio_vocabularies/contrib/funders/mappings/os-v1/funders/funder-v1.0.0.json +22 -1
  43. invenio_vocabularies/contrib/funders/mappings/os-v1/funders/funder-v2.0.0.json +156 -0
  44. invenio_vocabularies/contrib/funders/mappings/os-v2/funders/funder-v1.0.0.json +22 -1
  45. invenio_vocabularies/contrib/funders/mappings/os-v2/funders/funder-v2.0.0.json +156 -0
  46. invenio_vocabularies/contrib/funders/mappings/v7/funders/funder-v1.0.0.json +22 -1
  47. invenio_vocabularies/contrib/funders/schema.py +8 -0
  48. invenio_vocabularies/contrib/funders/serializer.py +2 -1
  49. invenio_vocabularies/contrib/names/config.py +5 -3
  50. invenio_vocabularies/contrib/names/datastreams.py +172 -4
  51. invenio_vocabularies/contrib/names/jsonschemas/names/name-v1.0.0.json +3 -0
  52. invenio_vocabularies/contrib/names/mappings/os-v1/names/name-v1.0.0.json +3 -0
  53. invenio_vocabularies/contrib/names/mappings/os-v1/names/name-v2.0.0.json +150 -0
  54. invenio_vocabularies/contrib/names/mappings/os-v2/names/name-v1.0.0.json +3 -0
  55. invenio_vocabularies/contrib/names/mappings/os-v2/names/name-v2.0.0.json +150 -0
  56. invenio_vocabularies/contrib/names/mappings/v7/names/name-v1.0.0.json +3 -0
  57. invenio_vocabularies/contrib/names/names.py +15 -3
  58. invenio_vocabularies/contrib/names/permissions.py +20 -0
  59. invenio_vocabularies/contrib/names/s3client.py +44 -0
  60. invenio_vocabularies/contrib/names/schema.py +14 -0
  61. invenio_vocabularies/contrib/subjects/config.py +9 -3
  62. invenio_vocabularies/contrib/subjects/datastreams.py +61 -0
  63. invenio_vocabularies/contrib/subjects/euroscivoc/__init__.py +9 -0
  64. invenio_vocabularies/contrib/subjects/euroscivoc/datastreams.py +171 -0
  65. invenio_vocabularies/contrib/subjects/jsonschemas/subjects/subject-v1.0.0.json +31 -0
  66. invenio_vocabularies/contrib/subjects/mappings/os-v1/subjects/subject-v1.0.0.json +35 -0
  67. invenio_vocabularies/contrib/subjects/mappings/os-v2/subjects/subject-v1.0.0.json +35 -0
  68. invenio_vocabularies/contrib/subjects/mappings/v7/subjects/subject-v1.0.0.json +35 -0
  69. invenio_vocabularies/contrib/subjects/mesh/__init__.py +9 -0
  70. invenio_vocabularies/contrib/subjects/mesh/datastreams.py +43 -0
  71. invenio_vocabularies/contrib/subjects/schema.py +47 -5
  72. invenio_vocabularies/contrib/subjects/subjects.py +10 -0
  73. invenio_vocabularies/datastreams/datastreams.py +61 -13
  74. invenio_vocabularies/datastreams/factories.py +1 -2
  75. invenio_vocabularies/datastreams/readers.py +138 -29
  76. invenio_vocabularies/datastreams/tasks.py +37 -0
  77. invenio_vocabularies/datastreams/transformers.py +17 -27
  78. invenio_vocabularies/datastreams/writers.py +116 -14
  79. invenio_vocabularies/datastreams/xml.py +34 -0
  80. invenio_vocabularies/ext.py +59 -5
  81. invenio_vocabularies/factories.py +137 -0
  82. invenio_vocabularies/jobs.py +133 -0
  83. invenio_vocabularies/proxies.py +2 -2
  84. invenio_vocabularies/records/jsonschemas/vocabularies/definitions-v1.0.0.json +7 -0
  85. invenio_vocabularies/records/jsonschemas/vocabularies/vocabulary-v1.0.0.json +1 -4
  86. invenio_vocabularies/records/mappings/os-v1/vocabularies/vocabulary-v1.0.0.json +3 -3
  87. invenio_vocabularies/records/mappings/os-v2/vocabularies/vocabulary-v1.0.0.json +3 -3
  88. invenio_vocabularies/records/mappings/v7/vocabularies/vocabulary-v1.0.0.json +3 -3
  89. invenio_vocabularies/records/models.py +2 -4
  90. invenio_vocabularies/records/pidprovider.py +1 -2
  91. invenio_vocabularies/records/systemfields/relations.py +2 -2
  92. invenio_vocabularies/resources/__init__.py +9 -1
  93. invenio_vocabularies/resources/config.py +105 -0
  94. invenio_vocabularies/resources/resource.py +31 -41
  95. invenio_vocabularies/resources/schema.py +2 -1
  96. invenio_vocabularies/services/__init__.py +5 -2
  97. invenio_vocabularies/services/config.py +179 -0
  98. invenio_vocabularies/services/custom_fields/__init__.py +6 -2
  99. invenio_vocabularies/services/custom_fields/subject.py +82 -0
  100. invenio_vocabularies/services/custom_fields/vocabulary.py +5 -3
  101. invenio_vocabularies/services/permissions.py +3 -1
  102. invenio_vocabularies/services/results.py +110 -0
  103. invenio_vocabularies/services/schema.py +11 -2
  104. invenio_vocabularies/services/service.py +46 -94
  105. invenio_vocabularies/services/tasks.py +1 -1
  106. invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/subjects.html +23 -0
  107. invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/vocabularies-list.html +12 -0
  108. invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/vocabulary-details.html +71 -0
  109. invenio_vocabularies/translations/af/LC_MESSAGES/messages.mo +0 -0
  110. invenio_vocabularies/translations/ar/LC_MESSAGES/messages.mo +0 -0
  111. invenio_vocabularies/translations/bg/LC_MESSAGES/messages.mo +0 -0
  112. invenio_vocabularies/translations/ca/LC_MESSAGES/messages.mo +0 -0
  113. invenio_vocabularies/translations/cs/LC_MESSAGES/messages.mo +0 -0
  114. invenio_vocabularies/translations/da/LC_MESSAGES/messages.mo +0 -0
  115. invenio_vocabularies/translations/de/LC_MESSAGES/messages.mo +0 -0
  116. invenio_vocabularies/translations/de_AT/LC_MESSAGES/messages.mo +0 -0
  117. invenio_vocabularies/translations/de_DE/LC_MESSAGES/messages.mo +0 -0
  118. invenio_vocabularies/translations/el/LC_MESSAGES/messages.mo +0 -0
  119. invenio_vocabularies/translations/en/LC_MESSAGES/messages.mo +0 -0
  120. invenio_vocabularies/translations/en_AT/LC_MESSAGES/messages.mo +0 -0
  121. invenio_vocabularies/translations/en_HU/LC_MESSAGES/messages.mo +0 -0
  122. invenio_vocabularies/translations/es/LC_MESSAGES/messages.mo +0 -0
  123. invenio_vocabularies/translations/es_CU/LC_MESSAGES/messages.mo +0 -0
  124. invenio_vocabularies/translations/es_MX/LC_MESSAGES/messages.mo +0 -0
  125. invenio_vocabularies/translations/et/LC_MESSAGES/messages.mo +0 -0
  126. invenio_vocabularies/translations/et_EE/LC_MESSAGES/messages.mo +0 -0
  127. invenio_vocabularies/translations/fa/LC_MESSAGES/messages.mo +0 -0
  128. invenio_vocabularies/translations/fa_IR/LC_MESSAGES/messages.mo +0 -0
  129. invenio_vocabularies/translations/fr/LC_MESSAGES/messages.mo +0 -0
  130. invenio_vocabularies/translations/fr_CI/LC_MESSAGES/messages.mo +0 -0
  131. invenio_vocabularies/translations/fr_FR/LC_MESSAGES/messages.mo +0 -0
  132. invenio_vocabularies/translations/gl/LC_MESSAGES/messages.mo +0 -0
  133. invenio_vocabularies/translations/hi_IN/LC_MESSAGES/messages.mo +0 -0
  134. invenio_vocabularies/translations/hr/LC_MESSAGES/messages.mo +0 -0
  135. invenio_vocabularies/translations/hu/LC_MESSAGES/messages.mo +0 -0
  136. invenio_vocabularies/translations/hu_HU/LC_MESSAGES/messages.mo +0 -0
  137. invenio_vocabularies/translations/it/LC_MESSAGES/messages.mo +0 -0
  138. invenio_vocabularies/translations/ja/LC_MESSAGES/messages.mo +0 -0
  139. invenio_vocabularies/translations/ka/LC_MESSAGES/messages.mo +0 -0
  140. invenio_vocabularies/translations/lt/LC_MESSAGES/messages.mo +0 -0
  141. invenio_vocabularies/translations/messages.pot +95 -48
  142. invenio_vocabularies/translations/ne/LC_MESSAGES/messages.mo +0 -0
  143. invenio_vocabularies/translations/no/LC_MESSAGES/messages.mo +0 -0
  144. invenio_vocabularies/translations/pl/LC_MESSAGES/messages.mo +0 -0
  145. invenio_vocabularies/translations/pt/LC_MESSAGES/messages.mo +0 -0
  146. invenio_vocabularies/translations/ro/LC_MESSAGES/messages.mo +0 -0
  147. invenio_vocabularies/translations/ru/LC_MESSAGES/messages.mo +0 -0
  148. invenio_vocabularies/translations/rw/LC_MESSAGES/messages.mo +0 -0
  149. invenio_vocabularies/translations/sk/LC_MESSAGES/messages.mo +0 -0
  150. invenio_vocabularies/translations/sv/LC_MESSAGES/messages.mo +0 -0
  151. invenio_vocabularies/translations/sv_SE/LC_MESSAGES/messages.mo +0 -0
  152. invenio_vocabularies/translations/tr/LC_MESSAGES/messages.mo +0 -0
  153. invenio_vocabularies/translations/uk/LC_MESSAGES/messages.mo +0 -0
  154. invenio_vocabularies/translations/uk_UA/LC_MESSAGES/messages.mo +0 -0
  155. invenio_vocabularies/translations/zh_CN/LC_MESSAGES/messages.mo +0 -0
  156. invenio_vocabularies/translations/zh_TW/LC_MESSAGES/messages.mo +0 -0
  157. invenio_vocabularies/views.py +12 -26
  158. invenio_vocabularies/webpack.py +3 -3
  159. {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/METADATA +150 -6
  160. {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/RECORD +165 -132
  161. {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/WHEEL +1 -1
  162. {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/entry_points.txt +17 -0
  163. {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/AUTHORS.rst +0 -0
  164. {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/LICENSE +0 -0
  165. {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/top_level.txt +0 -0
@@ -47,17 +47,66 @@
47
47
  },
48
48
  "title": {
49
49
  "type": "object",
50
- "dynamic": true
50
+ "dynamic": "true"
51
+ },
52
+ "tags": {
53
+ "type": "keyword"
51
54
  },
52
55
  "number": {
53
56
  "type": "keyword"
54
57
  },
55
58
  "acronym": {
56
- "type": "keyword"
59
+ "type": "keyword",
60
+ "fields": {
61
+ "text": { "type": "text" }
62
+ }
57
63
  },
58
64
  "program": {
59
65
  "type": "keyword"
60
66
  },
67
+ "subjects": {
68
+ "properties": {
69
+ "@v": {
70
+ "type": "keyword"
71
+ },
72
+ "id": {
73
+ "type": "keyword"
74
+ },
75
+ "props": {
76
+ "type": "object",
77
+ "dynamic": "true"
78
+ },
79
+ "subject": {
80
+ "type": "keyword"
81
+ },
82
+ "scheme": {
83
+ "type": "keyword"
84
+ },
85
+ "identifiers": {
86
+ "properties": {
87
+ "identifier": {
88
+ "type": "keyword"
89
+ },
90
+ "scheme": {
91
+ "type": "keyword"
92
+ }
93
+ }
94
+ }
95
+ }
96
+ },
97
+ "organizations": {
98
+ "properties": {
99
+ "scheme": {
100
+ "type": "keyword"
101
+ },
102
+ "id": {
103
+ "type": "keyword"
104
+ },
105
+ "organization": {
106
+ "type": "keyword"
107
+ }
108
+ }
109
+ },
61
110
  "funder": {
62
111
  "type": "object",
63
112
  "properties": {
@@ -47,17 +47,66 @@
47
47
  },
48
48
  "title": {
49
49
  "type": "object",
50
- "dynamic": true
50
+ "dynamic": "true"
51
+ },
52
+ "tags": {
53
+ "type": "keyword"
51
54
  },
52
55
  "number": {
53
56
  "type": "keyword"
54
57
  },
55
58
  "acronym": {
56
- "type": "keyword"
59
+ "type": "keyword",
60
+ "fields": {
61
+ "text": { "type": "text" }
62
+ }
57
63
  },
58
64
  "program": {
59
65
  "type": "keyword"
60
66
  },
67
+ "subjects": {
68
+ "properties": {
69
+ "@v": {
70
+ "type": "keyword"
71
+ },
72
+ "id": {
73
+ "type": "keyword"
74
+ },
75
+ "props": {
76
+ "type": "object",
77
+ "dynamic": "true"
78
+ },
79
+ "subject": {
80
+ "type": "keyword"
81
+ },
82
+ "scheme": {
83
+ "type": "keyword"
84
+ },
85
+ "identifiers": {
86
+ "properties": {
87
+ "identifier": {
88
+ "type": "keyword"
89
+ },
90
+ "scheme": {
91
+ "type": "keyword"
92
+ }
93
+ }
94
+ }
95
+ }
96
+ },
97
+ "organizations": {
98
+ "properties": {
99
+ "scheme": {
100
+ "type": "keyword"
101
+ },
102
+ "id": {
103
+ "type": "keyword"
104
+ },
105
+ "organization": {
106
+ "type": "keyword"
107
+ }
108
+ }
109
+ },
61
110
  "funder": {
62
111
  "type": "object",
63
112
  "properties": {
@@ -47,17 +47,66 @@
47
47
  },
48
48
  "title": {
49
49
  "type": "object",
50
- "dynamic": true
50
+ "dynamic": "true"
51
+ },
52
+ "tags": {
53
+ "type": "keyword"
51
54
  },
52
55
  "number": {
53
56
  "type": "keyword"
54
57
  },
55
58
  "acronym": {
56
- "type": "keyword"
59
+ "type": "keyword",
60
+ "fields": {
61
+ "text": { "type": "text" }
62
+ }
57
63
  },
58
64
  "program": {
59
65
  "type": "keyword"
60
66
  },
67
+ "subjects": {
68
+ "properties": {
69
+ "@v": {
70
+ "type": "keyword"
71
+ },
72
+ "id": {
73
+ "type": "keyword"
74
+ },
75
+ "props": {
76
+ "type": "object",
77
+ "dynamic": "true"
78
+ },
79
+ "subject": {
80
+ "type": "keyword"
81
+ },
82
+ "scheme": {
83
+ "type": "keyword"
84
+ },
85
+ "identifiers": {
86
+ "properties": {
87
+ "identifier": {
88
+ "type": "keyword"
89
+ },
90
+ "scheme": {
91
+ "type": "keyword"
92
+ }
93
+ }
94
+ }
95
+ }
96
+ },
97
+ "organizations": {
98
+ "properties": {
99
+ "scheme": {
100
+ "type": "keyword"
101
+ },
102
+ "id": {
103
+ "type": "keyword"
104
+ },
105
+ "organization": {
106
+ "type": "keyword"
107
+ }
108
+ }
109
+ },
61
110
  "funder": {
62
111
  "type": "object",
63
112
  "properties": {
@@ -1,6 +1,6 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
- # Copyright (C) 2021-2022 CERN.
3
+ # Copyright (C) 2021-2024 CERN.
4
4
  #
5
5
  # Invenio-Vocabularies is free software; you can redistribute it and/or
6
6
  # modify it under the terms of the MIT License; see LICENSE file for more
@@ -17,13 +17,24 @@ from marshmallow_utils.schemas import IdentifierSchema
17
17
 
18
18
  from ...services.schema import (
19
19
  BaseVocabularySchema,
20
+ ContribVocabularyRelationSchema,
20
21
  ModePIDFieldVocabularyMixin,
21
22
  i18n_strings,
22
23
  )
23
24
  from ..funders.schema import FunderRelationSchema
25
+ from ..subjects.schema import SubjectRelationSchema
24
26
  from .config import award_schemes
25
27
 
26
28
 
29
class AwardOrganizationRelationSchema(ContribVocabularyRelationSchema):
    """Relation schema describing one organization attached to an award."""

    # Class-level settings read by ``ContribVocabularyRelationSchema``.
    # NOTE(review): their exact semantics live in the parent class, which is
    # not visible here -- confirm before renaming either value.
    ftf_name = "organization"
    parent_field_name = "organizations"

    # Serialized fields of the relation.
    organization = SanitizedUnicode()
    scheme = SanitizedUnicode()
36
+
37
+
27
38
  class AwardSchema(BaseVocabularySchema, ModePIDFieldVocabularyMixin):
28
39
  """Award schema."""
29
40
 
@@ -46,6 +57,10 @@ class AwardSchema(BaseVocabularySchema, ModePIDFieldVocabularyMixin):
46
57
 
47
58
  program = SanitizedUnicode()
48
59
 
60
+ subjects = fields.List(fields.Nested(SubjectRelationSchema))
61
+
62
+ organizations = fields.List(fields.Nested(AwardOrganizationRelationSchema))
63
+
49
64
  id = SanitizedUnicode(
50
65
  validate=validate.Length(min=1, error=_("PID cannot be blank."))
51
66
  )
@@ -1,6 +1,6 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
- # Copyright (C) 2022 CERN.
3
+ # Copyright (C) 2022-2024 CERN.
4
4
  #
5
5
  # Invenio-Vocabularies is free software; you can redistribute it and/or
6
6
  # modify it under the terms of the MIT License; see LICENSE file for more
@@ -12,6 +12,9 @@ from marshmallow import Schema, fields
12
12
 
13
13
  from invenio_vocabularies.resources import L10NString
14
14
 
15
+ from ..subjects.schema import SubjectRelationSchema
16
+ from .schema import AwardOrganizationRelationSchema
17
+
15
18
 
16
19
  class IdentifierSchema(Schema):
17
20
  """Identifier scheme."""
@@ -37,4 +40,8 @@ class AwardL10NItemSchema(Schema):
37
40
  acronym = fields.String(dump_only=True)
38
41
  program = fields.String(dump_only=True)
39
42
  funder = fields.Nested(FunderRelationSchema, dump_only=True)
43
+ subjects = fields.List(fields.Nested(SubjectRelationSchema), dump_only=True)
40
44
  identifiers = fields.List(fields.Nested(IdentifierSchema), dump_only=True)
45
+ organizations = fields.List(
46
+ fields.Nested(AwardOrganizationRelationSchema), dump_only=True
47
+ )
@@ -1,6 +1,6 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
- # Copyright (C) 2022 CERN.
3
+ # Copyright (C) 2022-2024 CERN.
4
4
  #
5
5
  # Invenio-Vocabularies is free software; you can redistribute it and/or
6
6
  # modify it under the terms of the MIT License; see LICENSE file for more
@@ -8,7 +8,6 @@
8
8
 
9
9
  """Vocabulary awards."""
10
10
 
11
-
12
11
  from .awards import record_type
13
12
 
14
13
  AwardsServiceConfig = record_type.service_config_cls
@@ -0,0 +1,9 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2024 CERN.
4
+ #
5
+ # Invenio-Vocabularies is free software; you can redistribute it and/or
6
+ # modify it under the terms of the MIT License; see LICENSE file for more
7
+ # details.
8
+
9
+ """Vocabularies common module."""
@@ -0,0 +1,9 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2024 CERN.
4
+ #
5
+ # Invenio-Vocabularies is free software; you can redistribute it and/or
6
+ # modify it under the terms of the MIT License; see LICENSE file for more
7
+ # details.
8
+
9
+ """OpenAIRE-related module."""
@@ -0,0 +1,84 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2024 CERN.
4
+ #
5
+ # Invenio-Vocabularies is free software; you can redistribute it and/or
6
+ # modify it under the terms of the MIT License; see LICENSE file for more
7
+ # details.
8
+
9
+ """OpenAIRE-related Datastreams Readers/Writers/Transformers module."""
10
+
11
+ import io
12
+
13
+ import requests
14
+
15
+ from invenio_vocabularies.datastreams.errors import ReaderError
16
+ from invenio_vocabularies.datastreams.readers import BaseReader
17
+
18
+
19
class OpenAIREHTTPReader(BaseReader):
    """OpenAIRE HTTP reader.

    Yields an in-memory binary stream of the latest OpenAIRE Graph Dataset
    tar file of a given type.
    """

    def __init__(self, origin=None, mode="r", tar_href=None, *args, **kwargs):
        """Constructor.

        :param origin: dataset selector, either ``"full"`` or ``"diff"``
            (validated in :meth:`read`).
        :param mode: forwarded unchanged to ``BaseReader``.
        :param tar_href: suffix that the URL of the wanted tar file must end
            with. Required by :meth:`read`.
        """
        self.tar_href = tar_href
        super().__init__(origin, mode, *args, **kwargs)

    def _iter(self, fp, *args, **kwargs):
        """Not supported: this reader downloads a single file."""
        raise NotImplementedError(
            "OpenAIREHTTPReader downloads one file and therefore does not iterate through items"
        )

    def read(self, item=None, *args, **kwargs):
        """Download the latest OpenAIRE Graph Dataset tar file from Zenodo.

        Yields a single ``io.BytesIO`` holding the full tar file content.

        :param item: must be falsy; chaining after another reader is not
            supported.
        :raises NotImplementedError: if ``item`` is given.
        :raises ReaderError: if the origin is unknown, ``tar_href`` is not
            set, or the record does not link exactly one matching tar file.
        """
        if item:
            raise NotImplementedError(
                "OpenAIREHTTPReader does not support being chained after another reader"
            )

        if self._origin == "full":
            # OpenAIRE Graph Dataset
            api_url = "https://zenodo.org/api/records/3516917"
        elif self._origin == "diff":
            # OpenAIRE Graph dataset: new collected projects
            api_url = "https://zenodo.org/api/records/6419021"
        else:
            raise ReaderError("The --origin option should be either 'full' or 'diff'")

        # Fail early with a reader error: `str.endswith(None)` would otherwise
        # raise an unrelated TypeError after the first network round-trip.
        if not self.tar_href:
            raise ReaderError("The tar_href option is required to select a tar file")

        # Call the signposting `linkset+json` endpoint for the Concept DOI
        # (i.e. latest version) of the OpenAIRE Graph Dataset.
        # See: https://github.com/inveniosoftware/rfcs/blob/master/rfcs/rdm-0071-signposting.md#provide-an-applicationlinksetjson-endpoint
        headers = {"Accept": "application/linkset+json"}
        api_resp = requests.get(api_url, headers=headers)
        api_resp.raise_for_status()

        # Extract the Landing page Link Set Object located as the first (index 0) item.
        landing_page_linkset = api_resp.json()["linkset"][0]

        # Extract the URL of the only tar file matching `tar_href` linked to
        # the record. `.get` is used so that malformed link items are simply
        # not matched instead of raising KeyError.
        matching_tar_items = [
            linked
            for linked in landing_page_linkset.get("item", [])
            if linked.get("type") == "application/x-tar"
            and linked.get("href", "").endswith(self.tar_href)
        ]
        if len(matching_tar_items) != 1:
            raise ReaderError(
                f"Expected 1 tar item matching {self.tar_href} but got {len(matching_tar_items)}"
            )
        file_url = matching_tar_items[0]["href"]

        # Download the matching tar file and fully load the response bytes
        # content in memory. The bytes are wrapped in a BytesIO to be a
        # file-like object (as required by `tarfile.open`). Using
        # `file_resp.raw` directly is not possible since `tarfile.open`
        # requires the file-like object to be seekable.
        file_resp = requests.get(file_url)
        file_resp.raise_for_status()
        yield io.BytesIO(file_resp.content)
76
+
77
+
78
# Datastream components contributed by the OpenAIRE module, keyed by the
# name used to refer to them.
# NOTE(review): presumably merged into the application-wide datastream
# configuration -- confirm against the loader.
VOCABULARIES_DATASTREAM_READERS = {
    "openaire-http": OpenAIREHTTPReader,
}

# This module contributes no transformers.
VOCABULARIES_DATASTREAM_TRANSFORMERS = {}

# This module contributes no writers.
VOCABULARIES_DATASTREAM_WRITERS = {}
@@ -0,0 +1,9 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2024 CERN.
4
+ #
5
+ # Invenio-Vocabularies is free software; you can redistribute it and/or
6
+ # modify it under the terms of the MIT License; see LICENSE file for more
7
+ # details.
8
+
9
+ """ROR-related module."""
@@ -0,0 +1,220 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2024 CERN.
4
+ # Copyright (C) 2024 California Institute of Technology.
5
+ #
6
+ # Invenio-Vocabularies is free software; you can redistribute it and/or
7
+ # modify it under the terms of the MIT License; see LICENSE file for more
8
+ # details.
9
+
10
+ """ROR-related Datastreams Readers/Writers/Transformers module."""
11
+
12
+ import io
13
+
14
+ import arrow
15
+ import requests
16
+ from idutils import normalize_ror
17
+
18
+ from invenio_vocabularies.datastreams.errors import ReaderError, TransformerError
19
+ from invenio_vocabularies.datastreams.readers import BaseReader
20
+ from invenio_vocabularies.datastreams.transformers import BaseTransformer
21
+
22
+
23
class RORHTTPReader(BaseReader):
    """ROR HTTP Reader.

    Yields an in-memory binary stream of the latest ROR data dump ZIP file.
    """

    def __init__(self, origin=None, mode="r", since=None, *args, **kwargs):
        """Constructor.

        :param origin: forwarded unchanged to ``BaseReader``.
        :param mode: forwarded unchanged to ``BaseReader``.
        :param since: optional date (anything ``arrow.get`` accepts); when
            set, :meth:`read` yields nothing if the latest dump is older.
        """
        self._since = since
        super().__init__(origin, mode, *args, **kwargs)

    def _iter(self, fp, *args, **kwargs):
        """Not supported: this reader downloads a single file."""
        raise NotImplementedError(
            "RORHTTPReader downloads one file "
            "and therefore does not iterate through items"
        )

    def _get_last_dump_date(self, linksets):
        """Get the last dump date.

        Looks for the first ``application/ld+json`` link among the
        ``describedby`` entries of the linksets, fetches it, and parses its
        ``dateCreated`` (falling back to ``datePublished``).

        :param linksets: list of linkset objects from the signposting endpoint.
        :returns: the dump date as returned by ``arrow.get``.
        :raises ReaderError: if no linkset carries a JSON-LD link, or the
            JSON-LD document has no usable date.
        """
        for linkset in linksets:
            for format_link in linkset.get("describedby", []):
                if format_link.get("type") != "application/ld+json":
                    continue

                json_ld_response = requests.get(
                    format_link["href"],
                    headers={"Accept": format_link["type"]},
                )
                json_ld_response.raise_for_status()
                json_ld_data = json_ld_response.json()

                raw_date = json_ld_data.get("dateCreated") or json_ld_data.get(
                    "datePublished"
                )
                if not raw_date:
                    raise ReaderError(
                        "Couldn't find a creation or publication date in the "
                        "JSON-LD metadata to determine last dump date."
                    )
                return arrow.get(raw_date)

        # Only give up once every linkset has been inspected (this also
        # covers an empty `linksets`, which must not silently return None).
        raise ReaderError(
            "Couldn't find JSON-LD in publisher's linkset "
            "to determine last dump date."
        )

    def read(self, item=None, *args, **kwargs):
        """Read the latest ROR data dump.

        Downloads the ZIP file from Zenodo and yields a single
        ``io.BytesIO`` holding its full content. Yields nothing when
        ``since`` is set and the latest dump is older than it.

        :param item: must be falsy; chaining after another reader is not
            supported.
        :raises NotImplementedError: if ``item`` is given.
        :raises ReaderError: if the linkset is missing, or the record does
            not link exactly one ZIP file.
        """
        if item:
            raise NotImplementedError(
                "RORHTTPReader does not support being chained after another reader"
            )

        # Follow the DOI to get the link of the linkset
        dataset_doi_link = "https://doi.org/10.5281/zenodo.6347574"
        landing_page = requests.get(dataset_doi_link, allow_redirects=True)
        landing_page.raise_for_status()

        # Call the signposting `linkset+json` endpoint for
        # the Concept DOI (i.e. latest version) of the ROR data dump.
        # See: https://github.com/inveniosoftware/rfcs/blob/master/rfcs/rdm-0071-signposting.md#provide-an-applicationlinksetjson-endpoint
        if "linkset" not in landing_page.links:
            raise ReaderError("Linkset not found in the ROR dataset record.")
        linkset_response = requests.get(
            landing_page.links["linkset"]["url"],
            headers={"Accept": "application/linkset+json"},
        )
        linkset_response.raise_for_status()
        linksets = linkset_response.json()["linkset"]

        if self._since:
            last_dump_date = self._get_last_dump_date(linksets)
            if last_dump_date < arrow.get(self._since):
                return

        file_url = None
        for linkset in linksets:
            zip_files = [
                linked
                for linked in linkset.get("item", [])
                if linked.get("type") == "application/zip"
            ]
            if len(zip_files) > 1:
                raise ReaderError(f"Expected 1 ZIP item but got {len(zip_files)}")
            if zip_files:
                file_url = zip_files[0]["href"]
                break

        # Without this guard a record with no ZIP link would surface as an
        # UnboundLocalError on `file_url` below.
        if file_url is None:
            raise ReaderError("Expected 1 ZIP item but got 0")

        # Download the ZIP file and fully load the response bytes content in
        # memory. The bytes are wrapped in a BytesIO to be a file-like object
        # (as required by `zipfile.ZipFile`). Using `file_resp.raw` directly
        # is not possible since `zipfile.ZipFile` requires the file-like
        # object to be seekable.
        file_resp = requests.get(file_url)
        file_resp.raise_for_status()
        yield io.BytesIO(file_resp.content)
115
+
116
+
117
# Readers contributed by the ROR module, keyed by the name used to refer to
# them.
# NOTE(review): presumably merged into the application-wide datastream
# configuration -- confirm against the loader.
VOCABULARIES_DATASTREAM_READERS = {
    "ror-http": RORHTTPReader,
}
120
+
121
+
122
class RORTransformer(BaseTransformer):
    """Transforms a JSON ROR record into a funders record."""

    def __init__(
        self, *args, vocab_schemes=None, funder_fundref_doi_prefix=None, **kwargs
    ):
        """Initializes the transformer.

        :param vocab_schemes: mapping whose keys are the external identifier
            schemes to keep; ``None`` keeps none beyond the ROR id itself.
        :param funder_fundref_doi_prefix: when set, ``fundref`` identifiers
            are kept and rewritten as DOIs using this prefix.
        """
        self.vocab_schemes = vocab_schemes
        self.funder_fundref_doi_prefix = funder_fundref_doi_prefix
        super().__init__(*args, **kwargs)

    def apply(self, stream_entry, **kwargs):
        """Applies the transformation to the stream entry.

        Replaces ``stream_entry.entry`` (a ROR JSON record) with the
        transformed dictionary and returns the same stream entry.

        :raises TransformerError: if the record has no id or no name of type
            ``ror_display``.
        """
        # The module has no gettext alias in scope, so the `_()` calls below
        # would raise NameError instead of the intended TransformerError.
        from invenio_i18n import lazy_gettext as _

        record = stream_entry.entry
        ror = {"title": {}}

        raw_id = record.get("id")
        ror["id"] = normalize_ror(raw_id) if raw_id else None
        if not ror["id"]:
            raise TransformerError(_("Id not found in ROR entry."))

        # A dict is used as an insertion-ordered set so aliases are unique
        # and their order is deterministic.
        aliases = {}
        acronym = None
        display_name = None
        for name in record.get("names") or []:
            value = name.get("value")
            if value is None:
                continue
            # A missing or null language defaults to English.
            lang = name.get("lang") or "en"
            types = name.get("types") or []
            if "ror_display" in types:
                display_name = value
            if "label" in types:
                ror["title"][lang] = value
            if "alias" in types:
                aliases[value] = None
            if "acronym" in types:
                # The first acronym goes in the acronym field to maintain
                # compatibility with the existing data structure
                if not acronym:
                    acronym = value
                else:
                    aliases[value] = None

        # ror_display is required and should be in every entry. Checked
        # before any use of the name so a missing one is reported as a
        # TransformerError rather than a KeyError.
        if not display_name:
            raise TransformerError(
                _("Name with type ror_display not found in ROR entry.")
            )
        ror["name"] = display_name

        if "en" not in ror["title"]:
            ror["title"]["en"] = display_name
        if acronym:
            ror["acronym"] = acronym
        if aliases:
            ror["aliases"] = list(aliases)

        # This only gets the first location, to maintain compatibility
        # with the existing data structure
        locations = record.get("locations") or [{}]
        location = locations[0].get("geonames_details") or {}
        ror["country"] = location.get("country_code")
        ror["country_name"] = location.get("country_name")
        ror["location_name"] = location.get("name")

        ror["types"] = record.get("types")
        ror["status"] = record.get("status")

        # The ROR is always listed in identifiers, expected by serialization
        ror["identifiers"] = [{"identifier": ror["id"], "scheme": "ror"}]
        if self.vocab_schemes:
            valid_schemes = set(self.vocab_schemes.keys())
        else:
            valid_schemes = set()
        fund_ref = "fundref"
        if self.funder_fundref_doi_prefix:
            valid_schemes.add(fund_ref)

        for identifier in record.get("external_ids") or []:
            scheme = identifier.get("type")
            if scheme not in valid_schemes:
                continue
            value = identifier.get("preferred")
            if not value:
                all_values = identifier.get("all") or []
                if not all_values:
                    # Nothing usable for this scheme; skip it.
                    continue
                value = all_values[0]
            if scheme == fund_ref and self.funder_fundref_doi_prefix:
                # FundRef ids are exposed as DOIs under the configured prefix.
                value = f"{self.funder_fundref_doi_prefix}/{value}"
                scheme = "doi"
            ror["identifiers"].append({"identifier": value, "scheme": scheme})

        stream_entry.entry = ror
        return stream_entry
214
+
215
+
216
# Transformers contributed by the ROR module, keyed by the name used to
# refer to them.
# NOTE(review): presumably merged into the application-wide datastream
# configuration -- confirm against the loader.
VOCABULARIES_DATASTREAM_TRANSFORMERS = {
    "ror": RORTransformer,
}

# This module contributes no writers.
VOCABULARIES_DATASTREAM_WRITERS = {}
@@ -1,6 +1,6 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
- # Copyright (C) 2022 CERN.
3
+ # Copyright (C) 2022-2024 CERN.
4
4
  #
5
5
  # Invenio-Vocabularies is free software; you can redistribute it and/or
6
6
  # modify it under the terms of the MIT License; see LICENSE file for more
@@ -9,6 +9,7 @@
9
9
  """Vocabulary funders configuration."""
10
10
 
11
11
  from flask import current_app
12
+ from invenio_i18n import get_locale
12
13
  from invenio_i18n import lazy_gettext as _
13
14
  from invenio_records_resources.services import SearchOptions
14
15
  from invenio_records_resources.services.records.components import DataComponent
@@ -22,6 +23,7 @@ funder_schemes = LocalProxy(lambda: current_app.config["VOCABULARIES_FUNDER_SCHE
22
23
  funder_fundref_doi_prefix = LocalProxy(
23
24
  lambda: current_app.config["VOCABULARIES_FUNDER_DOI_PREFIX"]
24
25
  )
26
+ localized_title = LocalProxy(lambda: f"title.{get_locale()}^20")
25
27
 
26
28
 
27
29
  class FundersSearchOptions(SearchOptions):
@@ -30,8 +32,15 @@ class FundersSearchOptions(SearchOptions):
30
32
  suggest_parser_cls = SuggestQueryParser.factory(
31
33
  fields=[
32
34
  "name^100",
35
+ "acronym.keyword^100",
36
+ "acronym^40",
37
+ localized_title,
38
+ "id^20",
39
+ "aliases^20",
33
40
  "identifiers.identifier^10",
34
- ]
41
+ ],
42
+ type="most_fields", # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html#multi-match-types
43
+ fuzziness="AUTO", # https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#fuzziness
35
44
  )
36
45
 
37
46
  sort_default = "bestmatch"