invenio-vocabularies 2.3.1__py2.py3-none-any.whl → 6.3.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of invenio-vocabularies might be problematic. Click here for more details.

Files changed (165) hide show
  1. invenio_vocabularies/__init__.py +2 -2
  2. invenio_vocabularies/administration/__init__.py +10 -0
  3. invenio_vocabularies/administration/views/__init__.py +10 -0
  4. invenio_vocabularies/administration/views/vocabularies.py +45 -0
  5. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/package.json +1 -7
  6. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/CustomAwardForm.js +80 -64
  7. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingField.js +49 -41
  8. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingModal.js +5 -7
  9. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/NoAwardResults.js +3 -3
  10. invenio_vocabularies/cli.py +31 -44
  11. invenio_vocabularies/config.py +68 -4
  12. invenio_vocabularies/contrib/affiliations/affiliations.py +11 -0
  13. invenio_vocabularies/contrib/affiliations/api.py +1 -2
  14. invenio_vocabularies/contrib/affiliations/config.py +13 -2
  15. invenio_vocabularies/contrib/affiliations/datastreams.py +186 -0
  16. invenio_vocabularies/contrib/affiliations/jsonschemas/affiliations/affiliation-v1.0.0.json +38 -1
  17. invenio_vocabularies/contrib/affiliations/mappings/os-v1/affiliations/affiliation-v1.0.0.json +22 -1
  18. invenio_vocabularies/contrib/affiliations/mappings/os-v1/affiliations/affiliation-v2.0.0.json +171 -0
  19. invenio_vocabularies/contrib/affiliations/mappings/os-v2/affiliations/affiliation-v1.0.0.json +22 -1
  20. invenio_vocabularies/contrib/affiliations/mappings/os-v2/affiliations/affiliation-v2.0.0.json +171 -0
  21. invenio_vocabularies/contrib/affiliations/mappings/v7/affiliations/affiliation-v1.0.0.json +22 -1
  22. invenio_vocabularies/contrib/affiliations/schema.py +17 -3
  23. invenio_vocabularies/contrib/affiliations/services.py +1 -2
  24. invenio_vocabularies/contrib/awards/awards.py +17 -5
  25. invenio_vocabularies/contrib/awards/datastreams.py +241 -7
  26. invenio_vocabularies/contrib/awards/jsonschemas/awards/award-v1.0.0.json +38 -0
  27. invenio_vocabularies/contrib/awards/mappings/os-v1/awards/award-v1.0.0.json +51 -2
  28. invenio_vocabularies/contrib/awards/mappings/os-v2/awards/award-v1.0.0.json +51 -2
  29. invenio_vocabularies/contrib/awards/mappings/v7/awards/award-v1.0.0.json +51 -2
  30. invenio_vocabularies/contrib/awards/schema.py +16 -1
  31. invenio_vocabularies/contrib/awards/serializer.py +8 -1
  32. invenio_vocabularies/contrib/awards/services.py +1 -2
  33. invenio_vocabularies/contrib/common/__init__.py +9 -0
  34. invenio_vocabularies/contrib/common/openaire/__init__.py +9 -0
  35. invenio_vocabularies/contrib/common/openaire/datastreams.py +84 -0
  36. invenio_vocabularies/contrib/common/ror/__init__.py +9 -0
  37. invenio_vocabularies/contrib/common/ror/datastreams.py +220 -0
  38. invenio_vocabularies/contrib/funders/config.py +11 -2
  39. invenio_vocabularies/contrib/funders/datastreams.py +40 -62
  40. invenio_vocabularies/contrib/funders/funders.py +3 -1
  41. invenio_vocabularies/contrib/funders/jsonschemas/funders/funder-v1.0.0.json +36 -1
  42. invenio_vocabularies/contrib/funders/mappings/os-v1/funders/funder-v1.0.0.json +22 -1
  43. invenio_vocabularies/contrib/funders/mappings/os-v1/funders/funder-v2.0.0.json +156 -0
  44. invenio_vocabularies/contrib/funders/mappings/os-v2/funders/funder-v1.0.0.json +22 -1
  45. invenio_vocabularies/contrib/funders/mappings/os-v2/funders/funder-v2.0.0.json +156 -0
  46. invenio_vocabularies/contrib/funders/mappings/v7/funders/funder-v1.0.0.json +22 -1
  47. invenio_vocabularies/contrib/funders/schema.py +8 -0
  48. invenio_vocabularies/contrib/funders/serializer.py +2 -1
  49. invenio_vocabularies/contrib/names/config.py +5 -3
  50. invenio_vocabularies/contrib/names/datastreams.py +172 -4
  51. invenio_vocabularies/contrib/names/jsonschemas/names/name-v1.0.0.json +3 -0
  52. invenio_vocabularies/contrib/names/mappings/os-v1/names/name-v1.0.0.json +3 -0
  53. invenio_vocabularies/contrib/names/mappings/os-v1/names/name-v2.0.0.json +150 -0
  54. invenio_vocabularies/contrib/names/mappings/os-v2/names/name-v1.0.0.json +3 -0
  55. invenio_vocabularies/contrib/names/mappings/os-v2/names/name-v2.0.0.json +150 -0
  56. invenio_vocabularies/contrib/names/mappings/v7/names/name-v1.0.0.json +3 -0
  57. invenio_vocabularies/contrib/names/names.py +15 -3
  58. invenio_vocabularies/contrib/names/permissions.py +20 -0
  59. invenio_vocabularies/contrib/names/s3client.py +44 -0
  60. invenio_vocabularies/contrib/names/schema.py +14 -0
  61. invenio_vocabularies/contrib/subjects/config.py +9 -3
  62. invenio_vocabularies/contrib/subjects/datastreams.py +61 -0
  63. invenio_vocabularies/contrib/subjects/euroscivoc/__init__.py +9 -0
  64. invenio_vocabularies/contrib/subjects/euroscivoc/datastreams.py +171 -0
  65. invenio_vocabularies/contrib/subjects/jsonschemas/subjects/subject-v1.0.0.json +31 -0
  66. invenio_vocabularies/contrib/subjects/mappings/os-v1/subjects/subject-v1.0.0.json +35 -0
  67. invenio_vocabularies/contrib/subjects/mappings/os-v2/subjects/subject-v1.0.0.json +35 -0
  68. invenio_vocabularies/contrib/subjects/mappings/v7/subjects/subject-v1.0.0.json +35 -0
  69. invenio_vocabularies/contrib/subjects/mesh/__init__.py +9 -0
  70. invenio_vocabularies/contrib/subjects/mesh/datastreams.py +43 -0
  71. invenio_vocabularies/contrib/subjects/schema.py +47 -5
  72. invenio_vocabularies/contrib/subjects/subjects.py +10 -0
  73. invenio_vocabularies/datastreams/datastreams.py +61 -13
  74. invenio_vocabularies/datastreams/factories.py +1 -2
  75. invenio_vocabularies/datastreams/readers.py +138 -29
  76. invenio_vocabularies/datastreams/tasks.py +37 -0
  77. invenio_vocabularies/datastreams/transformers.py +17 -27
  78. invenio_vocabularies/datastreams/writers.py +116 -14
  79. invenio_vocabularies/datastreams/xml.py +34 -0
  80. invenio_vocabularies/ext.py +59 -5
  81. invenio_vocabularies/factories.py +137 -0
  82. invenio_vocabularies/jobs.py +133 -0
  83. invenio_vocabularies/proxies.py +2 -2
  84. invenio_vocabularies/records/jsonschemas/vocabularies/definitions-v1.0.0.json +7 -0
  85. invenio_vocabularies/records/jsonschemas/vocabularies/vocabulary-v1.0.0.json +1 -4
  86. invenio_vocabularies/records/mappings/os-v1/vocabularies/vocabulary-v1.0.0.json +3 -3
  87. invenio_vocabularies/records/mappings/os-v2/vocabularies/vocabulary-v1.0.0.json +3 -3
  88. invenio_vocabularies/records/mappings/v7/vocabularies/vocabulary-v1.0.0.json +3 -3
  89. invenio_vocabularies/records/models.py +2 -4
  90. invenio_vocabularies/records/pidprovider.py +1 -2
  91. invenio_vocabularies/records/systemfields/relations.py +2 -2
  92. invenio_vocabularies/resources/__init__.py +9 -1
  93. invenio_vocabularies/resources/config.py +105 -0
  94. invenio_vocabularies/resources/resource.py +31 -41
  95. invenio_vocabularies/resources/schema.py +2 -1
  96. invenio_vocabularies/services/__init__.py +5 -2
  97. invenio_vocabularies/services/config.py +179 -0
  98. invenio_vocabularies/services/custom_fields/__init__.py +6 -2
  99. invenio_vocabularies/services/custom_fields/subject.py +82 -0
  100. invenio_vocabularies/services/custom_fields/vocabulary.py +5 -3
  101. invenio_vocabularies/services/permissions.py +3 -1
  102. invenio_vocabularies/services/results.py +110 -0
  103. invenio_vocabularies/services/schema.py +11 -2
  104. invenio_vocabularies/services/service.py +46 -94
  105. invenio_vocabularies/services/tasks.py +1 -1
  106. invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/subjects.html +23 -0
  107. invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/vocabularies-list.html +12 -0
  108. invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/vocabulary-details.html +71 -0
  109. invenio_vocabularies/translations/af/LC_MESSAGES/messages.mo +0 -0
  110. invenio_vocabularies/translations/ar/LC_MESSAGES/messages.mo +0 -0
  111. invenio_vocabularies/translations/bg/LC_MESSAGES/messages.mo +0 -0
  112. invenio_vocabularies/translations/ca/LC_MESSAGES/messages.mo +0 -0
  113. invenio_vocabularies/translations/cs/LC_MESSAGES/messages.mo +0 -0
  114. invenio_vocabularies/translations/da/LC_MESSAGES/messages.mo +0 -0
  115. invenio_vocabularies/translations/de/LC_MESSAGES/messages.mo +0 -0
  116. invenio_vocabularies/translations/de_AT/LC_MESSAGES/messages.mo +0 -0
  117. invenio_vocabularies/translations/de_DE/LC_MESSAGES/messages.mo +0 -0
  118. invenio_vocabularies/translations/el/LC_MESSAGES/messages.mo +0 -0
  119. invenio_vocabularies/translations/en/LC_MESSAGES/messages.mo +0 -0
  120. invenio_vocabularies/translations/en_AT/LC_MESSAGES/messages.mo +0 -0
  121. invenio_vocabularies/translations/en_HU/LC_MESSAGES/messages.mo +0 -0
  122. invenio_vocabularies/translations/es/LC_MESSAGES/messages.mo +0 -0
  123. invenio_vocabularies/translations/es_CU/LC_MESSAGES/messages.mo +0 -0
  124. invenio_vocabularies/translations/es_MX/LC_MESSAGES/messages.mo +0 -0
  125. invenio_vocabularies/translations/et/LC_MESSAGES/messages.mo +0 -0
  126. invenio_vocabularies/translations/et_EE/LC_MESSAGES/messages.mo +0 -0
  127. invenio_vocabularies/translations/fa/LC_MESSAGES/messages.mo +0 -0
  128. invenio_vocabularies/translations/fa_IR/LC_MESSAGES/messages.mo +0 -0
  129. invenio_vocabularies/translations/fr/LC_MESSAGES/messages.mo +0 -0
  130. invenio_vocabularies/translations/fr_CI/LC_MESSAGES/messages.mo +0 -0
  131. invenio_vocabularies/translations/fr_FR/LC_MESSAGES/messages.mo +0 -0
  132. invenio_vocabularies/translations/gl/LC_MESSAGES/messages.mo +0 -0
  133. invenio_vocabularies/translations/hi_IN/LC_MESSAGES/messages.mo +0 -0
  134. invenio_vocabularies/translations/hr/LC_MESSAGES/messages.mo +0 -0
  135. invenio_vocabularies/translations/hu/LC_MESSAGES/messages.mo +0 -0
  136. invenio_vocabularies/translations/hu_HU/LC_MESSAGES/messages.mo +0 -0
  137. invenio_vocabularies/translations/it/LC_MESSAGES/messages.mo +0 -0
  138. invenio_vocabularies/translations/ja/LC_MESSAGES/messages.mo +0 -0
  139. invenio_vocabularies/translations/ka/LC_MESSAGES/messages.mo +0 -0
  140. invenio_vocabularies/translations/lt/LC_MESSAGES/messages.mo +0 -0
  141. invenio_vocabularies/translations/messages.pot +95 -48
  142. invenio_vocabularies/translations/ne/LC_MESSAGES/messages.mo +0 -0
  143. invenio_vocabularies/translations/no/LC_MESSAGES/messages.mo +0 -0
  144. invenio_vocabularies/translations/pl/LC_MESSAGES/messages.mo +0 -0
  145. invenio_vocabularies/translations/pt/LC_MESSAGES/messages.mo +0 -0
  146. invenio_vocabularies/translations/ro/LC_MESSAGES/messages.mo +0 -0
  147. invenio_vocabularies/translations/ru/LC_MESSAGES/messages.mo +0 -0
  148. invenio_vocabularies/translations/rw/LC_MESSAGES/messages.mo +0 -0
  149. invenio_vocabularies/translations/sk/LC_MESSAGES/messages.mo +0 -0
  150. invenio_vocabularies/translations/sv/LC_MESSAGES/messages.mo +0 -0
  151. invenio_vocabularies/translations/sv_SE/LC_MESSAGES/messages.mo +0 -0
  152. invenio_vocabularies/translations/tr/LC_MESSAGES/messages.mo +0 -0
  153. invenio_vocabularies/translations/uk/LC_MESSAGES/messages.mo +0 -0
  154. invenio_vocabularies/translations/uk_UA/LC_MESSAGES/messages.mo +0 -0
  155. invenio_vocabularies/translations/zh_CN/LC_MESSAGES/messages.mo +0 -0
  156. invenio_vocabularies/translations/zh_TW/LC_MESSAGES/messages.mo +0 -0
  157. invenio_vocabularies/views.py +12 -26
  158. invenio_vocabularies/webpack.py +3 -3
  159. {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/METADATA +150 -6
  160. {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/RECORD +165 -132
  161. {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/WHEEL +1 -1
  162. {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/entry_points.txt +17 -0
  163. {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/AUTHORS.rst +0 -0
  164. {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/LICENSE +0 -0
  165. {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,44 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # This file is part of Invenio.
4
+ # Copyright (C) 2024 CERN.
5
+ #
6
+ # Invenio-Vocabularies is free software; you can redistribute it and/or
7
+ # modify it under the terms of the MIT License; see LICENSE file for more
8
+ # details.
9
+
10
+ """S3 client."""
11
+
12
+ from flask import current_app
13
+
14
+ try:
15
+ import s3fs
16
+ except ImportError:
17
+ s3fs = None
18
+
19
+
20
class S3Client:
    """Thin read-only client for objects stored in S3.

    Wraps an ``s3fs.S3FileSystem`` created with an explicit key/secret pair.
    """

    def __init__(self, access_key, secret_key):
        """Create the underlying S3 filesystem handle.

        :param access_key: Value passed to ``s3fs.S3FileSystem`` as ``key``.
        :param secret_key: Value passed to ``s3fs.S3FileSystem`` as ``secret``.
        :raises ImportError: If the optional ``s3fs`` package is not installed.
        """
        if s3fs is None:
            # ``s3fs`` is optional: the module-level import falls back to
            # ``None``. ``ImportError`` names the actual problem and is still
            # caught by callers handling the previous generic ``Exception``.
            raise ImportError("s3fs is not installed.")

        self.fs = s3fs.S3FileSystem(key=access_key, secret=secret_key)

    def read_file(self, s3_path):
        """Read a whole file from S3.

        :param s3_path: Path of the object, as understood by ``self.fs.open``.
        :returns: The content read from the file opened in binary mode.
        """
        with self.fs.open(s3_path, "rb") as f:
            return f.read()
34
+
35
+
36
class S3OrcidClient(S3Client):
    """S3 client configured with the ORCiD credentials of the application."""

    def __init__(self):
        """Build the client from the current Flask application's config.

        Reads ``VOCABULARIES_ORCID_ACCESS_KEY`` and
        ``VOCABULARIES_ORCID_SECRET_KEY`` from ``current_app.config`` and
        hands them to the parent constructor.
        """
        config = current_app.config
        access_key = config["VOCABULARIES_ORCID_ACCESS_KEY"]
        secret_key = config["VOCABULARIES_ORCID_SECRET_KEY"]
        super().__init__(access_key=access_key, secret_key=secret_key)
@@ -56,6 +56,20 @@ class NameSchema(BaseVocabularySchema, ModePIDFieldVocabularyMixin):
56
56
  ]
57
57
  raise ValidationError({"family_name": messages})
58
58
 
59
@validates_schema
def validate_affiliatons(self, data, **kwargs):
    """Reject duplicated free-text affiliations.

    Only affiliations that have a ``name`` but no ``id`` are compared; two
    such entries with the same name make the whole payload invalid.

    The misspelled method name is kept as is so that existing references to
    it keep working.

    :param data: The data being validated.
    :raises ValidationError: On the ``affiliations`` field when a name is
        repeated.
    """
    seen_names = set()
    # ``or []`` also covers an explicit ``"affiliations": None``, which would
    # otherwise fail with a ``TypeError`` when iterated.
    for affiliation in data.get("affiliations") or []:
        name = affiliation.get("name")
        if affiliation.get("id") or not name:
            # Entries with an id, or without a name, are not compared.
            continue
        if name in seen_names:
            messages = [_("Duplicated affiliations.")]
            raise ValidationError({"affiliations": messages})
        seen_names.add(name)
72
+
59
73
  @post_load
60
74
  def update_name(self, data, **kwargs):
61
75
  """Update names for person.
@@ -2,6 +2,7 @@
2
2
  #
3
3
  # Copyright (C) 2021 CERN.
4
4
  # Copyright (C) 2021 Northwestern University.
5
+ # Copyright (C) 2024 University of Münster.
5
6
  #
6
7
  # Invenio-Vocabularies is free software; you can redistribute it and/or
7
8
  # modify it under the terms of the MIT License; see LICENSE file for more
@@ -9,13 +10,19 @@
9
10
 
10
11
  """Subjects configuration."""
11
12
 
13
+ from flask import current_app
12
14
  from invenio_i18n import lazy_gettext as _
13
15
  from invenio_records_resources.services import SearchOptions
14
16
  from invenio_records_resources.services.records.components import DataComponent
17
+ from werkzeug.local import LocalProxy
15
18
 
16
19
  from ...services.components import PIDComponent
17
20
  from ...services.querystr import FilteredSuggestQueryParser
18
21
 
22
+ subject_schemes = LocalProxy(
23
+ lambda: current_app.config["VOCABULARIES_SUBJECTS_SCHEMES"]
24
+ )
25
+
19
26
 
20
27
  class SubjectsSearchOptions(SearchOptions):
21
28
  """Search options."""
@@ -23,9 +30,8 @@ class SubjectsSearchOptions(SearchOptions):
23
30
  suggest_parser_cls = FilteredSuggestQueryParser.factory(
24
31
  filter_field="scheme",
25
32
  fields=[ # suggest fields
26
- "subject^100",
27
- "subject._2gram",
28
- "subject._3gram",
33
+ "title.*^100",
34
+ "synonyms^20",
29
35
  ],
30
36
  )
31
37
 
@@ -0,0 +1,61 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2024 University of Münster.
4
+ #
5
+ # Invenio-Vocabularies is free software; you can redistribute it and/or
6
+ # modify it under the terms of the MIT License; see LICENSE file for more
7
+ # details.
8
+
9
+ """Subjects datastreams, transformers, writers and readers."""
10
+
11
+ from invenio_access.permissions import system_identity
12
+ from invenio_i18n import lazy_gettext as _
13
+
14
+ from ...datastreams.writers import ServiceWriter
15
+ from .euroscivoc import datastreams as euroscivoc_datastreams
16
+ from .mesh import datastreams as mesh_datastreams
17
+
18
+
19
class SubjectsServiceWriter(ServiceWriter):
    """Service writer that targets the ``subjects`` service by default."""

    def __init__(self, *args, **kwargs):
        """Initialise the writer.

        ``service_or_name`` defaults to ``"subjects"`` when the caller does
        not pass it as a keyword argument.
        """
        kwargs.setdefault("service_or_name", "subjects")
        super().__init__(*args, **kwargs)

    def _entry_id(self, entry):
        """Return the value stored under the entry's ``"id"`` key."""
        return entry["id"]
30
+
31
+
32
# The registries below merge the entries contributed by the MeSH and
# EuroSciVoc sub-modules; on a key clash the later (EuroSciVoc) entry wins.
VOCABULARIES_DATASTREAM_READERS = {
    **mesh_datastreams.VOCABULARIES_DATASTREAM_READERS,
    **euroscivoc_datastreams.VOCABULARIES_DATASTREAM_READERS,
}
"""Subjects Data Streams readers."""

VOCABULARIES_DATASTREAM_TRANSFORMERS = {
    **mesh_datastreams.VOCABULARIES_DATASTREAM_TRANSFORMERS,
    **euroscivoc_datastreams.VOCABULARIES_DATASTREAM_TRANSFORMERS,
}
"""Subjects Data Streams transformers."""

VOCABULARIES_DATASTREAM_WRITERS = {
    "subjects-service": SubjectsServiceWriter,
    **mesh_datastreams.VOCABULARIES_DATASTREAM_WRITERS,
    **euroscivoc_datastreams.VOCABULARIES_DATASTREAM_WRITERS,
}
"""Subjects Data Streams writers."""

# Default pipeline: a YAML reader feeding the subjects service writer.
DATASTREAM_CONFIG = dict(
    readers=[dict(type="yaml")],
    writers=[dict(type="subjects-service")],
)
"""Data Stream configuration."""
@@ -0,0 +1,9 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2024 CERN.
4
+ #
5
+ # Invenio-Vocabularies is free software; you can redistribute it and/or
6
+ # modify it under the terms of the MIT License; see LICENSE file for more
7
+ # details.
8
+
9
+ """EuroSciVoc Subjects module."""
@@ -0,0 +1,171 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2022-2024 CERN.
4
+ #
5
+ # Invenio-Vocabularies is free software; you can redistribute it and/or
6
+ # modify it under the terms of the MIT License; see LICENSE file for more
7
+ # details.
8
+
9
+ """EuroSciVoc subjects datastreams, readers, transformers, and writers."""
10
+
11
+ import io
12
+ from collections import namedtuple
13
+
14
+ import requests
15
+ from rdflib import OWL, RDF, Graph, Namespace
16
+
17
+ from invenio_vocabularies.config import SUBJECTS_EUROSCIVOC_FILE_URL
18
+ from invenio_vocabularies.datastreams.readers import BaseReader
19
+ from invenio_vocabularies.datastreams.transformers import BaseTransformer
20
+
21
+
22
class EuroSciVocSubjectsHTTPReader(BaseReader):
    """Reader that downloads the EuroSciVoc RDF data and yields its concepts."""

    DEFAULT_TIMEOUT = 60
    """Seconds passed to ``requests.get`` as ``timeout`` unless overridden."""

    def __init__(self, origin=None, mode="r", since=None, *args, **kwargs):
        """Initialize the reader with the data source.

        :param origin: The URL from which to fetch the RDF data. Falls back
            to ``SUBJECTS_EUROSCIVOC_FILE_URL`` when empty.
        :param mode: Mode of operation (default is 'r' for reading).
        :param since: Accepted in the signature but not used by this reader.
            NOTE(review): it is not forwarded to the base class either -
            confirm that this is intended.
        :param timeout: Optional keyword argument; timeout handed to
            ``requests.get`` (defaults to ``DEFAULT_TIMEOUT``).
        """
        # Consumed here so that it is not forwarded to the base class.
        self.timeout = kwargs.pop("timeout", self.DEFAULT_TIMEOUT)
        self.origin = origin or SUBJECTS_EUROSCIVOC_FILE_URL
        super().__init__(origin=origin, mode=mode, *args, **kwargs)

    def _iter(self, rdf_graph):
        """Iterate over the RDF graph, yielding one subject at a time.

        :param rdf_graph: The RDF graph to process.
        :yield: A dict with the ``subject`` node and the ``rdf_graph`` it
            belongs to, for every node typed as ``skos:Concept``.
        """
        SKOS_CORE = Namespace("http://www.w3.org/2004/02/skos/core#")

        for subject, _, _ in rdf_graph.triples((None, RDF.type, SKOS_CORE.Concept)):
            yield {"subject": subject, "rdf_graph": rdf_graph}

    def read(self, item=None, *args, **kwargs):
        """Fetch the EuroSciVoc RDF data and yield it one subject at a time.

        :param item: Must be empty; this reader cannot be chained after
            another reader.
        :raises NotImplementedError: If ``item`` is provided.
        :raises requests.HTTPError: If the server answers with an error status.
        :yield: The entries produced by ``_iter`` for the downloaded graph.
        """
        if item:
            raise NotImplementedError(
                "EuroSciVocSubjectsHTTPReader does not support being chained after another reader"
            )
        # Fetch the RDF data from the specified origin URL. Without a timeout
        # a server that stops responding would block the caller forever.
        response = requests.get(self.origin, timeout=self.timeout)
        response.raise_for_status()

        # Treat the response content as a file-like object
        rdf_data = io.BytesIO(response.content)

        # Parse the RDF data into a graph
        rdf_graph = Graph()
        rdf_graph.parse(rdf_data, format="xml")

        # Yield each processed subject from the RDF graph
        yield from self._iter(rdf_graph)
68
+
69
+
70
class EuroSciVocSubjectsTransformer(BaseTransformer):
    """Transformer turning EuroSciVoc RDF concepts into subject dictionaries."""

    SKOS_CORE = Namespace("http://www.w3.org/2004/02/skos/core#")
    SPLITCHAR = ","

    def _get_notation(self, subject, rdf_graph):
        """Return the first all-digit ``skos:notation`` of a subject.

        :returns: The notation as a string, or ``None`` when the subject has
            no purely numeric notation.
        """
        pattern = (subject, self.SKOS_CORE.notation, None)
        numeric_notations = (
            str(value)
            for _s, _p, value in rdf_graph.triples(pattern)
            if str(value).isdigit()
        )
        return next(numeric_notations, None)

    def _get_labels(self, subject, rdf_graph):
        """Collect the capitalized labels of a subject, keyed by language.

        ``skos:prefLabel`` values are always used. ``skos:altLabel`` values
        only fill in languages that are still missing, and only when no
        English preferred label exists.
        """
        labels = {}
        preferred = rdf_graph.triples((subject, self.SKOS_CORE.prefLabel, None))
        for _s, _p, label in preferred:
            labels[label.language] = label.value.capitalize()

        if "en" in labels:
            return labels

        alternates = rdf_graph.triples((subject, self.SKOS_CORE.altLabel, None))
        for _s, _p, label in alternates:
            labels.setdefault(label.language, label.value.capitalize())
        return labels

    def _find_parents(self, subject, rdf_graph):
        """Return the notations of the subject's ``skos:broader`` ancestors.

        Ancestors are listed in traversal order; the subject itself and
        ancestors without a numeric notation are left out.
        """
        ancestors = rdf_graph.transitive_objects(subject, self.SKOS_CORE.broader)
        notations = (
            self._get_notation(node, rdf_graph)
            for node in ancestors
            if node != subject
        )
        return [notation for notation in notations if notation]

    def _transform_entry(self, subject, rdf_graph):
        """Build the subject dictionary for one RDF concept.

        :returns: A dict with ``id``, ``scheme``, ``subject``, ``title``,
            ``props`` and ``identifiers`` keys. ``id`` is ``None`` when the
            concept has no numeric notation.
        """
        notation = self._get_notation(subject, rdf_graph)
        labels = self._get_labels(subject, rdf_graph)
        # Prefixed ancestor notations, in reverse traversal order.
        ancestors = self._find_parents(subject, rdf_graph)
        parents = self.SPLITCHAR.join(
            f"euroscivoc:{parent}" for parent in reversed(ancestors)
        )

        entry_id = f"euroscivoc:{notation}" if notation else None
        props = {"parents": parents} if parents else {}
        return {
            "id": entry_id,
            "scheme": "EuroSciVoc",
            "subject": labels.get("en", "").capitalize(),
            "title": labels,
            "props": props,
            "identifiers": [{"scheme": "url", "identifier": str(subject)}],
        }

    def apply(self, stream_entry, *args, **kwargs):
        """Replace the entry of ``stream_entry`` by its subject dictionary.

        :param stream_entry: The entry to be transformed, which includes the
            subject and the RDF graph.
        :return: The same stream entry, with its ``entry`` replaced.
        """
        raw = stream_entry.entry
        stream_entry.entry = self._transform_entry(raw["subject"], raw["rdf_graph"])
        return stream_entry
148
+
149
+
150
# Datastream registries contributed by the EuroSciVoc module.
VOCABULARIES_DATASTREAM_READERS = {
    "euroscivoc-reader": EuroSciVocSubjectsHTTPReader,
}

VOCABULARIES_DATASTREAM_WRITERS = {}

VOCABULARIES_DATASTREAM_TRANSFORMERS = {
    "euroscivoc-transformer": EuroSciVocSubjectsTransformer,
}

# Default pipeline: EuroSciVoc reader -> EuroSciVoc transformer -> subjects
# service writer.
DATASTREAM_CONFIG = dict(
    readers=[dict(type="euroscivoc-reader")],
    transformers=[dict(type="euroscivoc-transformer")],
    writers=[dict(type="subjects-service")],
)
@@ -8,6 +8,9 @@
8
8
  "$schema": {
9
9
  "$ref": "local://definitions-v1.0.0.json#/$schema"
10
10
  },
11
+ "tags": {
12
+ "$ref": "local://vocabularies/definitions-v1.0.0.json#/tags"
13
+ },
11
14
  "id": {
12
15
  "description": "URI or classification code as identifier - globally unique among all subject schemes.",
13
16
  "$ref": "local://definitions-v1.0.0.json#/identifier"
@@ -22,6 +25,34 @@
22
25
  "subject": {
23
26
  "description": "Human readable label.",
24
27
  "type": "string"
28
+ },
29
+ "title": {
30
+ "description": "Human readable label in different languages.",
31
+ "$ref": "local://vocabularies/definitions-v1.0.0.json#/title"
32
+ },
33
+ "props": {
34
+ "type": "object",
35
+ "patternProperties": {
36
+ "^.*$": {
37
+ "type": "string"
38
+ }
39
+ }
40
+ },
41
+ "identifiers": {
42
+ "description": "Alternate identifiers for the subject.",
43
+ "type": "array",
44
+ "items": {
45
+ "$ref": "local://definitions-v2.0.0.json#/identifiers_with_scheme"
46
+ },
47
+ "uniqueItems": true
48
+ },
49
+ "synonyms": {
50
+ "description": "Synonyms of the subject label.",
51
+ "type": "array",
52
+ "items": {
53
+ "type": "string"
54
+ },
55
+ "uniqueItems": true
25
56
  }
26
57
  }
27
58
  }
@@ -1,5 +1,16 @@
1
1
  {
2
2
  "mappings": {
3
+ "dynamic_templates": [
4
+ {
5
+ "i18n_title": {
6
+ "path_match": "title.*",
7
+ "match_mapping_type": "string",
8
+ "mapping": {
9
+ "type": "search_as_you_type"
10
+ }
11
+ }
12
+ }
13
+ ],
3
14
  "dynamic": "strict",
4
15
  "properties": {
5
16
  "$schema": {
@@ -55,6 +66,30 @@
55
66
  "type": "keyword"
56
67
  }
57
68
  }
69
+ },
70
+ "title": {
71
+ "type": "object",
72
+ "dynamic": "true"
73
+ },
74
+ "props": {
75
+ "type": "object",
76
+ "dynamic": "true"
77
+ },
78
+ "identifiers": {
79
+ "properties": {
80
+ "identifier": {
81
+ "type": "keyword"
82
+ },
83
+ "scheme": {
84
+ "type": "keyword"
85
+ }
86
+ }
87
+ },
88
+ "synonyms": {
89
+ "type": "text"
90
+ },
91
+ "tags": {
92
+ "type": "keyword"
58
93
  }
59
94
  }
60
95
  }
@@ -1,5 +1,16 @@
1
1
  {
2
2
  "mappings": {
3
+ "dynamic_templates": [
4
+ {
5
+ "i18n_title": {
6
+ "path_match": "title.*",
7
+ "match_mapping_type": "string",
8
+ "mapping": {
9
+ "type": "search_as_you_type"
10
+ }
11
+ }
12
+ }
13
+ ],
3
14
  "dynamic": "strict",
4
15
  "properties": {
5
16
  "$schema": {
@@ -55,6 +66,30 @@
55
66
  "type": "keyword"
56
67
  }
57
68
  }
69
+ },
70
+ "title": {
71
+ "type": "object",
72
+ "dynamic": "true"
73
+ },
74
+ "synonyms": {
75
+ "type": "text"
76
+ },
77
+ "props": {
78
+ "type": "object",
79
+ "dynamic": "true"
80
+ },
81
+ "identifiers": {
82
+ "properties": {
83
+ "identifier": {
84
+ "type": "keyword"
85
+ },
86
+ "scheme": {
87
+ "type": "keyword"
88
+ }
89
+ }
90
+ },
91
+ "tags": {
92
+ "type": "keyword"
58
93
  }
59
94
  }
60
95
  }
@@ -1,5 +1,16 @@
1
1
  {
2
2
  "mappings": {
3
+ "dynamic_templates": [
4
+ {
5
+ "i18n_title": {
6
+ "path_match": "title.*",
7
+ "match_mapping_type": "string",
8
+ "mapping": {
9
+ "type": "search_as_you_type"
10
+ }
11
+ }
12
+ }
13
+ ],
3
14
  "dynamic": "strict",
4
15
  "properties": {
5
16
  "$schema": {
@@ -55,6 +66,30 @@
55
66
  "type": "keyword"
56
67
  }
57
68
  }
69
+ },
70
+ "title": {
71
+ "type": "object",
72
+ "dynamic": "true"
73
+ },
74
+ "props": {
75
+ "type": "object",
76
+ "dynamic": "true"
77
+ },
78
+ "identifiers": {
79
+ "properties": {
80
+ "identifier": {
81
+ "type": "keyword"
82
+ },
83
+ "scheme": {
84
+ "type": "keyword"
85
+ }
86
+ }
87
+ },
88
+ "synonyms": {
89
+ "type": "text"
90
+ },
91
+ "tags": {
92
+ "type": "keyword"
58
93
  }
59
94
  }
60
95
  }
@@ -0,0 +1,9 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2024 CERN.
4
+ #
5
+ # Invenio-Vocabularies is free software; you can redistribute it and/or
6
+ # modify it under the terms of the MIT License; see LICENSE file for more
7
+ # details.
8
+
9
+ """MeSH Subjects module."""
@@ -0,0 +1,43 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2022-2024 CERN.
4
+ # Copyright (C) 2024 California Institute of Technology.
5
+ #
6
+ # Invenio-Vocabularies is free software; you can redistribute it and/or
7
+ # modify it under the terms of the MIT License; see LICENSE file for more
8
+ # details.
9
+
10
+ """MeSH subjects datastreams, transformers, writers and readers."""
11
+
12
+ from invenio_vocabularies.datastreams.transformers import (
13
+ BaseTransformer,
14
+ TransformerError,
15
+ )
16
+
17
+
18
class MeshSubjectsTransformer(BaseTransformer):
    """MeSH subjects Transformer."""

    def apply(self, stream_entry, *args, **kwargs):
        """Apply transformation on stream entry.

        Rewrites the ``id`` of the entry, in place, from a URL to the last
        path segment of that URL prefixed by ``mesh:``.

        :param stream_entry: The stream entry whose ``entry`` holds the ``id``.
        :returns: The same stream entry.
        :raises TransformerError: If the ``id`` cannot be read or split, or
            if its last path segment is empty.
        """
        entry_data = stream_entry.entry

        # ID in MeSH data is the URL, ex. https://id.nlm.nih.gov/mesh/D000001
        # We just want to use the ID prefixed by "mesh:"
        try:
            mesh_id = entry_data["id"].split("/")[-1]
        except Exception as err:
            # Keep the root cause attached to ease debugging of bad entries.
            raise TransformerError("Not a valid MeSH ID.") from err

        if not mesh_id:
            # An empty id or a trailing slash would otherwise produce the
            # meaningless identifier "mesh:".
            raise TransformerError("Not a valid MeSH ID.")

        entry_data["id"] = "mesh:" + mesh_id
        return stream_entry
34
+
35
+
36
# The MeSH module contributes a transformer only; the reader and writer
# registries are intentionally empty.
VOCABULARIES_DATASTREAM_READERS = {}
"""MeSH datastream readers."""

VOCABULARIES_DATASTREAM_WRITERS = {}
"""MeSH subject datastream writers."""

VOCABULARIES_DATASTREAM_TRANSFORMERS = {
    "mesh-subjects": MeshSubjectsTransformer,
}
"""MeSH subjects datastream transformers."""
@@ -1,7 +1,8 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
3
  # Copyright (C) 2021 Northwestern University.
4
- # Copyright (C) 2021-2022 CERN.
4
+ # Copyright (C) 2021-2024 CERN.
5
+ # Copyright (C) 2024 University of Münster.
5
6
  #
6
7
  # Invenio-Vocabularies is free software; you can redistribute it and/or
7
8
  # modify it under the terms of the MIT License; see LICENSE file for more
@@ -9,10 +10,19 @@
9
10
 
10
11
  """Subjects schema."""
11
12
 
12
- from invenio_i18n import lazy_gettext as _
13
- from marshmallow_utils.fields import SanitizedUnicode
13
+ from functools import partial
14
14
 
15
- from ...services.schema import BaseVocabularySchema, ContribVocabularyRelationSchema
15
+ from invenio_i18n import get_locale
16
+ from marshmallow import Schema, fields, pre_load
17
+ from marshmallow_utils.fields import IdentifierSet, SanitizedUnicode
18
+ from marshmallow_utils.schemas import IdentifierSchema
19
+
20
+ from ...services.schema import (
21
+ BaseVocabularySchema,
22
+ ContribVocabularyRelationSchema,
23
+ i18n_strings,
24
+ )
25
+ from .config import subject_schemes
16
26
 
17
27
 
18
28
  class SubjectSchema(BaseVocabularySchema):
@@ -24,6 +34,26 @@ class SubjectSchema(BaseVocabularySchema):
24
34
  id = SanitizedUnicode(required=True)
25
35
  scheme = SanitizedUnicode(required=True)
26
36
  subject = SanitizedUnicode(required=True)
37
+ title = i18n_strings
38
+ props = fields.Dict(keys=SanitizedUnicode(), values=SanitizedUnicode())
39
+ identifiers = IdentifierSet(
40
+ fields.Nested(
41
+ partial(
42
+ IdentifierSchema,
43
+ allowed_schemes=subject_schemes,
44
+ identifier_required=False,
45
+ )
46
+ )
47
+ )
48
+ synonyms = fields.List(SanitizedUnicode())
49
+
50
@pre_load
def add_subject_from_title(self, data, **kwargs):
    """Add subject from title if not present.

    The label for the current locale is preferred; otherwise the first
    available title is used. When no title is available the data is left
    untouched, so that the ``required`` check on ``subject`` reports the
    problem instead of this hook failing.

    :param data: The raw input data, modified in place.
    :returns: The input data.
    """
    if "subject" in data:
        return data

    titles = data.get("title") or {}
    # NOTE(review): Flask-Babel documents that ``get_locale`` can return
    # ``None`` outside of a request - confirm this also holds for the
    # ``invenio_i18n`` re-export.
    locale = get_locale()
    language = locale.language if locale is not None else None

    # ``dict.values()`` is a view and cannot be indexed in Python 3, hence
    # ``next(iter(...))`` to pick the first available title.
    subject = titles.get(language) or next(iter(titles.values()), None)
    if subject:
        data["subject"] = subject
    return data
27
57
 
28
58
 
29
59
  class SubjectRelationSchema(ContribVocabularyRelationSchema):
@@ -32,4 +62,16 @@ class SubjectRelationSchema(ContribVocabularyRelationSchema):
32
62
  ftf_name = "subject"
33
63
  parent_field_name = "subjects"
34
64
  subject = SanitizedUnicode()
35
- scheme = SanitizedUnicode()
65
+ scheme = SanitizedUnicode(dump_only=True)
66
+ title = fields.Dict(dump_only=True)
67
+ props = fields.Dict(dump_only=True)
68
+ identifiers = IdentifierSet(
69
+ fields.Nested(
70
+ partial(
71
+ IdentifierSchema,
72
+ allowed_schemes=subject_schemes,
73
+ identifier_required=False,
74
+ )
75
+ )
76
+ )
77
+ synonyms = fields.List(SanitizedUnicode(), dump_only=True)