invenio-vocabularies 3.4.2__py2.py3-none-any.whl → 4.1.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of invenio-vocabularies might be problematic. Click here for more details.

Files changed (69) hide show
  1. invenio_vocabularies/__init__.py +1 -1
  2. invenio_vocabularies/administration/__init__.py +10 -0
  3. invenio_vocabularies/administration/views/__init__.py +10 -0
  4. invenio_vocabularies/administration/views/vocabularies.py +44 -0
  5. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/CustomAwardForm.js +8 -20
  6. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingField.js +2 -2
  7. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingModal.js +5 -7
  8. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/NoAwardResults.js +3 -3
  9. invenio_vocabularies/cli.py +10 -39
  10. invenio_vocabularies/config.py +33 -3
  11. invenio_vocabularies/contrib/affiliations/config.py +2 -2
  12. invenio_vocabularies/contrib/affiliations/datastreams.py +67 -0
  13. invenio_vocabularies/contrib/affiliations/jsonschemas/affiliations/affiliation-v1.0.0.json +38 -1
  14. invenio_vocabularies/contrib/affiliations/mappings/os-v1/affiliations/affiliation-v1.0.0.json +21 -0
  15. invenio_vocabularies/contrib/affiliations/mappings/os-v2/affiliations/affiliation-v1.0.0.json +21 -0
  16. invenio_vocabularies/contrib/affiliations/mappings/v7/affiliations/affiliation-v1.0.0.json +21 -0
  17. invenio_vocabularies/contrib/affiliations/schema.py +17 -3
  18. invenio_vocabularies/contrib/awards/datastreams.py +90 -3
  19. invenio_vocabularies/contrib/awards/jsonschemas/awards/award-v1.0.0.json +3 -0
  20. invenio_vocabularies/contrib/awards/mappings/os-v1/awards/award-v1.0.0.json +3 -0
  21. invenio_vocabularies/contrib/awards/mappings/os-v2/awards/award-v1.0.0.json +3 -0
  22. invenio_vocabularies/contrib/awards/mappings/v7/awards/award-v1.0.0.json +3 -0
  23. invenio_vocabularies/contrib/common/__init__.py +9 -0
  24. invenio_vocabularies/contrib/common/ror/__init__.py +9 -0
  25. invenio_vocabularies/contrib/common/ror/datastreams.py +166 -0
  26. invenio_vocabularies/contrib/funders/config.py +2 -0
  27. invenio_vocabularies/contrib/funders/datastreams.py +10 -59
  28. invenio_vocabularies/contrib/funders/jsonschemas/funders/funder-v1.0.0.json +36 -1
  29. invenio_vocabularies/contrib/funders/mappings/os-v1/funders/funder-v1.0.0.json +21 -0
  30. invenio_vocabularies/contrib/funders/mappings/os-v2/funders/funder-v1.0.0.json +21 -0
  31. invenio_vocabularies/contrib/funders/mappings/v7/funders/funder-v1.0.0.json +21 -0
  32. invenio_vocabularies/contrib/funders/schema.py +8 -0
  33. invenio_vocabularies/contrib/funders/serializer.py +2 -1
  34. invenio_vocabularies/contrib/names/jsonschemas/names/name-v1.0.0.json +3 -0
  35. invenio_vocabularies/contrib/names/mappings/os-v1/names/name-v1.0.0.json +3 -0
  36. invenio_vocabularies/contrib/names/mappings/os-v2/names/name-v1.0.0.json +3 -0
  37. invenio_vocabularies/contrib/names/mappings/v7/names/name-v1.0.0.json +3 -0
  38. invenio_vocabularies/contrib/subjects/jsonschemas/subjects/subject-v1.0.0.json +3 -0
  39. invenio_vocabularies/contrib/subjects/mappings/os-v1/subjects/subject-v1.0.0.json +3 -0
  40. invenio_vocabularies/contrib/subjects/mappings/os-v2/subjects/subject-v1.0.0.json +3 -0
  41. invenio_vocabularies/contrib/subjects/mappings/v7/subjects/subject-v1.0.0.json +3 -0
  42. invenio_vocabularies/datastreams/factories.py +1 -2
  43. invenio_vocabularies/datastreams/readers.py +96 -3
  44. invenio_vocabularies/datastreams/writers.py +2 -2
  45. invenio_vocabularies/ext.py +22 -7
  46. invenio_vocabularies/factories.py +93 -0
  47. invenio_vocabularies/proxies.py +2 -2
  48. invenio_vocabularies/records/jsonschemas/vocabularies/definitions-v1.0.0.json +7 -0
  49. invenio_vocabularies/records/jsonschemas/vocabularies/vocabulary-v1.0.0.json +1 -4
  50. invenio_vocabularies/resources/__init__.py +8 -1
  51. invenio_vocabularies/resources/config.py +105 -0
  52. invenio_vocabularies/resources/resource.py +31 -41
  53. invenio_vocabularies/services/__init__.py +5 -2
  54. invenio_vocabularies/services/config.py +179 -0
  55. invenio_vocabularies/services/permissions.py +3 -1
  56. invenio_vocabularies/services/results.py +110 -0
  57. invenio_vocabularies/services/schema.py +1 -1
  58. invenio_vocabularies/services/service.py +41 -86
  59. invenio_vocabularies/services/tasks.py +31 -1
  60. invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/vocabularies-list.html +12 -0
  61. invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/vocabulary-details.html +71 -0
  62. invenio_vocabularies/views.py +7 -0
  63. {invenio_vocabularies-3.4.2.dist-info → invenio_vocabularies-4.1.1.dist-info}/METADATA +32 -7
  64. {invenio_vocabularies-3.4.2.dist-info → invenio_vocabularies-4.1.1.dist-info}/RECORD +69 -56
  65. {invenio_vocabularies-3.4.2.dist-info → invenio_vocabularies-4.1.1.dist-info}/entry_points.txt +7 -0
  66. {invenio_vocabularies-3.4.2.dist-info → invenio_vocabularies-4.1.1.dist-info}/AUTHORS.rst +0 -0
  67. {invenio_vocabularies-3.4.2.dist-info → invenio_vocabularies-4.1.1.dist-info}/LICENSE +0 -0
  68. {invenio_vocabularies-3.4.2.dist-info → invenio_vocabularies-4.1.1.dist-info}/WHEEL +0 -0
  69. {invenio_vocabularies-3.4.2.dist-info → invenio_vocabularies-4.1.1.dist-info}/top_level.txt +0 -0
@@ -1,22 +1,79 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
- # Copyright (C) 2022 CERN.
3
+ # Copyright (C) 2022-2024 CERN.
4
4
  #
5
5
  # Invenio-Vocabularies is free software; you can redistribute it and/or
6
6
  # modify it under the terms of the MIT License; see LICENSE file for more
7
7
  # details.
8
8
 
9
9
  """Awards datastreams, transformers, writers and readers."""
10
+ import io
10
11
 
12
+ import requests
11
13
  from invenio_access.permissions import system_identity
12
14
  from invenio_i18n import lazy_gettext as _
13
15
 
14
- from ...datastreams.errors import TransformerError
16
+ from ...datastreams.errors import ReaderError, TransformerError
17
+ from ...datastreams.readers import BaseReader
15
18
  from ...datastreams.transformers import BaseTransformer
16
19
  from ...datastreams.writers import ServiceWriter
17
20
  from .config import awards_ec_ror_id, awards_openaire_funders_mapping
18
21
 
19
22
 
23
class OpenAIREProjectHTTPReader(BaseReader):
    """OpenAIRE Project HTTP Reader returning an in-memory binary stream of the latest OpenAIRE Graph Dataset project tar file."""

    # (connect, read) timeouts in seconds passed to every HTTP request.
    # The read timeout bounds the wait between two received chunks, not the
    # total download time, so a large but progressing download is unaffected
    # while a stalled connection no longer blocks the import forever.
    REQUEST_TIMEOUT = (10, 300)

    def _iter(self, fp, *args, **kwargs):
        """Not supported: this reader yields one whole file, not individual items.

        :raises NotImplementedError: always.
        """
        raise NotImplementedError(
            "OpenAIREProjectHTTPReader downloads one file and therefore does not iterate through items"
        )

    def read(self, item=None, *args, **kwargs):
        """Reads the latest OpenAIRE Graph Dataset project tar file from Zenodo and yields an in-memory binary stream of it.

        The dataset is selected by ``self._origin`` (assigned outside this
        class - presumably by ``BaseReader`` from the reader's origin
        argument; confirm against the base class): ``"full"`` selects the
        complete OpenAIRE Graph Dataset, ``"diff"`` the newly collected
        projects dataset.

        :param item: must be falsy; this reader cannot consume the output of
            a previous reader.
        :returns: a generator yielding a single ``io.BytesIO`` holding the
            complete ``project.tar`` content.
        :raises NotImplementedError: if ``item`` is truthy.
        :raises ReaderError: if the origin is neither ``"full"`` nor
            ``"diff"``, or if the record does not link exactly one
            ``project.tar`` file.
        """
        if item:
            raise NotImplementedError(
                "OpenAIREProjectHTTPReader does not support being chained after another reader"
            )

        if self._origin == "full":
            # OpenAIRE Graph Dataset
            api_url = "https://zenodo.org/api/records/3516917"
        elif self._origin == "diff":
            # OpenAIRE Graph dataset: new collected projects
            api_url = "https://zenodo.org/api/records/6419021"
        else:
            raise ReaderError("The --origin option should be either 'full' or 'diff'")

        # Call the signposting `linkset+json` endpoint for the Concept DOI (i.e. latest version) of the OpenAIRE Graph Dataset.
        # See: https://github.com/inveniosoftware/rfcs/blob/master/rfcs/rdm-0071-signposting.md#provide-an-applicationlinksetjson-endpoint
        headers = {"Accept": "application/linkset+json"}
        api_resp = requests.get(
            api_url, headers=headers, timeout=self.REQUEST_TIMEOUT
        )
        api_resp.raise_for_status()

        # Extract the Landing page Link Set Object located as the first (index 0) item.
        landing_page_linkset = api_resp.json()["linkset"][0]

        # Extract the URL of the only project tar file linked to the record.
        project_tar_links = [
            link
            for link in landing_page_linkset["item"]
            if link["type"] == "application/x-tar"
            and link["href"].endswith("/project.tar")
        ]
        if len(project_tar_links) != 1:
            raise ReaderError(
                f"Expected 1 project tar item but got {len(project_tar_links)}"
            )
        file_url = project_tar_links[0]["href"]

        # Download the project tar file and fully load the response bytes content in memory.
        # The bytes content are then wrapped by a BytesIO to be file-like object (as required by `tarfile.open`).
        # Using directly `file_resp.raw` is not possible since `tarfile.open` requires the file-like object to be seekable.
        file_resp = requests.get(file_url, timeout=self.REQUEST_TIMEOUT)
        file_resp.raise_for_status()
        yield io.BytesIO(file_resp.content)
75
+
76
+
20
77
  class AwardsServiceWriter(ServiceWriter):
21
78
  """Funders service writer."""
22
79
 
@@ -39,7 +96,20 @@ class OpenAIREProjectTransformer(BaseTransformer):
39
96
  award = {}
40
97
 
41
98
  code = record["code"]
42
- openaire_funder_prefix = record["id"].split("::")[0].split("|")[1]
99
+
100
+ # The `id` should follow the format `sourcePrefix::md5(localId)` where `sourcePrefix` is 12 characters long.
101
+ # See: https://graph.openaire.eu/docs/data-model/pids-and-identifiers#identifiers-in-the-graph
102
+ #
103
+ # The format of `id` in the full OpenAIRE Graph Dataset (https://doi.org/10.5281/zenodo.3516917)
104
+ # follows this format (e.g. 'abc_________::0123456789abcdef0123456789abcdef').
105
+ # However, the format of `id` in the new collected projects dataset (https://doi.org/10.5281/zenodo.6419021)
106
+ # does not follow this format, and has a `40|` prefix (e.g. '40|abc_________::0123456789abcdef0123456789abcdef').
107
+ #
108
+ # The number '40' corresponds to the entity types 'Project'.
109
+ # See: https://ec.europa.eu/research/participants/documents/downloadPublic?documentIds=080166e5a3a1a213&appId=PPGMS
110
+ # See: https://graph.openaire.eu/docs/5.0.0/data-model/entities/project#id
111
+ openaire_funder_prefix = record["id"].split("::", 1)[0].split("|", 1)[-1]
112
+
43
113
  funder_id = awards_openaire_funders_mapping.get(openaire_funder_prefix)
44
114
  if funder_id is None:
45
115
  raise TransformerError(
@@ -78,7 +148,20 @@ class OpenAIREProjectTransformer(BaseTransformer):
78
148
  award["identifiers"] = identifiers
79
149
 
80
150
  award["number"] = code
151
+
152
+ # `title` is a mandatory attribute of the `Project` object in the OpenAIRE Graph Data Model.
153
+ # See: https://graph.openaire.eu/docs/data-model/entities/project#title
154
+ # However, 15'000+ awards for the FCT funder (and 1 record the NIH funder) are missing a title attribute.
155
+ if "title" not in record:
156
+ raise TransformerError(
157
+ _(
158
+ "Missing title attribute for award {award_id}".format(
159
+ award_id=award["id"]
160
+ )
161
+ )
162
+ )
81
163
  award["title"] = {"en": record["title"]}
164
+
82
165
  award["funder"] = {"id": funder_id}
83
166
  acronym = record.get("acronym")
84
167
  if acronym:
@@ -88,6 +171,10 @@ class OpenAIREProjectTransformer(BaseTransformer):
88
171
  return stream_entry
89
172
 
90
173
 
174
+ VOCABULARIES_DATASTREAM_READERS = {
175
+ "openaire-project-http": OpenAIREProjectHTTPReader,
176
+ }
177
+
91
178
  VOCABULARIES_DATASTREAM_TRANSFORMERS = {
92
179
  "openaire-award": OpenAIREProjectTransformer,
93
180
  }
@@ -7,6 +7,9 @@
7
7
  "$schema": {
8
8
  "$ref": "local://definitions-v1.0.0.json#/$schema"
9
9
  },
10
+ "tags": {
11
+ "$ref": "local://vocabularies/definitions-v1.0.0.json#/tags"
12
+ },
10
13
  "identifiers": {
11
14
  "description": "Alternate identifiers for the award.",
12
15
  "type": "array",
@@ -49,6 +49,9 @@
49
49
  "type": "object",
50
50
  "dynamic": "true"
51
51
  },
52
+ "tags": {
53
+ "type": "keyword"
54
+ },
52
55
  "number": {
53
56
  "type": "keyword"
54
57
  },
@@ -49,6 +49,9 @@
49
49
  "type": "object",
50
50
  "dynamic": "true"
51
51
  },
52
+ "tags": {
53
+ "type": "keyword"
54
+ },
52
55
  "number": {
53
56
  "type": "keyword"
54
57
  },
@@ -49,6 +49,9 @@
49
49
  "type": "object",
50
50
  "dynamic": "true"
51
51
  },
52
+ "tags": {
53
+ "type": "keyword"
54
+ },
52
55
  "number": {
53
56
  "type": "keyword"
54
57
  },
@@ -0,0 +1,9 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2024 CERN.
4
+ #
5
+ # Invenio-Vocabularies is free software; you can redistribute it and/or
6
+ # modify it under the terms of the MIT License; see LICENSE file for more
7
+ # details.
8
+
9
+ """Vocabularies common module."""
@@ -0,0 +1,9 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2024 CERN.
4
+ #
5
+ # Invenio-Vocabularies is free software; you can redistribute it and/or
6
+ # modify it under the terms of the MIT License; see LICENSE file for more
7
+ # details.
8
+
9
+ """ROR-related module."""
@@ -0,0 +1,166 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2024 CERN.
4
+ # Copyright (C) 2024 California Institute of Technology.
5
+ #
6
+ # Invenio-Vocabularies is free software; you can redistribute it and/or
7
+ # modify it under the terms of the MIT License; see LICENSE file for more
8
+ # details.
9
+
10
+ """ROR-related Datastreams Readers/Writers/Transformers module."""
11
+
12
+ import io
13
+
14
+ import requests
15
+ from idutils import normalize_ror
16
+
17
+ from invenio_vocabularies.datastreams.errors import ReaderError, TransformerError
18
+ from invenio_vocabularies.datastreams.readers import BaseReader
19
+ from invenio_vocabularies.datastreams.transformers import BaseTransformer
20
+
21
+
22
class RORHTTPReader(BaseReader):
    """ROR HTTP Reader returning an in-memory binary stream of the latest ROR data dump ZIP file."""

    # (connect, read) timeouts in seconds passed to every HTTP request.
    # The read timeout bounds the wait between two received chunks, not the
    # total download time, so a large but progressing download is unaffected
    # while a stalled connection no longer blocks the import forever.
    REQUEST_TIMEOUT = (10, 300)

    def _iter(self, fp, *args, **kwargs):
        """Not supported: this reader yields one whole file, not individual items.

        :raises NotImplementedError: always.
        """
        raise NotImplementedError(
            "RORHTTPReader downloads one file and therefore does not iterate through items"
        )

    def read(self, item=None, *args, **kwargs):
        """Reads the latest ROR data dump ZIP file from Zenodo and yields an in-memory binary stream of it.

        :param item: must be falsy; this reader cannot consume the output of
            a previous reader.
        :returns: a generator yielding a single ``io.BytesIO`` holding the
            complete ZIP file content.
        :raises NotImplementedError: if ``item`` is truthy.
        :raises ReaderError: if the record does not link exactly one ZIP file.
        """
        if item:
            raise NotImplementedError(
                "RORHTTPReader does not support being chained after another reader"
            )

        # Call the signposting `linkset+json` endpoint for the Concept DOI (i.e. latest version) of the ROR data dump.
        # See: https://github.com/inveniosoftware/rfcs/blob/master/rfcs/rdm-0071-signposting.md#provide-an-applicationlinksetjson-endpoint
        headers = {"Accept": "application/linkset+json"}
        api_url = "https://zenodo.org/api/records/6347574"
        api_resp = requests.get(
            api_url, headers=headers, timeout=self.REQUEST_TIMEOUT
        )
        api_resp.raise_for_status()

        # Extract the Landing page Link Set Object located as the first (index 0) item.
        landing_page_linkset = api_resp.json()["linkset"][0]

        # Extract the URL of the only ZIP file linked to the record.
        zip_links = [
            link
            for link in landing_page_linkset["item"]
            if link["type"] == "application/zip"
        ]
        if len(zip_links) != 1:
            raise ReaderError(f"Expected 1 ZIP item but got {len(zip_links)}")
        file_url = zip_links[0]["href"]

        # Download the ZIP file and fully load the response bytes content in memory.
        # The bytes content are then wrapped by a BytesIO to be file-like object (as required by `zipfile.ZipFile`).
        # Using directly `file_resp.raw` is not possible since `zipfile.ZipFile` requires the file-like object to be seekable.
        file_resp = requests.get(file_url, timeout=self.REQUEST_TIMEOUT)
        file_resp.raise_for_status()
        yield io.BytesIO(file_resp.content)
65
+
66
+
67
+ VOCABULARIES_DATASTREAM_READERS = {
68
+ "ror-http": RORHTTPReader,
69
+ }
70
+
71
+
72
class RORTransformer(BaseTransformer):
    """Transforms a JSON ROR record into a vocabulary record (e.g. a funder).

    The input entry is read as a dict with ``id``, ``names``, ``locations``,
    ``types``, ``status`` and ``external_ids`` keys; the output entry is a
    new dict built from those values.
    """

    def __init__(
        self, *args, vocab_schemes=None, funder_fundref_doi_prefix=None, **kwargs
    ):
        """Initializes the transformer.

        :param vocab_schemes: mapping whose keys are the external identifier
            schemes to keep in the output; when falsy no external scheme is
            kept (apart from ``fundref``, see below).
        :param funder_fundref_doi_prefix: when set, ``fundref`` external
            identifiers are kept, prefixed with this value and emitted with
            the ``doi`` scheme.
        """
        self.vocab_schemes = vocab_schemes
        self.funder_fundref_doi_prefix = funder_fundref_doi_prefix
        super().__init__(*args, **kwargs)

    def apply(self, stream_entry, **kwargs):
        """Applies the transformation to the stream entry.

        :param stream_entry: object whose ``entry`` attribute holds the raw
            ROR record; ``entry`` is replaced by the transformed dict.
        :returns: the same ``stream_entry`` object.
        :raises TransformerError: if the record has no id or no name of type
            ``ror_display``.
        """
        # The translation helper is imported here because the module does not
        # import `_` at top level; without it both error paths below would
        # fail with a NameError instead of raising TransformerError.
        from invenio_i18n import lazy_gettext as _

        record = stream_entry.entry
        ror = {"title": {}}

        # Only normalize an id that is actually present, so a missing id is
        # reported as a TransformerError.
        raw_id = record.get("id")
        ror["id"] = normalize_ror(raw_id) if raw_id else None
        if not ror["id"]:
            raise TransformerError(_("Id not found in ROR entry."))

        # Using set so aliases are unique
        aliases = set()
        acronym = None
        for name in record.get("names") or []:
            lang = name.get("lang")
            if lang is None:
                lang = "en"
            name_types = name.get("types") or ()
            if "ror_display" in name_types:
                ror["name"] = name["value"]
            if "label" in name_types:
                ror["title"][lang] = name["value"]
            if "alias" in name_types:
                aliases.add(name["value"])
            if "acronym" in name_types:
                # The first acronym goes in the acronym field to maintain
                # compatibility with the existing data structure
                if not acronym:
                    acronym = name["value"]
                else:
                    aliases.add(name["value"])
        if acronym:
            ror["acronym"] = acronym
        if aliases:
            # Sorted so the output does not depend on set iteration order.
            ror["aliases"] = sorted(aliases)

        # ror_display is required and should be in every entry
        if not ror.get("name"):
            raise TransformerError(
                _("Name with type ror_display not found in ROR entry.")
            )

        # This only gets the first location, to maintain compatibility
        # with the existing data structure
        locations = record.get("locations") or [{}]
        location = (locations[0] or {}).get("geonames_details") or {}
        ror["country"] = location.get("country_code")
        ror["country_name"] = location.get("country_name")
        ror["location_name"] = location.get("name")

        ror["types"] = record.get("types")
        ror["status"] = record.get("status")

        # The ROR is always listed in identifiers, expected by serialization
        ror["identifiers"] = [{"identifier": ror["id"], "scheme": "ror"}]
        valid_schemes = set(self.vocab_schemes) if self.vocab_schemes else set()
        fund_ref = "fundref"
        if self.funder_fundref_doi_prefix:
            valid_schemes.add(fund_ref)
        for identifier in record.get("external_ids") or []:
            scheme = identifier.get("type")
            if scheme not in valid_schemes:
                continue
            # Prefer the `preferred` value, else the first of `all`; an
            # identifier with neither is skipped instead of crashing.
            value = identifier.get("preferred")
            if not value:
                candidates = identifier.get("all") or []
                if not candidates:
                    continue
                value = candidates[0]
            if scheme == fund_ref and self.funder_fundref_doi_prefix:
                value = f"{self.funder_fundref_doi_prefix}/{value}"
                scheme = "doi"
            ror["identifiers"].append(
                {
                    "identifier": value,
                    "scheme": scheme,
                }
            )

        stream_entry.entry = ror
        return stream_entry
162
+
163
+
164
+ VOCABULARIES_DATASTREAM_TRANSFORMERS = {
165
+ "ror": RORTransformer,
166
+ }
@@ -31,6 +31,8 @@ class FundersSearchOptions(SearchOptions):
31
31
  fields=[
32
32
  "name^100",
33
33
  "identifiers.identifier^10",
34
+ "acronym^10",
35
+ "aliases^10",
34
36
  ]
35
37
  )
36
38
 
@@ -1,6 +1,7 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
- # Copyright (C) 2022 CERN.
3
+ # Copyright (C) 2022-2024 CERN.
4
+ # Copyright (C) 2024 California Institute of Technology.
4
5
  #
5
6
  # Invenio-Vocabularies is free software; you can redistribute it and/or
6
7
  # modify it under the terms of the MIT License; see LICENSE file for more
@@ -12,8 +13,6 @@ from idutils import normalize_ror
12
13
  from invenio_access.permissions import system_identity
13
14
  from invenio_i18n import lazy_gettext as _
14
15
 
15
- from ...datastreams.errors import TransformerError
16
- from ...datastreams.transformers import BaseTransformer
17
16
  from ...datastreams.writers import ServiceWriter
18
17
  from .config import funder_fundref_doi_prefix, funder_schemes
19
18
 
@@ -31,60 +30,6 @@ class FundersServiceWriter(ServiceWriter):
31
30
  return entry["id"]
32
31
 
33
32
 
34
- class RORTransformer(BaseTransformer):
35
- """Transforms a JSON ROR record into a funders record."""
36
-
37
- def apply(self, stream_entry, **kwargs):
38
- """Applies the transformation to the stream entry."""
39
- record = stream_entry.entry
40
- funder = {}
41
-
42
- funder["id"] = normalize_ror(record.get("id"))
43
- if not funder["id"]:
44
- raise TransformerError(_("Id not found in ROR entry."))
45
-
46
- funder["name"] = record.get("name")
47
- if not funder["name"]:
48
- raise TransformerError(_("Name not found in ROR entry."))
49
-
50
- country_code = record.get("country", {}).get("country_code")
51
- if country_code:
52
- funder["country"] = country_code
53
-
54
- funder["title"] = {"en": funder["name"]}
55
- for label in record.get("labels", []):
56
- funder["title"][label["iso639"]] = label["label"]
57
-
58
- # The ROR is always listed in identifiers, expected by serialization
59
- funder["identifiers"] = [{"identifier": funder["id"], "scheme": "ror"}]
60
- valid_schemes = set(funder_schemes.keys())
61
- fund_ref = "fundref"
62
- valid_schemes.add(fund_ref)
63
- for scheme, identifier in record.get("external_ids", {}).items():
64
- scheme = scheme.lower()
65
- if scheme in valid_schemes:
66
- value = identifier.get("preferred") or identifier.get("all")[0]
67
- if scheme == fund_ref:
68
- value = f"{funder_fundref_doi_prefix}/{value}"
69
- scheme = "doi"
70
-
71
- funder["identifiers"].append(
72
- {
73
- "identifier": value,
74
- "scheme": scheme,
75
- }
76
- )
77
-
78
- stream_entry.entry = funder
79
- return stream_entry
80
-
81
-
82
- VOCABULARIES_DATASTREAM_TRANSFORMERS = {
83
- "ror-funder": RORTransformer,
84
- }
85
- """ROR Data Streams transformers."""
86
-
87
-
88
33
  VOCABULARIES_DATASTREAM_WRITERS = {
89
34
  "funders-service": FundersServiceWriter,
90
35
  }
@@ -96,13 +41,19 @@ DATASTREAM_CONFIG = {
96
41
  {
97
42
  "type": "zip",
98
43
  "args": {
99
- "regex": "(?<!_schema_v2)\\.json$",
44
+ "regex": "_schema_v2\\.json$",
100
45
  },
101
46
  },
102
47
  {"type": "json"},
103
48
  ],
104
49
  "transformers": [
105
- {"type": "ror-funder"},
50
+ {
51
+ "type": "ror",
52
+ "args": {
53
+ "vocab_schemes": funder_schemes,
54
+ "funder_fundref_doi_prefix": funder_fundref_doi_prefix,
55
+ },
56
+ },
106
57
  ],
107
58
  "writers": [
108
59
  {
@@ -7,9 +7,20 @@
7
7
  "$schema": {
8
8
  "$ref": "local://definitions-v1.0.0.json#/$schema"
9
9
  },
10
+ "tags": {
11
+ "$ref": "local://vocabularies/definitions-v1.0.0.json#/tags"
12
+ },
10
13
  "country": {
11
14
  "type": "string",
12
- "description": "Represents a funder's origin country."
15
+ "description": "Represents a funder's origin country as a country code."
16
+ },
17
+ "country_name": {
18
+ "type": "string",
19
+ "description": "Represents a funder's origin country as a full name."
20
+ },
21
+ "location_name": {
22
+ "type": "string",
23
+ "description": "Represents a funder's location name (usually a city)."
13
24
  },
14
25
  "identifiers": {
15
26
  "description": "Alternate identifiers for the record.",
@@ -23,6 +34,30 @@
23
34
  "type": "string",
24
35
  "description": "Funders name."
25
36
  },
37
+ "acronym": {
38
+ "type": "string",
39
+ "description": "Acronym for funders name."
40
+ },
41
+ "status": {
42
+ "type": "string",
43
+ "description": "Status of the funder."
44
+ },
45
+ "aliases": {
46
+ "description": "Alternate names for the funder.",
47
+ "type": "array",
48
+ "items": {
49
+ "type": "string"
50
+ },
51
+ "uniqueItems": true
52
+ },
53
+ "types": {
54
+ "description": "Types of funders.",
55
+ "type": "array",
56
+ "items": {
57
+ "type": "string"
58
+ },
59
+ "uniqueItems": true
60
+ },
26
61
  "title": {
27
62
  "$ref": "local://vocabularies/definitions-v1.0.0.json#/title"
28
63
  }
@@ -57,12 +57,33 @@
57
57
  "country": {
58
58
  "type": "text"
59
59
  },
60
+ "country_name": {
61
+ "type": "text"
62
+ },
63
+ "location_name": {
64
+ "type": "text"
65
+ },
66
+ "acronym": {
67
+ "type": "text"
68
+ },
69
+ "status": {
70
+ "type": "keyword"
71
+ },
72
+ "aliases": {
73
+ "type": "text"
74
+ },
75
+ "types": {
76
+ "type": "keyword"
77
+ },
60
78
  "id": {
61
79
  "type": "keyword"
62
80
  },
63
81
  "title": {
64
82
  "type": "object",
65
83
  "dynamic": "true"
84
+ },
85
+ "tags": {
86
+ "type": "keyword"
66
87
  }
67
88
  }
68
89
  }
@@ -57,12 +57,33 @@
57
57
  "country": {
58
58
  "type": "text"
59
59
  },
60
+ "country_name": {
61
+ "type": "text"
62
+ },
63
+ "location_name": {
64
+ "type": "text"
65
+ },
66
+ "acronym": {
67
+ "type": "text"
68
+ },
69
+ "status": {
70
+ "type": "keyword"
71
+ },
72
+ "aliases": {
73
+ "type": "text"
74
+ },
75
+ "types": {
76
+ "type": "keyword"
77
+ },
60
78
  "id": {
61
79
  "type": "keyword"
62
80
  },
63
81
  "title": {
64
82
  "type": "object",
65
83
  "dynamic": "true"
84
+ },
85
+ "tags": {
86
+ "type": "keyword"
66
87
  }
67
88
  }
68
89
  }
@@ -57,12 +57,33 @@
57
57
  "country": {
58
58
  "type": "text"
59
59
  },
60
+ "country_name": {
61
+ "type": "text"
62
+ },
63
+ "location_name": {
64
+ "type": "text"
65
+ },
66
+ "acronym": {
67
+ "type": "text"
68
+ },
69
+ "status": {
70
+ "type": "keyword"
71
+ },
72
+ "aliases": {
73
+ "type": "text"
74
+ },
75
+ "types": {
76
+ "type": "keyword"
77
+ },
60
78
  "id": {
61
79
  "type": "keyword"
62
80
  },
63
81
  "title": {
64
82
  "type": "object",
65
83
  "dynamic": "true"
84
+ },
85
+ "tags": {
86
+ "type": "keyword"
66
87
  }
67
88
  }
68
89
  }
@@ -1,6 +1,7 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
3
  # Copyright (C) 2021-2022 CERN.
4
+ # Copyright (C) 2024 California Institute of Technology.
4
5
  #
5
6
  # Invenio-Vocabularies is free software; you can redistribute it and/or
6
7
  # modify it under the terms of the MIT License; see LICENSE file for more
@@ -43,6 +44,8 @@ class FunderSchema(BaseVocabularySchema):
43
44
  required=True, validate=validate.Length(min=1, error=_("Name cannot be blank."))
44
45
  )
45
46
  country = SanitizedUnicode()
47
+ country_name = SanitizedUnicode()
48
+ location_name = SanitizedUnicode()
46
49
  identifiers = IdentifierSet(
47
50
  fields.Nested(
48
51
  partial(
@@ -57,6 +60,11 @@ class FunderSchema(BaseVocabularySchema):
57
60
  validate=validate.Length(min=1, error=_("PID cannot be blank."))
58
61
  )
59
62
 
63
+ acronym = SanitizedUnicode()
64
+ aliases = fields.List(SanitizedUnicode())
65
+ status = SanitizedUnicode()
66
+ types = fields.List(SanitizedUnicode())
67
+
60
68
  @validates_schema
61
69
  def validate_id(self, data, **kwargs):
62
70
  """Validates ID."""
@@ -1,6 +1,6 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
- # Copyright (C) 2022 CERN.
3
+ # Copyright (C) 2022-2024 CERN.
4
4
  #
5
5
  # Invenio-Vocabularies is free software; you can redistribute it and/or
6
6
  # modify it under the terms of the MIT License; see LICENSE file for more
@@ -29,4 +29,5 @@ class FunderL10NItemSchema(Schema):
29
29
  props = fields.Dict(dump_only=True)
30
30
  name = fields.String(dump_only=True)
31
31
  country = fields.String(dump_only=True)
32
+ country_name = fields.String(dump_only=True)
32
33
  identifiers = fields.List(fields.Nested(IdentifierSchema), dump_only=True)