invenio-vocabularies 6.3.1__py2.py3-none-any.whl → 6.4.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of invenio-vocabularies might be problematic. Click here for more details.

@@ -10,6 +10,6 @@
10
10
 
11
11
  from .ext import InvenioVocabularies
12
12
 
13
- __version__ = "6.3.1"
13
+ __version__ = "6.4.1"
14
14
 
15
15
  __all__ = ("__version__", "InvenioVocabularies")
@@ -9,8 +9,11 @@
9
9
 
10
10
  """Affiliations datastreams, transformers, writers and readers."""
11
11
 
12
+ from copy import deepcopy
13
+
12
14
  from flask import current_app
13
15
 
16
+ from ...datastreams import StreamEntry
14
17
  from ...datastreams.errors import TransformerError, WriterError
15
18
  from ...datastreams.transformers import BaseTransformer
16
19
  from ...datastreams.writers import ServiceWriter
@@ -96,15 +99,24 @@ class OpenAIREAffiliationsServiceWriter(ServiceWriter):
96
99
  """Get the id from an entry."""
97
100
  return entry["id"]
98
101
 
99
- def write(self, stream_entry, *args, **kwargs):
100
- """Writes the input entry using a given service."""
101
- entry = stream_entry.entry
102
-
103
- return super().write(stream_entry, *args, **kwargs)
104
-
105
- def write_many(self, stream_entries, *args, **kwargs):
106
- """Writes the input entries using a given service."""
107
- return super().write_many(stream_entries, *args, **kwargs)
102
+ def _do_update(self, entry):
103
+ vocab_id = self._entry_id(entry)
104
+ current = self._resolve(vocab_id)
105
+ updated = deepcopy(current.to_dict())
106
+
107
+ if "identifiers" in entry:
108
+ # For each new identifier
109
+ for new_identifier in entry["identifiers"]:
110
+ # Either find an existing identifier with the same scheme and update the "identifier" value
111
+ for existing_identifier in updated["identifiers"]:
112
+ if existing_identifier["scheme"] == new_identifier["scheme"]:
113
+ existing_identifier["identifier"] = new_identifier["identifier"]
114
+ break
115
+ # Or add the new identifier to the list of identifiers
116
+ else:
117
+ updated["identifiers"].append(new_identifier)
118
+
119
+ return StreamEntry(self._service.update(self._identity, vocab_id, updated))
108
120
 
109
121
 
110
122
  VOCABULARIES_DATASTREAM_READERS = {}
@@ -74,7 +74,10 @@ class OpenAIREProjectTransformer(BaseTransformer):
74
74
 
75
75
  funding = next(iter(record.get("funding", [])), None)
76
76
  if funding:
77
- program = funding.get("fundingStream", {}).get("id", "")
77
+ funding_stream_id = funding.get("fundingStream", {}).get("id", "")
78
+ # Example funding stream ID: `EC::HE::HORIZON-AG-UN`. We need the `HE`
79
+ # string, i.e. the second "part" of the identifier.
80
+ program = next(iter(funding_stream_id.split("::")[1:2]), "")
78
81
  if program:
79
82
  award["program"] = program
80
83
 
@@ -13,7 +13,7 @@
13
13
  from functools import partial
14
14
 
15
15
  from invenio_i18n import get_locale
16
- from marshmallow import Schema, fields, pre_load
16
+ from marshmallow import EXCLUDE, Schema, fields, pre_load
17
17
  from marshmallow_utils.fields import IdentifierSet, SanitizedUnicode
18
18
  from marshmallow_utils.schemas import IdentifierSchema
19
19
 
@@ -59,6 +59,14 @@ class SubjectSchema(BaseVocabularySchema):
59
59
  class SubjectRelationSchema(ContribVocabularyRelationSchema):
60
60
  """Schema to define an optional subject relation in another schema."""
61
61
 
62
+ # If re-running an OpenAIRE awards update on existing awards which already have subjects,
63
+ # the subject entries will contain `scheme` and `props`, which are unknown since they are `dump_only`.
64
+ # This makes the update exclude unknown fields and go through with the update.
65
+ class Meta:
66
+ """Metadata class."""
67
+
68
+ unknown = EXCLUDE
69
+
62
70
  ftf_name = "subject"
63
71
  parent_field_name = "subjects"
64
72
  subject = SanitizedUnicode()
@@ -87,23 +87,8 @@ class ServiceWriter(BaseWriter):
87
87
  def _do_update(self, entry):
88
88
  vocab_id = self._entry_id(entry)
89
89
  current = self._resolve(vocab_id)
90
- combined_dict = current.to_dict()
91
-
92
- # Update fields from entry
93
- for key, value in entry.items():
94
- if key in combined_dict:
95
- if isinstance(combined_dict[key], list) and isinstance(value, list):
96
- combined_dict[key].extend(
97
- item for item in value if item not in combined_dict[key]
98
- )
99
- else:
100
- combined_dict[key] = value
101
- else:
102
- combined_dict[key] = value
103
-
104
- return StreamEntry(
105
- self._service.update(self._identity, vocab_id, combined_dict)
106
- )
90
+ updated = dict(current.to_dict(), **entry)
91
+ return StreamEntry(self._service.update(self._identity, vocab_id, updated))
107
92
 
108
93
  def write(self, stream_entry, *args, **kwargs):
109
94
  """Writes the input entry using a given service."""
@@ -131,3 +131,66 @@ class ProcessRORFundersJob(ProcessDataStreamJob):
131
131
  "transformers": [{"type": "ror-funders"}],
132
132
  }
133
133
  }
134
+
135
+
136
+ class ImportAwardsOpenAIREJob(ProcessDataStreamJob):
137
+ """Import awards from OpenAIRE registered task."""
138
+
139
+ description = "Import awards from OpenAIRE"
140
+ title = "Import Awards OpenAIRE"
141
+ id = "import_awards_openaire"
142
+
143
+ @classmethod
144
+ def default_args(cls, job_obj, **kwargs):
145
+ """Generate default job arguments."""
146
+ return {
147
+ "config": {
148
+ "readers": [
149
+ {
150
+ "type": "openaire-http",
151
+ "args": {"origin": "diff", "tar_href": "/project.tar"},
152
+ },
153
+ {
154
+ "type": "tar",
155
+ "args": {
156
+ "mode": "r",
157
+ "regex": "\\.json.gz$",
158
+ },
159
+ },
160
+ {"type": "gzip"},
161
+ {"type": "jsonl"},
162
+ ],
163
+ "transformers": [{"type": "openaire-award"}],
164
+ "writers": [
165
+ {"args": {"writer": {"type": "awards-service"}}, "type": "async"}
166
+ ],
167
+ }
168
+ }
169
+
170
+
171
+ class UpdateAwardsCordisJob(ProcessDataStreamJob):
172
+ """Update awards from CORDIS registered task."""
173
+
174
+ description = "Update awards from CORDIS"
175
+ title = "Update Awards CORDIS"
176
+ id = "update_awards_cordis"
177
+
178
+ @classmethod
179
+ def default_args(cls, job_obj, **kwargs):
180
+ """Generate default job arguments."""
181
+ return {
182
+ "config": {
183
+ "readers": [
184
+ {"args": {"origin": "HE"}, "type": "cordis-project-http"},
185
+ {"args": {"mode": "r", "regex": "\\.xml$"}, "type": "zip"},
186
+ {"args": {"root_element": "project"}, "type": "xml"},
187
+ ],
188
+ "transformers": [{"type": "cordis-award"}],
189
+ "writers": [
190
+ {
191
+ "args": {"writer": {"type": "cordis-awards-service"}},
192
+ "type": "async",
193
+ }
194
+ ],
195
+ }
196
+ }
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: invenio-vocabularies
3
- Version: 6.3.1
3
+ Version: 6.4.1
4
4
  Summary: Invenio module for managing vocabularies.
5
5
  Home-page: https://github.com/inveniosoftware/invenio-vocabularies
6
6
  Author: CERN
@@ -83,6 +83,16 @@ https://invenio-vocabularies.readthedocs.io/
83
83
  Changes
84
84
  =======
85
85
 
86
+ Version v6.4.1 (released 2024-10-15)
87
+
88
+ - fix: exclude unknown fields when updating awards with subjects
89
+ - fix: revert generic writer and define OpenAIRE awards writer logic
90
+
91
+ Version v6.4.0 (released 2024-10-15)
92
+
93
+ - jobs: add import awards OpenAIRE; Update CORDIS
94
+ - awards: rollback to use the 2nd part of funding stream as program
95
+
86
96
  Version v6.3.1 (released 2024-10-11)
87
97
 
88
98
  - jobs: pass since as string to task
@@ -1,10 +1,10 @@
1
- invenio_vocabularies/__init__.py,sha256=hDg_LXDMIoa_G123kgz3K8g9J3LghXoNWpYs4e3YSE4,377
1
+ invenio_vocabularies/__init__.py,sha256=cnTvl-gNQL8JCB5A30kCWNNoQi84lT9ksWrLQbmr8eY,377
2
2
  invenio_vocabularies/cli.py,sha256=S3lBsLxsSYa83sCDaGZr5mP7TwPvmmwxzzbB13h8VBI,5856
3
3
  invenio_vocabularies/config.py,sha256=bpNKVgwfb7bgkP5zbmoawnAMD6bHWXIJV-6CpEhi-M8,5752
4
4
  invenio_vocabularies/ext.py,sha256=GujJ4UARd4Fxf4z7zznRk9JAgHamZuYCOdrKU5czg00,5987
5
5
  invenio_vocabularies/factories.py,sha256=mVg4yGKe58e4uS8rYe0DmIO6oMpmtNTcK3wH9eM5jVU,4380
6
6
  invenio_vocabularies/fixtures.py,sha256=nNWwH04HFASjfj1oy5kMdcQGKmVjzUuA5wSw-ER1QAg,1585
7
- invenio_vocabularies/jobs.py,sha256=LxLt3zmfgulDdf5z7RGB70QF8GMPzgdD4vAGf8ZQENY,4285
7
+ invenio_vocabularies/jobs.py,sha256=OoQOBQoWOLHZ16EMlVM54OezgO58u_aMDemAjbCuUk8,6371
8
8
  invenio_vocabularies/proxies.py,sha256=k7cTUgWfnCoYIuNqAj_VFi1zBN33KNNclRSVnBkObEM,711
9
9
  invenio_vocabularies/views.py,sha256=PNJ5nvc3O7ASwNe56xmqy5YaU9n3UYF3W2JwvtE_kYs,1561
10
10
  invenio_vocabularies/webpack.py,sha256=hzTM0qx6iiRHkmjti53yuZ5ebfPMR5mpO9uNewBat74,1891
@@ -47,7 +47,7 @@ invenio_vocabularies/contrib/affiliations/__init__.py,sha256=rV8YAzBRoSKsBYcVjCN
47
47
  invenio_vocabularies/contrib/affiliations/affiliations.py,sha256=BSEjzAjLq17GGKHyPrgC5VLluvZqVnKMepIiqqLQKzo,2077
48
48
  invenio_vocabularies/contrib/affiliations/api.py,sha256=5nIOvpfcseuAAg2XgblHc8jb7TAdfU79XOBRpL-p398,326
49
49
  invenio_vocabularies/contrib/affiliations/config.py,sha256=kk-zAl7Uyi6N4qVOiCkBD9cN1KKOvDcILDHJ-zoIzJA,2084
50
- invenio_vocabularies/contrib/affiliations/datastreams.py,sha256=vEH6wj951LLa9pSsNb0rS8IVJ1zWbEQDDe0Cu770J9k,5479
50
+ invenio_vocabularies/contrib/affiliations/datastreams.py,sha256=M99qM0R2_Tpa1IR_WC9xI2jHQDaVjvEHSn7Mh17_AHI,6060
51
51
  invenio_vocabularies/contrib/affiliations/facets.py,sha256=w316MGvtdyTpRCPOpCEmMxxLraRkbFFb1VvLkFlEc9o,1229
52
52
  invenio_vocabularies/contrib/affiliations/models.py,sha256=JUcj-1ydc2Cw2Rsc24JwXE3TFBJ_6fivhUYhGq4rT8A,329
53
53
  invenio_vocabularies/contrib/affiliations/resources.py,sha256=DBEbRxQmp-o-PeZlgFG588Q4sGcruuwIL8L9O-SzCes,435
@@ -68,7 +68,7 @@ invenio_vocabularies/contrib/awards/__init__.py,sha256=KwCmwFalz-3pDs9iTa5TKUidB
68
68
  invenio_vocabularies/contrib/awards/api.py,sha256=OXukE7PLXs45BTtqVrhvGBNqLmQaI-CgXmHTCi36LZk,303
69
69
  invenio_vocabularies/contrib/awards/awards.py,sha256=tOLvcvTPiN1gn1QAl-hSh1bwcclg8Kx2ZMmJMEhI7vk,2959
70
70
  invenio_vocabularies/contrib/awards/config.py,sha256=PlDHabkWDUzwa1Fvk_U2hG83kQYBqM1IyChg8Yg_VlY,1630
71
- invenio_vocabularies/contrib/awards/datastreams.py,sha256=k7GtQ0pmY9EehYUtus45G6opJb8v2Kc63YIn5zO4SU8,14004
71
+ invenio_vocabularies/contrib/awards/datastreams.py,sha256=4J6JFCqYosDO8y4FRq-LTWgRNPbeH4Tj4Tur6ziChtY,14234
72
72
  invenio_vocabularies/contrib/awards/models.py,sha256=mM-kSNf7kDH3oIbV8epxxbUi7muYqi4JreXxgWXlVzw,318
73
73
  invenio_vocabularies/contrib/awards/resources.py,sha256=_9YTqbhz8axFXGhG5y4WyjE27p9n-7e3c6HoBRditPA,411
74
74
  invenio_vocabularies/contrib/awards/schema.py,sha256=P_k9EONMMx0eWpALVuhGBzZlDeh4599elLlmMis-Vko,3302
@@ -139,7 +139,7 @@ invenio_vocabularies/contrib/subjects/datastreams.py,sha256=CiLSTm9PukzLKm34VDmm
139
139
  invenio_vocabularies/contrib/subjects/facets.py,sha256=qQ7_rppFBzsmrlZu4-MvOIdUcjeOmDA9gOHAcs0lWwI,695
140
140
  invenio_vocabularies/contrib/subjects/models.py,sha256=8XgbVRxDDvhWPjMWsoCriNlOKdmV_113a14yLRtlvM4,363
141
141
  invenio_vocabularies/contrib/subjects/resources.py,sha256=0KRfUMizwgIziZybk4HnIjiSsXbrCv_XmguNPwnxoo8,506
142
- invenio_vocabularies/contrib/subjects/schema.py,sha256=5pM9FFVdeb1nUuTwr2SqkUJh-oHIjYN-EXXI8_SSjCQ,2446
142
+ invenio_vocabularies/contrib/subjects/schema.py,sha256=MtJ-YijukyK77MlCayGHJbjEMf7YTiRAS9nfFJ3YAyI,2813
143
143
  invenio_vocabularies/contrib/subjects/services.py,sha256=s1U6HMmpjuz7rrgR0DtT9C28TC6sZEeDTsa4Jh1TXQk,864
144
144
  invenio_vocabularies/contrib/subjects/subjects.py,sha256=NwZycExLyV8l7ikGStH4GOecVuDSxFT70KoNv6qC78I,1877
145
145
  invenio_vocabularies/contrib/subjects/euroscivoc/__init__.py,sha256=e5L9E4l5JHqVzijAX8tn2DIa2n01vJ5wOAZdN62RnIo,247
@@ -162,7 +162,7 @@ invenio_vocabularies/datastreams/factories.py,sha256=H8a2gAy7KNImtdCdtqpVKC5gIvE
162
162
  invenio_vocabularies/datastreams/readers.py,sha256=Gr_KbN-tZtoQ8R4TJG4dTBTE8028eLRlcxSv2-IqI9c,11386
163
163
  invenio_vocabularies/datastreams/tasks.py,sha256=0fuH_PRt9Ncv6WHM4pkYmfheRVGDKkERZiMPvgV4bZU,1129
164
164
  invenio_vocabularies/datastreams/transformers.py,sha256=0ymZiHtNtgfYxt2MIjthtSzikRH1jAqhIAZ1yScoAHs,1733
165
- invenio_vocabularies/datastreams/writers.py,sha256=PO9UPQ8rT4pstfoCiB-zCfuvCNOHRgFbysq6-spXm34,7218
165
+ invenio_vocabularies/datastreams/writers.py,sha256=FMTQdGavRgq6Qk21UcifYeDVH3jUA3rmwdyICr1ywxU,6719
166
166
  invenio_vocabularies/datastreams/xml.py,sha256=HFa-lfxj7kFrr2IjeN1jxSLDfcvpBwO9nZLZF2-BryE,997
167
167
  invenio_vocabularies/records/__init__.py,sha256=Uj7O6fYdAtLOkLXUGSAYPADBB7aqP4yVs9b6OAjA158,243
168
168
  invenio_vocabularies/records/api.py,sha256=Lynt6Sz4BVN1orh0zgJ5ljhnUobEtcq8c22PmSeUo2U,1494
@@ -297,10 +297,10 @@ invenio_vocabularies/translations/zh_CN/LC_MESSAGES/messages.mo,sha256=g1I5aNO8r
297
297
  invenio_vocabularies/translations/zh_CN/LC_MESSAGES/messages.po,sha256=vg8qC8ofpAdJ3mQz7mWM1ylKDpiNWXFs7rlMdSPkgKk,4629
298
298
  invenio_vocabularies/translations/zh_TW/LC_MESSAGES/messages.mo,sha256=cqSm8NtMAwrP9O6qbmtkDtRT1e9D93qpsJN5X9_PPVw,600
299
299
  invenio_vocabularies/translations/zh_TW/LC_MESSAGES/messages.po,sha256=9ACePz_EpB-LfcIJajZ2kp8Q04tcdrQLOtug162ZUss,4115
300
- invenio_vocabularies-6.3.1.dist-info/AUTHORS.rst,sha256=8d0p_WWE1r9DavvzMDi2D4YIGBHiMYcN3LYxqQOj8sY,291
301
- invenio_vocabularies-6.3.1.dist-info/LICENSE,sha256=UvI8pR8jGWqe0sTkb_hRG6eIrozzWwWzyCGEpuXX4KE,1062
302
- invenio_vocabularies-6.3.1.dist-info/METADATA,sha256=i448NUrQmoezzIp3sY16Lxd2aQ4OKNUOdoOWCrtWUIo,10103
303
- invenio_vocabularies-6.3.1.dist-info/WHEEL,sha256=-G_t0oGuE7UD0DrSpVZnq1hHMBV9DD2XkS5v7XpmTnk,110
304
- invenio_vocabularies-6.3.1.dist-info/entry_points.txt,sha256=P9wLzJDDXGd4BEzRX4ylmnWge8wg_Q9jI0NBxNRVA2Q,2972
305
- invenio_vocabularies-6.3.1.dist-info/top_level.txt,sha256=x1gRNbaODF_bCD0SBLM3nVOFPGi06cmGX5X94WKrFKk,21
306
- invenio_vocabularies-6.3.1.dist-info/RECORD,,
300
+ invenio_vocabularies-6.4.1.dist-info/AUTHORS.rst,sha256=8d0p_WWE1r9DavvzMDi2D4YIGBHiMYcN3LYxqQOj8sY,291
301
+ invenio_vocabularies-6.4.1.dist-info/LICENSE,sha256=UvI8pR8jGWqe0sTkb_hRG6eIrozzWwWzyCGEpuXX4KE,1062
302
+ invenio_vocabularies-6.4.1.dist-info/METADATA,sha256=54qe6nCLmGJy9rNAImi2olrld8Cl2p3BGn2jp6u9ADY,10433
303
+ invenio_vocabularies-6.4.1.dist-info/WHEEL,sha256=-G_t0oGuE7UD0DrSpVZnq1hHMBV9DD2XkS5v7XpmTnk,110
304
+ invenio_vocabularies-6.4.1.dist-info/entry_points.txt,sha256=xskfdwhOV074XHL1g8x8BZ6BNsdZ6D7XPVqcU5JqB2g,3118
305
+ invenio_vocabularies-6.4.1.dist-info/top_level.txt,sha256=x1gRNbaODF_bCD0SBLM3nVOFPGi06cmGX5X94WKrFKk,21
306
+ invenio_vocabularies-6.4.1.dist-info/RECORD,,
@@ -50,8 +50,10 @@ vocabulary_model = invenio_vocabularies.records.models
50
50
  invenio_vocabularies = invenio_vocabularies
51
51
 
52
52
  [invenio_jobs.jobs]
53
+ import_awards_openaire = invenio_vocabularies.jobs:ImportAwardsOpenAIREJob
53
54
  process_ror_affiliations = invenio_vocabularies.jobs:ProcessRORAffiliationsJob
54
55
  process_ror_funders = invenio_vocabularies.jobs:ProcessRORFundersJob
56
+ update_awards_cordis = invenio_vocabularies.jobs:UpdateAwardsCordisJob
55
57
 
56
58
  [invenio_jsonschemas.schemas]
57
59
  affiliations = invenio_vocabularies.contrib.affiliations.jsonschemas