invenio-vocabularies 6.0.0__py2.py3-none-any.whl → 6.1.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of invenio-vocabularies might be problematic. Click here for more details.
- invenio_vocabularies/__init__.py +1 -1
- invenio_vocabularies/contrib/awards/jsonschemas/awards/award-v1.0.0.json +0 -20
- invenio_vocabularies/contrib/common/ror/datastreams.py +20 -7
- invenio_vocabularies/jobs.py +88 -0
- {invenio_vocabularies-6.0.0.dist-info → invenio_vocabularies-6.1.0.dist-info}/METADATA +7 -1
- {invenio_vocabularies-6.0.0.dist-info → invenio_vocabularies-6.1.0.dist-info}/RECORD +11 -10
- {invenio_vocabularies-6.0.0.dist-info → invenio_vocabularies-6.1.0.dist-info}/entry_points.txt +3 -0
- {invenio_vocabularies-6.0.0.dist-info → invenio_vocabularies-6.1.0.dist-info}/AUTHORS.rst +0 -0
- {invenio_vocabularies-6.0.0.dist-info → invenio_vocabularies-6.1.0.dist-info}/LICENSE +0 -0
- {invenio_vocabularies-6.0.0.dist-info → invenio_vocabularies-6.1.0.dist-info}/WHEEL +0 -0
- {invenio_vocabularies-6.0.0.dist-info → invenio_vocabularies-6.1.0.dist-info}/top_level.txt +0 -0
invenio_vocabularies/__init__.py
CHANGED
|
@@ -50,29 +50,9 @@
|
|
|
50
50
|
"id": {
|
|
51
51
|
"$ref": "local://definitions-v1.0.0.json#/identifier"
|
|
52
52
|
},
|
|
53
|
-
"scheme": {
|
|
54
|
-
"description": "Identifier of the subject scheme.",
|
|
55
|
-
"$ref": "local://definitions-v1.0.0.json#/identifier"
|
|
56
|
-
},
|
|
57
53
|
"subject": {
|
|
58
54
|
"description": "Human readable label.",
|
|
59
55
|
"type": "string"
|
|
60
|
-
},
|
|
61
|
-
"props": {
|
|
62
|
-
"type": "object",
|
|
63
|
-
"patternProperties": {
|
|
64
|
-
"^.*$": {
|
|
65
|
-
"type": "string"
|
|
66
|
-
}
|
|
67
|
-
}
|
|
68
|
-
},
|
|
69
|
-
"identifiers": {
|
|
70
|
-
"description": "Alternate identifiers for the subject.",
|
|
71
|
-
"type": "array",
|
|
72
|
-
"items": {
|
|
73
|
-
"$ref": "local://definitions-v2.0.0.json#/identifiers_with_scheme"
|
|
74
|
-
},
|
|
75
|
-
"uniqueItems": true
|
|
76
56
|
}
|
|
77
57
|
}
|
|
78
58
|
},
|
|
@@ -21,7 +21,11 @@ from invenio_vocabularies.datastreams.transformers import BaseTransformer
|
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
class RORHTTPReader(BaseReader):
|
|
24
|
-
"""ROR HTTP Reader
|
|
24
|
+
"""ROR HTTP Reader.
|
|
25
|
+
|
|
26
|
+
Returning an in-memory
|
|
27
|
+
binary stream of the latest ROR data dump ZIP file.
|
|
28
|
+
"""
|
|
25
29
|
|
|
26
30
|
def __init__(self, origin=None, mode="r", since=None, *args, **kwargs):
|
|
27
31
|
"""Constructor."""
|
|
@@ -30,7 +34,8 @@ class RORHTTPReader(BaseReader):
|
|
|
30
34
|
|
|
31
35
|
def _iter(self, fp, *args, **kwargs):
|
|
32
36
|
raise NotImplementedError(
|
|
33
|
-
"RORHTTPReader downloads one file
|
|
37
|
+
"RORHTTPReader downloads one file "
|
|
38
|
+
"and therefore does not iterate through items"
|
|
34
39
|
)
|
|
35
40
|
|
|
36
41
|
def _get_last_dump_date(self, linksets):
|
|
@@ -53,11 +58,16 @@ class RORHTTPReader(BaseReader):
|
|
|
53
58
|
return last_dump_date
|
|
54
59
|
else:
|
|
55
60
|
raise ReaderError(
|
|
56
|
-
"Couldn't find JSON-LD in publisher's linkset
|
|
61
|
+
"Couldn't find JSON-LD in publisher's linkset "
|
|
62
|
+
"to determine last dump date."
|
|
57
63
|
)
|
|
58
64
|
|
|
59
65
|
def read(self, item=None, *args, **kwargs):
|
|
60
|
-
"""Reads the latest ROR data dump
|
|
66
|
+
"""Reads the latest ROR data dump.
|
|
67
|
+
|
|
68
|
+
Read from ZIP file from
|
|
69
|
+
Zenodo and yields an in-memory binary stream of it.
|
|
70
|
+
"""
|
|
61
71
|
if item:
|
|
62
72
|
raise NotImplementedError(
|
|
63
73
|
"RORHTTPReader does not support being chained after another reader"
|
|
@@ -68,7 +78,8 @@ class RORHTTPReader(BaseReader):
|
|
|
68
78
|
landing_page = requests.get(dataset_doi_link, allow_redirects=True)
|
|
69
79
|
landing_page.raise_for_status()
|
|
70
80
|
|
|
71
|
-
# Call the signposting `linkset+json` endpoint for
|
|
81
|
+
# Call the signposting `linkset+json` endpoint for
|
|
82
|
+
# the Concept DOI (i.e. latest version) of the ROR data dump.
|
|
72
83
|
# See: https://github.com/inveniosoftware/rfcs/blob/master/rfcs/rdm-0071-signposting.md#provide-an-applicationlinksetjson-endpoint
|
|
73
84
|
if "linkset" not in landing_page.links:
|
|
74
85
|
raise ReaderError("Linkset not found in the ROR dataset record.")
|
|
@@ -94,8 +105,10 @@ class RORHTTPReader(BaseReader):
|
|
|
94
105
|
raise ReaderError(f"Expected 1 ZIP item but got {len(zip_files)}")
|
|
95
106
|
|
|
96
107
|
# Download the ZIP file and fully load the response bytes content in memory.
|
|
97
|
-
# The bytes content are then wrapped by a BytesIO to be
|
|
98
|
-
#
|
|
108
|
+
# The bytes content are then wrapped by a BytesIO to be
|
|
109
|
+
# file-like object (as required by `zipfile.ZipFile`).
|
|
110
|
+
# Using directly `file_resp.raw` is not possible since
|
|
111
|
+
# `zipfile.ZipFile` requires the file-like object to be seekable.
|
|
99
112
|
file_resp = requests.get(file_url)
|
|
100
113
|
file_resp.raise_for_status()
|
|
101
114
|
yield io.BytesIO(file_resp.content)
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
#
|
|
3
|
+
# Copyright (C) 2021-2022 CERN.
|
|
4
|
+
#
|
|
5
|
+
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
6
|
+
# modify it under the terms of the MIT License; see LICENSE file for more
|
|
7
|
+
# details.
|
|
8
|
+
|
|
9
|
+
"""Jobs module."""
|
|
10
|
+
|
|
11
|
+
import datetime
|
|
12
|
+
from datetime import timezone
|
|
13
|
+
|
|
14
|
+
from invenio_i18n import gettext as _
|
|
15
|
+
from invenio_jobs.jobs import JobType
|
|
16
|
+
from marshmallow import Schema, fields
|
|
17
|
+
from marshmallow_utils.fields import TZDateTime
|
|
18
|
+
|
|
19
|
+
from invenio_vocabularies.services.tasks import process_datastream
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class ArgsSchema(Schema):
    """Marshmallow schema describing the input arguments of a job run."""

    # Optional starting point for the run, declared with the ISO format and
    # the UTC timezone. The description is the translatable help text.
    since = TZDateTime(
        format="iso",
        timezone=timezone.utc,
        metadata={
            "description": _(
                "YYYY-MM-DD HH:mm format. "
                "Leave field empty if it should continue since last successful run."
            ),
        },
    )

    # Field flagged as "hidden" in its metadata; it defaults to this schema's
    # name both when loading and when dumping.
    job_arg_schema = fields.String(
        load_default="ArgsSchema",
        dump_default="ArgsSchema",
        metadata={"type": "hidden"},
    )
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class ProcessDataStreamJob(JobType):
    """Generic job type bound to the ``process_datastream`` task."""

    # No identifier is set on this generic class.
    id = None
    # Task run by this job type.
    task = process_datastream
    # Schema describing the arguments the job accepts.
    arguments_schema = ArgsSchema
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class ProcessRORAffiliationsJob(ProcessDataStreamJob):
    """Job type that processes the ROR affiliations datastream."""

    description = "Process ROR affiliations"
    title = "Load ROR affiliations"
    id = "process_ror_affiliations"

    @classmethod
    def default_args(cls, job_obj, since=None, **kwargs):
        """Build the default datastream configuration for a job run.

        :param job_obj: Job object; its ``last_runs["success"]`` entry is read
            to find the start time of the last successful run, if any.
        :param since: Optional explicit starting point. When given, it is
            passed to the ``ror-http`` reader unchanged.
        :param kwargs: Accepted for interface compatibility; not used here.
        :returns: A dict with a single ``"config"`` key holding the readers,
            writers and transformers of the datastream.
        """
        # Only compute a fallback when the caller did not supply ``since``.
        # Previously an explicit value fell into the ``else`` branch and was
        # overwritten with the current time.
        if since is None:
            last_success = job_obj.last_runs["success"]
            if last_success:
                since = last_success.started_at
            else:
                since = datetime.datetime.now()

        return {
            "config": {
                "readers": [
                    {
                        "args": {"since": since},
                        "type": "ror-http",
                    },
                    {"args": {"regex": "_schema_v2\\.json$"}, "type": "zip"},
                    {"type": "json"},
                ],
                "writers": [
                    {
                        "args": {
                            "writer": {
                                "type": "affiliations-service",
                                "args": {"update": True},
                            }
                        },
                        "type": "async",
                    }
                ],
                "transformers": [{"type": "ror-affiliations"}],
            }
        }
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: invenio-vocabularies
|
|
3
|
-
Version: 6.
|
|
3
|
+
Version: 6.1.0
|
|
4
4
|
Summary: Invenio module for managing vocabularies.
|
|
5
5
|
Home-page: https://github.com/inveniosoftware/invenio-vocabularies
|
|
6
6
|
Author: CERN
|
|
@@ -13,6 +13,7 @@ Requires-Python: >=3.7
|
|
|
13
13
|
Requires-Dist: invenio-i18n <3.0.0,>=2.0.0
|
|
14
14
|
Requires-Dist: invenio-records-resources <7.0.0,>=6.0.0
|
|
15
15
|
Requires-Dist: invenio-administration <3.0.0,>=2.0.0
|
|
16
|
+
Requires-Dist: invenio-jobs <2.0.0,>=1.0.0
|
|
16
17
|
Requires-Dist: lxml >=4.5.0
|
|
17
18
|
Requires-Dist: PyYAML >=5.4.1
|
|
18
19
|
Requires-Dist: regex >=2024.7.24
|
|
@@ -82,6 +83,11 @@ https://invenio-vocabularies.readthedocs.io/
|
|
|
82
83
|
Changes
|
|
83
84
|
=======
|
|
84
85
|
|
|
86
|
+
Version v6.1.0 (released 2024-10-10)
|
|
87
|
+
|
|
88
|
+
- jobs: define invenio job wrapper for ROR affiliation data stream
|
|
89
|
+
- awards: remove subj props from jsonschema
|
|
90
|
+
|
|
85
91
|
Version v6.0.0 (released 2024-10-03)
|
|
86
92
|
|
|
87
93
|
- datastreams: writers: add option to not insert
|
|
@@ -1,9 +1,10 @@
|
|
|
1
|
-
invenio_vocabularies/__init__.py,sha256=
|
|
1
|
+
invenio_vocabularies/__init__.py,sha256=FP2iYtN9rANG4J8ur97OwNGV6rvg034f6BWA90bCUNM,377
|
|
2
2
|
invenio_vocabularies/cli.py,sha256=S3lBsLxsSYa83sCDaGZr5mP7TwPvmmwxzzbB13h8VBI,5856
|
|
3
3
|
invenio_vocabularies/config.py,sha256=bpNKVgwfb7bgkP5zbmoawnAMD6bHWXIJV-6CpEhi-M8,5752
|
|
4
4
|
invenio_vocabularies/ext.py,sha256=GujJ4UARd4Fxf4z7zznRk9JAgHamZuYCOdrKU5czg00,5987
|
|
5
5
|
invenio_vocabularies/factories.py,sha256=mVg4yGKe58e4uS8rYe0DmIO6oMpmtNTcK3wH9eM5jVU,4380
|
|
6
6
|
invenio_vocabularies/fixtures.py,sha256=nNWwH04HFASjfj1oy5kMdcQGKmVjzUuA5wSw-ER1QAg,1585
|
|
7
|
+
invenio_vocabularies/jobs.py,sha256=QBWgScrUlKJagpOvwXN12G1OsvW6NM-IFKRgkbW_edg,2549
|
|
7
8
|
invenio_vocabularies/proxies.py,sha256=k7cTUgWfnCoYIuNqAj_VFi1zBN33KNNclRSVnBkObEM,711
|
|
8
9
|
invenio_vocabularies/views.py,sha256=PNJ5nvc3O7ASwNe56xmqy5YaU9n3UYF3W2JwvtE_kYs,1561
|
|
9
10
|
invenio_vocabularies/webpack.py,sha256=bp2vz3O8QdZwrznsV-SjAmS0g1nV9WGWqc-7ZJQY5PQ,1891
|
|
@@ -74,7 +75,7 @@ invenio_vocabularies/contrib/awards/schema.py,sha256=P_k9EONMMx0eWpALVuhGBzZlDeh
|
|
|
74
75
|
invenio_vocabularies/contrib/awards/serializer.py,sha256=W59OMDxOXPdXrrimzMdIPbDIfVk6hARgyz1N2xSbchA,1440
|
|
75
76
|
invenio_vocabularies/contrib/awards/services.py,sha256=zwOMHqa4SyZuHopGZwEKhfw3kUHrWg73_4zMNo5kOe4,371
|
|
76
77
|
invenio_vocabularies/contrib/awards/jsonschemas/__init__.py,sha256=XB2l9hr53vqTk7o9lmy18FWGhHEUvNHu8D6nMF8Bz4k,246
|
|
77
|
-
invenio_vocabularies/contrib/awards/jsonschemas/awards/award-v1.0.0.json,sha256=
|
|
78
|
+
invenio_vocabularies/contrib/awards/jsonschemas/awards/award-v1.0.0.json,sha256=KUJW0XaHSU8iYNtK0Hb1H42jlhw-oL4Y644Q_-aSlTo,2131
|
|
78
79
|
invenio_vocabularies/contrib/awards/mappings/__init__.py,sha256=PbM5urjiSrJSx4Ak-H_lJrOOVKGT38MrGgRv61gIbAM,243
|
|
79
80
|
invenio_vocabularies/contrib/awards/mappings/os-v1/__init__.py,sha256=r8IZvjorG9SVz32Hv1fncoqLfz-5Ml0Ph3jiYWCHBPk,250
|
|
80
81
|
invenio_vocabularies/contrib/awards/mappings/os-v1/awards/award-v1.0.0.json,sha256=dcE1dvgGBYYb7vziGArmXb64rg2HpBdiIw_aP3oYaDw,2441
|
|
@@ -86,7 +87,7 @@ invenio_vocabularies/contrib/common/__init__.py,sha256=DdbGYRthEpQtObhY_YK4vOT9Z
|
|
|
86
87
|
invenio_vocabularies/contrib/common/openaire/__init__.py,sha256=L7UtSimFJ3NI6j53bHzYKsWpFti1uo4fPb9OaTl7leI,244
|
|
87
88
|
invenio_vocabularies/contrib/common/openaire/datastreams.py,sha256=BV6NtBCPFuii6KbTHGkgNQO5tt_3Hn9T_219bz8AINg,3514
|
|
88
89
|
invenio_vocabularies/contrib/common/ror/__init__.py,sha256=3u2-fre1SQ-4nz3Ay0nxj3ntmMZ8Ujh_4eV-fyxfmtc,239
|
|
89
|
-
invenio_vocabularies/contrib/common/ror/datastreams.py,sha256=
|
|
90
|
+
invenio_vocabularies/contrib/common/ror/datastreams.py,sha256=arhB2OKu3Hl0Sn6urTxBQh_kim4oc9TTMRAwzi4pbXE,8057
|
|
90
91
|
invenio_vocabularies/contrib/funders/__init__.py,sha256=YxFXBDnT7NM8rFwxT_Ge3xXR2n17EM0alknQq7r_Bt8,478
|
|
91
92
|
invenio_vocabularies/contrib/funders/api.py,sha256=QKGGeSnPHSoBfucvpaVruXT_txYidofZ080G3IxFkIo,306
|
|
92
93
|
invenio_vocabularies/contrib/funders/config.py,sha256=EU7UrwLOkr2Bem9Skz_HJIxyBQRkXEdPT8zIuV8vbzI,2217
|
|
@@ -296,10 +297,10 @@ invenio_vocabularies/translations/zh_CN/LC_MESSAGES/messages.mo,sha256=g1I5aNO8r
|
|
|
296
297
|
invenio_vocabularies/translations/zh_CN/LC_MESSAGES/messages.po,sha256=vg8qC8ofpAdJ3mQz7mWM1ylKDpiNWXFs7rlMdSPkgKk,4629
|
|
297
298
|
invenio_vocabularies/translations/zh_TW/LC_MESSAGES/messages.mo,sha256=cqSm8NtMAwrP9O6qbmtkDtRT1e9D93qpsJN5X9_PPVw,600
|
|
298
299
|
invenio_vocabularies/translations/zh_TW/LC_MESSAGES/messages.po,sha256=9ACePz_EpB-LfcIJajZ2kp8Q04tcdrQLOtug162ZUss,4115
|
|
299
|
-
invenio_vocabularies-6.
|
|
300
|
-
invenio_vocabularies-6.
|
|
301
|
-
invenio_vocabularies-6.
|
|
302
|
-
invenio_vocabularies-6.
|
|
303
|
-
invenio_vocabularies-6.
|
|
304
|
-
invenio_vocabularies-6.
|
|
305
|
-
invenio_vocabularies-6.
|
|
300
|
+
invenio_vocabularies-6.1.0.dist-info/AUTHORS.rst,sha256=8d0p_WWE1r9DavvzMDi2D4YIGBHiMYcN3LYxqQOj8sY,291
|
|
301
|
+
invenio_vocabularies-6.1.0.dist-info/LICENSE,sha256=UvI8pR8jGWqe0sTkb_hRG6eIrozzWwWzyCGEpuXX4KE,1062
|
|
302
|
+
invenio_vocabularies-6.1.0.dist-info/METADATA,sha256=QwLqpBcTeK4HsEnUp0i6yVeK-_6Btwf-wER2m_FmdBM,9659
|
|
303
|
+
invenio_vocabularies-6.1.0.dist-info/WHEEL,sha256=-G_t0oGuE7UD0DrSpVZnq1hHMBV9DD2XkS5v7XpmTnk,110
|
|
304
|
+
invenio_vocabularies-6.1.0.dist-info/entry_points.txt,sha256=qm4ydo2p2KWN3937X8argeeSHw-BX_IJ7gS11pkADEU,2903
|
|
305
|
+
invenio_vocabularies-6.1.0.dist-info/top_level.txt,sha256=x1gRNbaODF_bCD0SBLM3nVOFPGi06cmGX5X94WKrFKk,21
|
|
306
|
+
invenio_vocabularies-6.1.0.dist-info/RECORD,,
|
{invenio_vocabularies-6.0.0.dist-info → invenio_vocabularies-6.1.0.dist-info}/entry_points.txt
RENAMED
|
@@ -49,6 +49,9 @@ vocabulary_model = invenio_vocabularies.records.models
|
|
|
49
49
|
[invenio_i18n.translations]
|
|
50
50
|
invenio_vocabularies = invenio_vocabularies
|
|
51
51
|
|
|
52
|
+
[invenio_jobs.jobs]
|
|
53
|
+
process_ror_affiliations = invenio_vocabularies.jobs:ProcessRORAffiliationsJob
|
|
54
|
+
|
|
52
55
|
[invenio_jsonschemas.schemas]
|
|
53
56
|
affiliations = invenio_vocabularies.contrib.affiliations.jsonschemas
|
|
54
57
|
awards = invenio_vocabularies.contrib.awards.jsonschemas
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|