invenio-vocabularies 1.2.0__py2.py3-none-any.whl → 6.3.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of invenio-vocabularies might be problematic. Click here for more details.
- invenio_vocabularies/__init__.py +2 -2
- invenio_vocabularies/administration/__init__.py +10 -0
- invenio_vocabularies/administration/views/__init__.py +10 -0
- invenio_vocabularies/administration/views/vocabularies.py +45 -0
- invenio_vocabularies/alembic/4a9a4fd235f8_create_vocabulary_schemes.py +4 -4
- invenio_vocabularies/alembic/4f365fced43f_create_vocabularies_tables.py +2 -2
- invenio_vocabularies/alembic/55a700f897b6_add_names_and_afiliations_pid_column.py +96 -0
- invenio_vocabularies/alembic/676dd587542d_create_funders_vocabulary_table.py +1 -1
- invenio_vocabularies/alembic/e1146238edd3_create_awards_table.py +1 -1
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/.eslintrc.yml +11 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/.prettierrc +1 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/index.js +7 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/package.json +25 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/AwardResults.js +95 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/CustomAwardForm.js +139 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FunderDropdown.js +87 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingField.js +223 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingField.test.js +1 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingFieldItem.js +152 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingModal.js +270 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/NoAwardResults.js +37 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/index.js +8 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/index.js +7 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/index.js +7 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/index.js +7 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/i18next-scanner.config.js +63 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/i18next.js +36 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/index.js +1 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/package.json +53 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/scripts/compileCatalog.js +39 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/scripts/initCatalog.js +19 -0
- invenio_vocabularies/cli.py +31 -44
- invenio_vocabularies/config.py +74 -7
- invenio_vocabularies/contrib/affiliations/affiliations.py +22 -6
- invenio_vocabularies/contrib/affiliations/api.py +1 -2
- invenio_vocabularies/contrib/affiliations/config.py +10 -5
- invenio_vocabularies/contrib/affiliations/datastreams.py +186 -0
- invenio_vocabularies/contrib/affiliations/facets.py +36 -0
- invenio_vocabularies/contrib/affiliations/jsonschemas/affiliations/affiliation-v1.0.0.json +38 -7
- invenio_vocabularies/contrib/affiliations/mappings/os-v1/affiliations/affiliation-v1.0.0.json +22 -1
- invenio_vocabularies/contrib/affiliations/mappings/os-v1/affiliations/affiliation-v2.0.0.json +171 -0
- invenio_vocabularies/contrib/affiliations/mappings/os-v2/affiliations/affiliation-v1.0.0.json +22 -1
- invenio_vocabularies/contrib/affiliations/mappings/os-v2/affiliations/affiliation-v2.0.0.json +171 -0
- invenio_vocabularies/contrib/affiliations/mappings/v7/affiliations/affiliation-v1.0.0.json +22 -1
- invenio_vocabularies/contrib/affiliations/schema.py +23 -5
- invenio_vocabularies/contrib/affiliations/services.py +1 -2
- invenio_vocabularies/contrib/awards/awards.py +18 -6
- invenio_vocabularies/contrib/awards/config.py +1 -3
- invenio_vocabularies/contrib/awards/datastreams.py +246 -3
- invenio_vocabularies/contrib/awards/jsonschemas/awards/award-v1.0.0.json +41 -0
- invenio_vocabularies/contrib/awards/mappings/os-v1/awards/award-v1.0.0.json +53 -1
- invenio_vocabularies/contrib/awards/mappings/os-v2/awards/award-v1.0.0.json +53 -1
- invenio_vocabularies/contrib/awards/mappings/v7/awards/award-v1.0.0.json +53 -1
- invenio_vocabularies/contrib/awards/schema.py +27 -35
- invenio_vocabularies/contrib/awards/serializer.py +9 -1
- invenio_vocabularies/contrib/awards/services.py +1 -2
- invenio_vocabularies/contrib/common/__init__.py +9 -0
- invenio_vocabularies/contrib/common/openaire/__init__.py +9 -0
- invenio_vocabularies/contrib/common/openaire/datastreams.py +84 -0
- invenio_vocabularies/contrib/common/ror/__init__.py +9 -0
- invenio_vocabularies/contrib/common/ror/datastreams.py +220 -0
- invenio_vocabularies/contrib/funders/config.py +12 -5
- invenio_vocabularies/contrib/funders/datastreams.py +40 -62
- invenio_vocabularies/contrib/funders/facets.py +13 -5
- invenio_vocabularies/contrib/funders/funders.py +4 -2
- invenio_vocabularies/contrib/funders/jsonschemas/funders/funder-v1.0.0.json +36 -1
- invenio_vocabularies/contrib/funders/mappings/os-v1/funders/funder-v1.0.0.json +22 -1
- invenio_vocabularies/contrib/funders/mappings/os-v1/funders/funder-v2.0.0.json +156 -0
- invenio_vocabularies/contrib/funders/mappings/os-v2/funders/funder-v1.0.0.json +22 -1
- invenio_vocabularies/contrib/funders/mappings/os-v2/funders/funder-v2.0.0.json +156 -0
- invenio_vocabularies/contrib/funders/mappings/v7/funders/funder-v1.0.0.json +22 -1
- invenio_vocabularies/contrib/funders/schema.py +8 -0
- invenio_vocabularies/contrib/funders/serializer.py +2 -1
- invenio_vocabularies/contrib/names/config.py +5 -3
- invenio_vocabularies/contrib/names/datastreams.py +177 -38
- invenio_vocabularies/contrib/names/jsonschemas/names/name-v1.0.0.json +2 -6
- invenio_vocabularies/contrib/names/mappings/os-v1/names/name-v1.0.0.json +3 -0
- invenio_vocabularies/contrib/names/mappings/os-v1/names/name-v2.0.0.json +150 -0
- invenio_vocabularies/contrib/names/mappings/os-v2/names/name-v1.0.0.json +3 -0
- invenio_vocabularies/contrib/names/mappings/os-v2/names/name-v2.0.0.json +150 -0
- invenio_vocabularies/contrib/names/mappings/v7/names/name-v1.0.0.json +3 -0
- invenio_vocabularies/contrib/names/names.py +29 -13
- invenio_vocabularies/contrib/names/permissions.py +20 -0
- invenio_vocabularies/contrib/names/s3client.py +44 -0
- invenio_vocabularies/contrib/names/schema.py +31 -4
- invenio_vocabularies/contrib/subjects/config.py +9 -3
- invenio_vocabularies/contrib/subjects/datastreams.py +61 -0
- invenio_vocabularies/contrib/subjects/euroscivoc/__init__.py +9 -0
- invenio_vocabularies/contrib/subjects/euroscivoc/datastreams.py +171 -0
- invenio_vocabularies/contrib/subjects/jsonschemas/subjects/subject-v1.0.0.json +31 -0
- invenio_vocabularies/contrib/subjects/mappings/os-v1/subjects/subject-v1.0.0.json +35 -0
- invenio_vocabularies/contrib/subjects/mappings/os-v2/subjects/subject-v1.0.0.json +35 -0
- invenio_vocabularies/contrib/subjects/mappings/v7/subjects/subject-v1.0.0.json +35 -0
- invenio_vocabularies/contrib/subjects/mesh/__init__.py +9 -0
- invenio_vocabularies/contrib/subjects/mesh/datastreams.py +43 -0
- invenio_vocabularies/contrib/subjects/schema.py +47 -5
- invenio_vocabularies/contrib/subjects/subjects.py +10 -0
- invenio_vocabularies/datastreams/datastreams.py +61 -13
- invenio_vocabularies/datastreams/factories.py +1 -2
- invenio_vocabularies/datastreams/readers.py +138 -29
- invenio_vocabularies/datastreams/tasks.py +37 -0
- invenio_vocabularies/datastreams/transformers.py +17 -27
- invenio_vocabularies/datastreams/writers.py +116 -14
- invenio_vocabularies/datastreams/xml.py +34 -0
- invenio_vocabularies/ext.py +59 -5
- invenio_vocabularies/factories.py +137 -0
- invenio_vocabularies/jobs.py +133 -0
- invenio_vocabularies/proxies.py +2 -2
- invenio_vocabularies/records/jsonschemas/vocabularies/definitions-v1.0.0.json +7 -0
- invenio_vocabularies/records/jsonschemas/vocabularies/vocabulary-v1.0.0.json +1 -4
- invenio_vocabularies/records/mappings/os-v1/vocabularies/vocabulary-v1.0.0.json +3 -3
- invenio_vocabularies/records/mappings/os-v2/vocabularies/vocabulary-v1.0.0.json +3 -3
- invenio_vocabularies/records/mappings/v7/vocabularies/vocabulary-v1.0.0.json +3 -3
- invenio_vocabularies/records/models.py +8 -10
- invenio_vocabularies/records/pidprovider.py +1 -2
- invenio_vocabularies/records/systemfields/relations.py +2 -2
- invenio_vocabularies/resources/__init__.py +9 -1
- invenio_vocabularies/resources/config.py +105 -0
- invenio_vocabularies/resources/resource.py +31 -41
- invenio_vocabularies/resources/schema.py +2 -1
- invenio_vocabularies/services/__init__.py +5 -2
- invenio_vocabularies/services/config.py +179 -0
- invenio_vocabularies/services/custom_fields/__init__.py +6 -2
- invenio_vocabularies/services/custom_fields/subject.py +82 -0
- invenio_vocabularies/services/custom_fields/vocabulary.py +19 -9
- invenio_vocabularies/services/facets.py +67 -37
- invenio_vocabularies/services/permissions.py +3 -1
- invenio_vocabularies/services/results.py +110 -0
- invenio_vocabularies/services/schema.py +39 -2
- invenio_vocabularies/services/service.py +46 -94
- invenio_vocabularies/services/tasks.py +1 -1
- invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/subjects.html +23 -0
- invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/vocabularies-list.html +12 -0
- invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/vocabulary-details.html +71 -0
- invenio_vocabularies/translations/af/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/af/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/ar/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ar/LC_MESSAGES/messages.po +9 -8
- invenio_vocabularies/translations/bg/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/bg/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/ca/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ca/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/cs/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/cs/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/da/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/da/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/de/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/de/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/de_AT/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/de_AT/LC_MESSAGES/messages.po +139 -0
- invenio_vocabularies/translations/de_DE/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/de_DE/LC_MESSAGES/messages.po +139 -0
- invenio_vocabularies/translations/el/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/el/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/en/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/en_AT/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/en_AT/LC_MESSAGES/messages.po +139 -0
- invenio_vocabularies/translations/en_HU/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/en_HU/LC_MESSAGES/messages.po +139 -0
- invenio_vocabularies/translations/es/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/es/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/es_CU/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/es_CU/LC_MESSAGES/messages.po +139 -0
- invenio_vocabularies/translations/es_MX/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/es_MX/LC_MESSAGES/messages.po +139 -0
- invenio_vocabularies/translations/et/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/et/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/et_EE/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/et_EE/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/fa/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/fa/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/fa_IR/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/fa_IR/LC_MESSAGES/messages.po +139 -0
- invenio_vocabularies/translations/fr/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/fr/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/fr_CI/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/fr_CI/LC_MESSAGES/messages.po +139 -0
- invenio_vocabularies/translations/fr_FR/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/fr_FR/LC_MESSAGES/messages.po +139 -0
- invenio_vocabularies/translations/gl/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/gl/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/hi_IN/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/hi_IN/LC_MESSAGES/messages.po +139 -0
- invenio_vocabularies/translations/hr/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/hr/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/hu/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/hu/LC_MESSAGES/messages.po +4 -4
- invenio_vocabularies/translations/hu_HU/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/hu_HU/LC_MESSAGES/messages.po +139 -0
- invenio_vocabularies/translations/it/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/it/LC_MESSAGES/messages.po +4 -3
- invenio_vocabularies/translations/ja/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ja/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/ka/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ka/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/lt/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/lt/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/messages.pot +95 -48
- invenio_vocabularies/translations/ne/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ne/LC_MESSAGES/messages.po +139 -0
- invenio_vocabularies/translations/no/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/no/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/pl/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/pl/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/pt/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/pt/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/ro/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ro/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/ru/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ru/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/rw/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/rw/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/sk/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/sk/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/sv/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/sv/LC_MESSAGES/messages.po +4 -3
- invenio_vocabularies/translations/sv_SE/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/sv_SE/LC_MESSAGES/messages.po +139 -0
- invenio_vocabularies/translations/tr/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/tr/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/uk/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/uk/LC_MESSAGES/messages.po +17 -13
- invenio_vocabularies/translations/uk_UA/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/uk_UA/LC_MESSAGES/messages.po +139 -0
- invenio_vocabularies/translations/zh_CN/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/zh_CN/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/zh_TW/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/zh_TW/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/views.py +12 -26
- invenio_vocabularies/webpack.py +51 -0
- invenio_vocabularies-6.3.1.dist-info/METADATA +346 -0
- invenio_vocabularies-6.3.1.dist-info/RECORD +306 -0
- {invenio_vocabularies-1.2.0.dist-info → invenio_vocabularies-6.3.1.dist-info}/WHEEL +1 -1
- {invenio_vocabularies-1.2.0.dist-info → invenio_vocabularies-6.3.1.dist-info}/entry_points.txt +20 -0
- invenio_vocabularies-1.2.0.dist-info/METADATA +0 -133
- invenio_vocabularies-1.2.0.dist-info/RECORD +0 -220
- {invenio_vocabularies-1.2.0.dist-info → invenio_vocabularies-6.3.1.dist-info}/AUTHORS.rst +0 -0
- {invenio_vocabularies-1.2.0.dist-info → invenio_vocabularies-6.3.1.dist-info}/LICENSE +0 -0
- {invenio_vocabularies-1.2.0.dist-info → invenio_vocabularies-6.3.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
#
|
|
3
|
+
# Copyright (C) 2022-2024 CERN.
|
|
4
|
+
# Copyright (C) 2024 California Institute of Technology.
|
|
5
|
+
#
|
|
6
|
+
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
7
|
+
# modify it under the terms of the MIT License; see LICENSE file for more
|
|
8
|
+
# details.
|
|
9
|
+
|
|
10
|
+
"""MeSH subjects datastreams, transformers, writers and readers."""
|
|
11
|
+
|
|
12
|
+
from invenio_vocabularies.datastreams.transformers import (
|
|
13
|
+
BaseTransformer,
|
|
14
|
+
TransformerError,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class MeshSubjectsTransformer(BaseTransformer):
    """MeSH subjects Transformer.

    Rewrites the ``id`` of a MeSH entry from its URL form to the compact
    ``mesh:<ID>`` form.
    """

    def apply(self, stream_entry, *args, **kwargs):
        """Apply transformation on stream entry.

        :param stream_entry: entry whose ``entry`` attribute supports item
            access and holds the MeSH URL under the ``id`` key.
        :returns: the same ``stream_entry``; ``entry["id"]`` is rewritten
            in place.
        :raises TransformerError: if no usable ID can be extracted.
        """
        entry_data = stream_entry.entry

        # ID in MeSH data is the URL, ex. https://id.nlm.nih.gov/mesh/D000001
        # We just want to use the ID prefixed by "mesh:"
        try:
            mesh_id = entry_data["id"].split("/")[-1]
        except Exception as err:
            # Keep the original cause attached so the failing entry can be
            # diagnosed from the traceback.
            raise TransformerError("Not a valid MeSH ID.") from err

        if not mesh_id:
            # An empty string (or a URL ending in "/") would otherwise
            # produce the meaningless identifier "mesh:".
            raise TransformerError("Not a valid MeSH ID.")

        entry_data["id"] = "mesh:" + mesh_id
        return stream_entry
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
VOCABULARIES_DATASTREAM_READERS = {}
|
|
37
|
+
"""MeSH datastream readers."""
|
|
38
|
+
|
|
39
|
+
VOCABULARIES_DATASTREAM_WRITERS = {}
|
|
40
|
+
"""MeSH subject datastream writers."""
|
|
41
|
+
|
|
42
|
+
VOCABULARIES_DATASTREAM_TRANSFORMERS = {"mesh-subjects": MeshSubjectsTransformer}
|
|
43
|
+
"""MeSH subjects datastream transformers."""
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
#
|
|
3
3
|
# Copyright (C) 2021 Northwestern University.
|
|
4
|
-
# Copyright (C) 2021-
|
|
4
|
+
# Copyright (C) 2021-2024 CERN.
|
|
5
|
+
# Copyright (C) 2024 University of Münster.
|
|
5
6
|
#
|
|
6
7
|
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
7
8
|
# modify it under the terms of the MIT License; see LICENSE file for more
|
|
@@ -9,10 +10,19 @@
|
|
|
9
10
|
|
|
10
11
|
"""Subjects schema."""
|
|
11
12
|
|
|
12
|
-
from
|
|
13
|
-
from marshmallow_utils.fields import SanitizedUnicode
|
|
13
|
+
from functools import partial
|
|
14
14
|
|
|
15
|
-
from
|
|
15
|
+
from invenio_i18n import get_locale
|
|
16
|
+
from marshmallow import Schema, fields, pre_load
|
|
17
|
+
from marshmallow_utils.fields import IdentifierSet, SanitizedUnicode
|
|
18
|
+
from marshmallow_utils.schemas import IdentifierSchema
|
|
19
|
+
|
|
20
|
+
from ...services.schema import (
|
|
21
|
+
BaseVocabularySchema,
|
|
22
|
+
ContribVocabularyRelationSchema,
|
|
23
|
+
i18n_strings,
|
|
24
|
+
)
|
|
25
|
+
from .config import subject_schemes
|
|
16
26
|
|
|
17
27
|
|
|
18
28
|
class SubjectSchema(BaseVocabularySchema):
|
|
@@ -24,6 +34,26 @@ class SubjectSchema(BaseVocabularySchema):
|
|
|
24
34
|
id = SanitizedUnicode(required=True)
|
|
25
35
|
scheme = SanitizedUnicode(required=True)
|
|
26
36
|
subject = SanitizedUnicode(required=True)
|
|
37
|
+
title = i18n_strings
|
|
38
|
+
props = fields.Dict(keys=SanitizedUnicode(), values=SanitizedUnicode())
|
|
39
|
+
identifiers = IdentifierSet(
|
|
40
|
+
fields.Nested(
|
|
41
|
+
partial(
|
|
42
|
+
IdentifierSchema,
|
|
43
|
+
allowed_schemes=subject_schemes,
|
|
44
|
+
identifier_required=False,
|
|
45
|
+
)
|
|
46
|
+
)
|
|
47
|
+
)
|
|
48
|
+
synonyms = fields.List(SanitizedUnicode())
|
|
49
|
+
|
|
50
|
+
@pre_load
def add_subject_from_title(self, data, **kwargs):
    """Add subject from title if not present.

    When ``data`` has no ``subject`` key, it is filled from ``data["title"]``:
    the value for the current locale's language if there is a truthy one,
    otherwise the first title value.

    :param data: the raw input being loaded.
    :returns: ``data``, possibly with ``subject`` added.
    """
    if "subject" in data:
        return data

    titles = data.get("title")
    if not titles:
        # Nothing to derive a subject from: leave the data untouched so the
        # required ``subject`` field reports the problem during validation
        # instead of failing here with a bare KeyError.
        return data

    # Resolved only when actually needed.
    locale = get_locale().language
    # dict views are not subscriptable on Python 3, so the first value is
    # taken through an iterator rather than with ``values()[0]``.
    data["subject"] = titles.get(locale) or next(iter(titles.values()))
    return data
|
|
27
57
|
|
|
28
58
|
|
|
29
59
|
class SubjectRelationSchema(ContribVocabularyRelationSchema):
|
|
@@ -32,4 +62,16 @@ class SubjectRelationSchema(ContribVocabularyRelationSchema):
|
|
|
32
62
|
ftf_name = "subject"
|
|
33
63
|
parent_field_name = "subjects"
|
|
34
64
|
subject = SanitizedUnicode()
|
|
35
|
-
scheme = SanitizedUnicode()
|
|
65
|
+
scheme = SanitizedUnicode(dump_only=True)
|
|
66
|
+
title = fields.Dict(dump_only=True)
|
|
67
|
+
props = fields.Dict(dump_only=True)
|
|
68
|
+
identifiers = IdentifierSet(
|
|
69
|
+
fields.Nested(
|
|
70
|
+
partial(
|
|
71
|
+
IdentifierSchema,
|
|
72
|
+
allowed_schemes=subject_schemes,
|
|
73
|
+
identifier_required=False,
|
|
74
|
+
)
|
|
75
|
+
)
|
|
76
|
+
)
|
|
77
|
+
synonyms = fields.List(SanitizedUnicode(), dump_only=True)
|
|
@@ -9,9 +9,11 @@
|
|
|
9
9
|
|
|
10
10
|
"""Vocabulary subjects."""
|
|
11
11
|
|
|
12
|
+
from flask_resources import JSONSerializer, ResponseHandler
|
|
12
13
|
from invenio_records.dumpers import SearchDumper
|
|
13
14
|
from invenio_records.dumpers.indexedat import IndexedAtDumperExt
|
|
14
15
|
from invenio_records_resources.factories.factory import RecordTypeFactory
|
|
16
|
+
from invenio_records_resources.resources.records.headers import etag_headers
|
|
15
17
|
|
|
16
18
|
from ...records.pidprovider import PIDProviderFactory
|
|
17
19
|
from ...records.systemfields import BaseVocabularyPIDFieldContext
|
|
@@ -42,4 +44,12 @@ record_type = RecordTypeFactory(
|
|
|
42
44
|
permission_policy_cls=PermissionPolicy,
|
|
43
45
|
# Resource layer
|
|
44
46
|
endpoint_route="/subjects",
|
|
47
|
+
resource_cls_attrs={
|
|
48
|
+
"response_handlers": {
|
|
49
|
+
"application/json": ResponseHandler(JSONSerializer(), headers=etag_headers),
|
|
50
|
+
"application/vnd.inveniordm.v1+json": ResponseHandler(
|
|
51
|
+
JSONSerializer(), headers=etag_headers
|
|
52
|
+
),
|
|
53
|
+
}
|
|
54
|
+
},
|
|
45
55
|
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
#
|
|
3
|
-
# Copyright (C) 2021-
|
|
3
|
+
# Copyright (C) 2021-2024 CERN.
|
|
4
4
|
#
|
|
5
5
|
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
6
6
|
# modify it under the terms of the MIT License; see LICENSE file for more
|
|
@@ -8,17 +8,41 @@
|
|
|
8
8
|
|
|
9
9
|
"""Base data stream."""
|
|
10
10
|
|
|
11
|
+
from flask import current_app
|
|
12
|
+
|
|
11
13
|
from .errors import ReaderError, TransformerError, WriterError
|
|
12
14
|
|
|
13
15
|
|
|
14
16
|
class StreamEntry:
    """Object to encapsulate streams processing."""

    def __init__(self, entry, record=None, errors=None, op_type=None, exc=None):
        """Build a stream entry.

        :param entry: The entry object, usually a record dict.
        :param record: The record object, usually a record class.
        :param errors: Optional list of errors; a falsy value becomes ``[]``.
        :param op_type: Optional operation type.
        :param exc: Optional unhandled exception that was raised.
        """
        self.entry, self.record = entry, record
        # A fresh entry is never filtered.
        self.filtered = False
        self.errors = errors if errors else []
        self.op_type, self.exc = op_type, exc

    def log_errors(self, logger=None):
        """Report the collected errors, and the exception if any, as errors.

        :param logger: Logger to use; when ``None`` the Flask application
            logger (``current_app.logger``) is used.
        """
        log = current_app.logger if logger is None else logger
        for err in self.errors:
            log.error(f"Error in entry {self.entry}: {err}")
        if self.exc:
            log.error(f"Exception in entry {self.entry}: {self.exc}")
|
|
22
46
|
|
|
23
47
|
|
|
24
48
|
class DataStream:
|
|
@@ -39,15 +63,10 @@ class DataStream:
|
|
|
39
63
|
"""Checks if an stream_entry should be filtered out (skipped)."""
|
|
40
64
|
return False
|
|
41
65
|
|
|
42
|
-
def
|
|
43
|
-
"""
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
It will iterate over the `StreamEntry` objects returned by
|
|
47
|
-
the reader, apply the transformations and yield the result of
|
|
48
|
-
writing it.
|
|
49
|
-
"""
|
|
50
|
-
for stream_entry in self.read():
|
|
66
|
+
def process_batch(self, batch, write_many=False):
|
|
67
|
+
"""Process a batch of entries."""
|
|
68
|
+
transformed_entries = []
|
|
69
|
+
for stream_entry in batch:
|
|
51
70
|
if stream_entry.errors:
|
|
52
71
|
yield stream_entry # reading errors
|
|
53
72
|
else:
|
|
@@ -58,7 +77,31 @@ class DataStream:
|
|
|
58
77
|
transformed_entry.filtered = True
|
|
59
78
|
yield transformed_entry
|
|
60
79
|
else:
|
|
61
|
-
|
|
80
|
+
transformed_entries.append(transformed_entry)
|
|
81
|
+
if transformed_entries:
|
|
82
|
+
if write_many:
|
|
83
|
+
yield from self.batch_write(transformed_entries)
|
|
84
|
+
else:
|
|
85
|
+
yield from (self.write(entry) for entry in transformed_entries)
|
|
86
|
+
|
|
87
|
+
def process(self, batch_size=100, write_many=False, *args, **kwargs):
    """Run the stream, batch by batch.

    Entries obtained from ``self.read()`` are collected until ``batch_size``
    of them are pending; each full group is handed to ``process_batch`` and
    whatever that yields is yielded on. A final, shorter group is handled
    the same way once the reader is exhausted.

    :param batch_size: number of entries that triggers a flush.
    :param write_many: forwarded unchanged to ``process_batch``.
    """
    pending = []
    for stream_entry in self.read():
        pending.append(stream_entry)
        if len(pending) < batch_size:
            continue
        yield from self.process_batch(pending, write_many=write_many)
        # Start a new list instead of clearing the one just handed over.
        pending = []

    # Flush whatever did not fill a complete batch.
    if pending:
        yield from self.process_batch(pending, write_many=write_many)
|
|
62
105
|
|
|
63
106
|
def read(self):
|
|
64
107
|
"""Recursively read the entries."""
|
|
@@ -107,6 +150,11 @@ class DataStream:
|
|
|
107
150
|
|
|
108
151
|
return stream_entry
|
|
109
152
|
|
|
153
|
+
def batch_write(self, stream_entries, *args, **kwargs):
    """Write a batch of stream entries with every configured writer.

    Each writer in ``self._writers`` receives the whole batch through its
    ``write_many`` method, and everything it yields is yielded on in writer
    order. Nothing is caught here; errors are expected to be handled in the
    service layer.

    :param stream_entries: the entries to write.
    """
    for writer in self._writers:
        yield from writer.write_many(stream_entries)
|
|
157
|
+
|
|
110
158
|
def total(self, *args, **kwargs):
|
|
111
159
|
"""The total of entries obtained from the origin."""
|
|
112
160
|
raise NotImplementedError()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
#
|
|
3
|
-
# Copyright (C) 2021-
|
|
3
|
+
# Copyright (C) 2021-2024 CERN.
|
|
4
4
|
#
|
|
5
5
|
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
6
6
|
# modify it under the terms of the MIT License; see LICENSE file for more
|
|
@@ -36,7 +36,6 @@ class Factory:
|
|
|
36
36
|
try:
|
|
37
37
|
type_ = config["type"]
|
|
38
38
|
args = config.get("args", {})
|
|
39
|
-
|
|
40
39
|
return cls.options()[type_](**args)
|
|
41
40
|
except KeyError:
|
|
42
41
|
raise FactoryError(name=cls.FACTORY_NAME, key=type_)
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
#
|
|
3
|
-
# Copyright (C) 2021-
|
|
3
|
+
# Copyright (C) 2021-2024 CERN.
|
|
4
|
+
# Copyright (C) 2024 University of Münster.
|
|
4
5
|
#
|
|
5
6
|
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
6
7
|
# modify it under the terms of the MIT License; see LICENSE file for more
|
|
@@ -15,14 +16,21 @@ import re
|
|
|
15
16
|
import tarfile
|
|
16
17
|
import zipfile
|
|
17
18
|
from abc import ABC, abstractmethod
|
|
18
|
-
from collections import defaultdict
|
|
19
19
|
from json.decoder import JSONDecodeError
|
|
20
20
|
|
|
21
21
|
import requests
|
|
22
22
|
import yaml
|
|
23
|
+
from lxml import etree
|
|
24
|
+
from lxml.html import fromstring
|
|
23
25
|
from lxml.html import parse as html_parse
|
|
24
26
|
|
|
25
27
|
from .errors import ReaderError
|
|
28
|
+
from .xml import etree_to_dict
|
|
29
|
+
|
|
30
|
+
try:
|
|
31
|
+
import oaipmh_scythe
|
|
32
|
+
except ImportError:
|
|
33
|
+
oaipmh_scythe = None
|
|
26
34
|
|
|
27
35
|
|
|
28
36
|
class BaseReader(ABC):
|
|
@@ -79,7 +87,12 @@ class TarReader(BaseReader):
|
|
|
79
87
|
def read(self, item=None, *args, **kwargs):
|
|
80
88
|
"""Opens a tar archive or uses the given file pointer."""
|
|
81
89
|
if item:
|
|
82
|
-
|
|
90
|
+
if isinstance(item, tarfile.TarFile):
|
|
91
|
+
yield from self._iter(fp=item, *args, **kwargs)
|
|
92
|
+
else:
|
|
93
|
+
# If the item is not already a TarFile (e.g. if it is a BytesIO), try to create a TarFile from the item.
|
|
94
|
+
with tarfile.open(mode=self._mode, fileobj=item) as archive:
|
|
95
|
+
yield from self._iter(fp=archive, *args, **kwargs)
|
|
83
96
|
else:
|
|
84
97
|
with tarfile.open(self._origin, self._mode) as archive:
|
|
85
98
|
yield from self._iter(fp=archive, *args, **kwargs)
|
|
@@ -135,7 +148,12 @@ class ZipReader(BaseReader):
|
|
|
135
148
|
"""Opens a Zip archive or uses the given file pointer."""
|
|
136
149
|
# https://docs.python.org/3/library/zipfile.html
|
|
137
150
|
if item:
|
|
138
|
-
|
|
151
|
+
if isinstance(item, zipfile.ZipFile):
|
|
152
|
+
yield from self._iter(fp=item, *args, **kwargs)
|
|
153
|
+
else:
|
|
154
|
+
# If the item is not already a ZipFile (e.g. if it is a BytesIO), try to create a ZipFile from the item.
|
|
155
|
+
with zipfile.ZipFile(item, **self._options) as archive:
|
|
156
|
+
yield from self._iter(fp=archive, *args, **kwargs)
|
|
139
157
|
else:
|
|
140
158
|
with zipfile.ZipFile(self._origin, **self._options) as archive:
|
|
141
159
|
yield from self._iter(fp=archive, *args, **kwargs)
|
|
@@ -206,34 +224,125 @@ class CSVReader(BaseReader):
|
|
|
206
224
|
class XMLReader(BaseReader):
    """XML reader."""

    def __init__(self, root_element=None, *args, **kwargs):
        """Constructor.

        :param root_element: optional key looked up in the parsed dict; when
            set, only the value under that key is yielded.
        """
        self.root_element = root_element
        super().__init__(*args, **kwargs)

    def _iter(self, fp, *args, **kwargs):
        """Read and parse an XML file to dict.

        :param fp: the input handed to the lxml parsers.
        :raises ReaderError: if ``root_element`` is set but has no truthy
            value in the parsed dict.
        """
        # NOTE: We parse HTML, to skip XML validation and strip XML namespaces
        try:
            parsed = etree_to_dict(fromstring(fp))
        except Exception:
            # Second attempt: parse ``fp`` as a document and unwrap the
            # html/body wrappers.
            parsed = etree_to_dict(html_parse(fp).getroot())["html"]["body"]

        if not self.root_element:
            yield parsed
            return

        record = parsed.get(self.root_element)
        if not record:
            raise ReaderError(
                f"Root element '{self.root_element}' not found in XML entry."
            )
        yield record
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
class OAIPMHReader(BaseReader):
    """OAI-PMH reader.

    Harvests records through an ``oaipmh_scythe.Scythe`` client created for
    ``base_url`` and yields one ``{"record": <record>}`` dictionary per
    harvested record.
    """

    def __init__(
        self,
        *args,
        base_url=None,
        metadata_prefix=None,
        set=None,
        from_date=None,
        until_date=None,
        verb=None,
        **kwargs,
    ):
        """Constructor.

        :param base_url: URL handed to ``oaipmh_scythe.Scythe`` in ``read``.
        :param metadata_prefix: metadata format to request; ``"oai_dc"`` when
            not given.
        :param set: forwarded to the client as ``set_``. (The name shadows
            the builtin but is part of the public keyword interface.)
        :param from_date: forwarded to the client as ``from_``.
        :param until_date: forwarded to the client as ``until``.
        :param verb: ``"ListRecords"`` (the default when not given) harvests
            with ``list_records``; any other value lists identifiers and
            fetches every record individually with ``get_record``.
        """
        self._base_url = base_url
        # The defaults must be decided on the argument itself. The previous
        # ``x if not None else default`` tested the constant ``not None``
        # (always True), so the defaults were never applied.
        self._metadata_prefix = (
            metadata_prefix if metadata_prefix is not None else "oai_dc"
        )
        self._set = set
        self._until = until_date
        self._from = from_date
        self._verb = verb if verb is not None else "ListRecords"
        super().__init__(*args, **kwargs)

    def _iter(self, scythe, *args, **kwargs):
        """Read and parse an OAIPMH stream to dict.

        :param scythe: the client used to issue the requests.
        :raises ReaderError: if the client reports ``NoRecordsMatch``.
        """

        class OAIRecord(oaipmh_scythe.models.Record):
            """An XML unpacking implementation for more complicated formats."""

            def get_metadata(self):
                """Extract and return the record's metadata as a dictionary."""
                # Only the first child of the <metadata> element is used.
                return xml_to_dict(
                    self.xml.find(f".//{self._oai_namespace}metadata").getchildren()[0],
                )

        if self._verb == "ListRecords":
            # Make the client build OAIRecord instances for this verb.
            scythe.class_mapping["ListRecords"] = OAIRecord
            try:
                records = scythe.list_records(
                    from_=self._from,
                    until=self._until,
                    metadata_prefix=self._metadata_prefix,
                    set_=self._set,
                    ignore_deleted=True,
                )
                for record in records:
                    yield {"record": record}
            except oaipmh_scythe.NoRecordsMatch:
                raise ReaderError("No records found in OAI-PMH request.")
        else:
            # Any other verb: list the identifiers first, then fetch each
            # record on its own.
            scythe.class_mapping["GetRecord"] = OAIRecord
            try:
                headers = scythe.list_identifiers(
                    from_=self._from,
                    until=self._until,
                    metadata_prefix=self._metadata_prefix,
                    set_=self._set,
                    ignore_deleted=True,
                )
                for header in headers:
                    record = scythe.get_record(
                        identifier=header.identifier,
                        metadata_prefix=self._metadata_prefix,
                    )
                    yield {"record": record}
            except oaipmh_scythe.NoRecordsMatch:
                raise ReaderError("No records found in OAI-PMH request.")

    def read(self, item=None, *args, **kwargs):
        """Open a client for ``base_url`` and yield the harvested records.

        This is a generator, so the ``NotImplementedError`` for a truthy
        ``item`` is raised when iteration starts, not when ``read`` is called.

        :raises NotImplementedError: if ``item`` is given, i.e. the reader is
            chained after another reader.
        """
        if item:
            raise NotImplementedError(
                "OAIPMHReader does not support being chained after another reader"
            )
        else:
            with oaipmh_scythe.Scythe(self._base_url) as scythe:
                yield from self._iter(scythe=scythe, *args, **kwargs)
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
def xml_to_dict(tree: etree._Element):
    """Wrap the serialized form of an XML element in a dictionary.

    The element is not unpacked into nested keys; it is serialized with
    ``etree.tostring`` and stored as-is.

    Args:
        tree: The XML element to serialize.

    Returns:
        A dictionary whose only key, "record", holds the serialized element.
    """
    return {"record": etree.tostring(tree)}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
#
|
|
3
|
+
# Copyright (C) 2022-2024 CERN.
|
|
4
|
+
#
|
|
5
|
+
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
6
|
+
# modify it under the terms of the MIT License; see LICENSE file for more
|
|
7
|
+
# details.
|
|
8
|
+
|
|
9
|
+
"""Data Streams Celery tasks."""
|
|
10
|
+
|
|
11
|
+
from celery import shared_task
|
|
12
|
+
|
|
13
|
+
from ..datastreams import StreamEntry
|
|
14
|
+
from ..datastreams.factories import WriterFactory
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@shared_task(ignore_result=True)
def write_entry(writer_config, entry):
    """Write a single entry with a writer built from ``writer_config``.

    :param writer_config: writer configuration as accepted by the
        WriterFactory.
    :param entry: dictionary; it is wrapped in a StreamEntry here because
        StreamEntry is not serializable.
    """
    WriterFactory.create(config=writer_config).write(StreamEntry(entry))
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@shared_task(ignore_result=True)
def write_many_entry(writer_config, entries):
    """Write several entries with a writer built from ``writer_config``.

    :param writer_config: writer configuration as accepted by the
        WriterFactory.
    :param entries: list of dictionaries; each one is wrapped in a
        StreamEntry here because StreamEntry is not serializable.
    """
    target = WriterFactory.create(config=writer_config)
    wrapped = list(map(StreamEntry, entries))
    target.write_many(wrapped)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
#
|
|
3
|
-
# Copyright (C) 2021-
|
|
3
|
+
# Copyright (C) 2021-2024 CERN.
|
|
4
4
|
#
|
|
5
5
|
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
6
6
|
# modify it under the terms of the MIT License; see LICENSE file for more
|
|
@@ -9,11 +9,11 @@
|
|
|
9
9
|
"""Transformers module."""
|
|
10
10
|
|
|
11
11
|
from abc import ABC, abstractmethod
|
|
12
|
-
from collections import defaultdict
|
|
13
12
|
|
|
14
13
|
from lxml import etree
|
|
15
14
|
|
|
16
15
|
from .errors import TransformerError
|
|
16
|
+
from .xml import etree_to_dict
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
class BaseTransformer(ABC):
|
|
@@ -32,42 +32,32 @@ class BaseTransformer(ABC):
|
|
|
32
32
|
class XMLTransformer(BaseTransformer):
|
|
33
33
|
"""XML transformer."""
|
|
34
34
|
|
|
35
|
+
def __init__(self, root_element=None, *args, **kwargs):
    """Store the optional root element and initialise the base transformer.

    :param root_element: key to pick out of the parsed entry in ``apply``.
        When falsy, the whole parsed body is kept.
    """
    self.root_element = root_element
    super().__init__(*args, **kwargs)
|
|
39
|
+
|
|
35
40
|
@classmethod
def _xml_to_etree(cls, xml):
    """Parse ``xml`` with ``etree.HTML`` and return the resulting tree."""
    parsed = etree.HTML(xml)
    return parsed
|
|
39
44
|
|
|
40
|
-
@classmethod
|
|
41
|
-
def _etree_to_dict(cls, tree):
|
|
42
|
-
d = {tree.tag: {} if tree.attrib else None}
|
|
43
|
-
children = list(tree)
|
|
44
|
-
if children:
|
|
45
|
-
dd = defaultdict(list)
|
|
46
|
-
for dc in map(cls._etree_to_dict, children):
|
|
47
|
-
for k, v in dc.items():
|
|
48
|
-
dd[k].append(v)
|
|
49
|
-
d = {tree.tag: {k: v[0] if len(v) == 1 else v for k, v in dd.items()}}
|
|
50
|
-
if tree.attrib:
|
|
51
|
-
d[tree.tag].update(("@" + k, v) for k, v in tree.attrib.items())
|
|
52
|
-
if tree.text:
|
|
53
|
-
text = tree.text.strip()
|
|
54
|
-
if children or tree.attrib:
|
|
55
|
-
if text:
|
|
56
|
-
d[tree.tag]["#text"] = text
|
|
57
|
-
else:
|
|
58
|
-
d[tree.tag] = text
|
|
59
|
-
return d
|
|
60
|
-
|
|
61
45
|
def apply(self, stream_entry, **kwargs):
    """Replace the entry's XML payload with its dictionary form.

    The entry is parsed via ``_xml_to_etree`` and the content found under
    ``html -> body`` is taken. If ``root_element`` is set, only the value
    stored under that key is kept.

    :param stream_entry: entry whose ``entry`` attribute holds the XML.
    :returns: the same ``stream_entry`` with ``entry`` replaced.
    :raises TransformerError: if ``root_element`` is set but is missing (or
        empty) in the parsed body.
    """
    body = etree_to_dict(self._xml_to_etree(stream_entry.entry))["html"]["body"]

    if not self.root_element:
        stream_entry.entry = body
        return stream_entry

    picked = body.get(self.root_element)
    if not picked:
        raise TransformerError(
            f"Root element '{self.root_element}' not found in XML entry."
        )

    stream_entry.entry = picked
    return stream_entry
|