invenio-vocabularies 9.1.2__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invenio_vocabularies/__init__.py +16 -0
- invenio_vocabularies/administration/__init__.py +10 -0
- invenio_vocabularies/administration/views/__init__.py +10 -0
- invenio_vocabularies/administration/views/vocabularies.py +43 -0
- invenio_vocabularies/alembic/17c703ce1eb7_create_names_table.py +54 -0
- invenio_vocabularies/alembic/3ba812d80559_add_internal_name_id.py +36 -0
- invenio_vocabularies/alembic/4a9a4fd235f8_create_vocabulary_schemes.py +37 -0
- invenio_vocabularies/alembic/4f365fced43f_create_vocabularies_tables.py +92 -0
- invenio_vocabularies/alembic/55a700f897b6_add_names_and_afiliations_pid_column.py +96 -0
- invenio_vocabularies/alembic/6312f33645c1_create_affiliations_table.py +54 -0
- invenio_vocabularies/alembic/676dd587542d_create_funders_vocabulary_table.py +58 -0
- invenio_vocabularies/alembic/8ff82dfb0be8_create_vocabularies_branch.py +28 -0
- invenio_vocabularies/alembic/__init__.py +9 -0
- invenio_vocabularies/alembic/af2457652217_drop_unique_constraint_from_internal_id.py +37 -0
- invenio_vocabularies/alembic/e1146238edd3_create_awards_table.py +56 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/.eslintrc.yml +11 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/.prettierrc +1 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/index.js +7 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/package.json +25 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/AwardResults.js +95 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/CustomAwardForm.js +139 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FunderDropdown.js +87 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingField.js +244 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingField.test.js +1 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingFieldItem.js +152 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingModal.js +246 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/NoAwardResults.js +37 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/index.js +8 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/index.js +7 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/index.js +7 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/index.js +7 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/i18next.js +36 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/_generatedTranslations.js +66 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ar/messages.po +96 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ar/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/bg/messages.po +94 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/bg/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ca/messages.po +94 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ca/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/cs/messages.po +97 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/cs/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/da/messages.po +94 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/da/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/de/messages.po +98 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/de/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/el/messages.po +94 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/el/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/en/messages.po +88 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/en/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/es/messages.po +96 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/es/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/et/messages.po +95 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/et/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/fa/messages.po +94 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/fa/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/fr/messages.po +96 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/fr/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/hr/messages.po +94 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/hr/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/hu/messages.po +96 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/hu/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/index.js +24 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/it/messages.po +96 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/it/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ja/messages.po +94 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ja/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ka/messages.po +94 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ka/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ko/messages.po +90 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ko/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/lt/messages.po +94 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/lt/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/no/messages.po +94 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/no/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/pl/messages.po +94 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/pl/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/pt/messages.po +94 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/pt/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ro/messages.po +95 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ro/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ru/messages.po +95 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ru/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/sk/messages.po +94 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/sk/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/sv/messages.po +98 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/sv/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/tr/messages.po +96 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/tr/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/uk/messages.po +94 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/uk/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/zh_CN/messages.po +96 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/zh_CN/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/zh_TW/messages.po +94 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/zh_TW/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/package.json +19 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/translations.pot +88 -0
- invenio_vocabularies/cli.py +175 -0
- invenio_vocabularies/config.py +231 -0
- invenio_vocabularies/contrib/__init__.py +9 -0
- invenio_vocabularies/contrib/affiliations/__init__.py +20 -0
- invenio_vocabularies/contrib/affiliations/affiliations.py +61 -0
- invenio_vocabularies/contrib/affiliations/api.py +13 -0
- invenio_vocabularies/contrib/affiliations/config.py +79 -0
- invenio_vocabularies/contrib/affiliations/datastreams.py +301 -0
- invenio_vocabularies/contrib/affiliations/facets.py +36 -0
- invenio_vocabularies/contrib/affiliations/jsonschemas/__init__.py +9 -0
- invenio_vocabularies/contrib/affiliations/jsonschemas/affiliations/affiliation-v1.0.0.json +63 -0
- invenio_vocabularies/contrib/affiliations/mappings/__init__.py +10 -0
- invenio_vocabularies/contrib/affiliations/mappings/os-v1/__init__.py +9 -0
- invenio_vocabularies/contrib/affiliations/mappings/os-v1/affiliations/affiliation-v1.0.0.json +112 -0
- invenio_vocabularies/contrib/affiliations/mappings/os-v1/affiliations/affiliation-v2.0.0.json +171 -0
- invenio_vocabularies/contrib/affiliations/mappings/os-v2/__init__.py +9 -0
- invenio_vocabularies/contrib/affiliations/mappings/os-v2/affiliations/affiliation-v1.0.0.json +112 -0
- invenio_vocabularies/contrib/affiliations/mappings/os-v2/affiliations/affiliation-v2.0.0.json +171 -0
- invenio_vocabularies/contrib/affiliations/mappings/v7/__init__.py +9 -0
- invenio_vocabularies/contrib/affiliations/mappings/v7/affiliations/affiliation-v1.0.0.json +112 -0
- invenio_vocabularies/contrib/affiliations/models.py +13 -0
- invenio_vocabularies/contrib/affiliations/resources.py +16 -0
- invenio_vocabularies/contrib/affiliations/schema.py +71 -0
- invenio_vocabularies/contrib/affiliations/services.py +15 -0
- invenio_vocabularies/contrib/awards/__init__.py +19 -0
- invenio_vocabularies/contrib/awards/api.py +13 -0
- invenio_vocabularies/contrib/awards/awards.py +96 -0
- invenio_vocabularies/contrib/awards/config.py +59 -0
- invenio_vocabularies/contrib/awards/datastreams.py +372 -0
- invenio_vocabularies/contrib/awards/jsonschemas/__init__.py +9 -0
- invenio_vocabularies/contrib/awards/jsonschemas/awards/award-v1.0.0.json +91 -0
- invenio_vocabularies/contrib/awards/mappings/__init__.py +9 -0
- invenio_vocabularies/contrib/awards/mappings/os-v1/__init__.py +9 -0
- invenio_vocabularies/contrib/awards/mappings/os-v1/awards/award-v1.0.0.json +147 -0
- invenio_vocabularies/contrib/awards/mappings/os-v2/__init__.py +9 -0
- invenio_vocabularies/contrib/awards/mappings/os-v2/awards/award-v1.0.0.json +147 -0
- invenio_vocabularies/contrib/awards/mappings/v7/__init__.py +9 -0
- invenio_vocabularies/contrib/awards/mappings/v7/awards/award-v1.0.0.json +147 -0
- invenio_vocabularies/contrib/awards/models.py +13 -0
- invenio_vocabularies/contrib/awards/resources.py +16 -0
- invenio_vocabularies/contrib/awards/schema.py +119 -0
- invenio_vocabularies/contrib/awards/serializer.py +47 -0
- invenio_vocabularies/contrib/awards/services.py +15 -0
- invenio_vocabularies/contrib/common/__init__.py +9 -0
- invenio_vocabularies/contrib/common/openaire/__init__.py +9 -0
- invenio_vocabularies/contrib/common/openaire/datastreams.py +84 -0
- invenio_vocabularies/contrib/common/ror/__init__.py +9 -0
- invenio_vocabularies/contrib/common/ror/datastreams.py +230 -0
- invenio_vocabularies/contrib/funders/__init__.py +19 -0
- invenio_vocabularies/contrib/funders/api.py +13 -0
- invenio_vocabularies/contrib/funders/config.py +78 -0
- invenio_vocabularies/contrib/funders/datastreams.py +97 -0
- invenio_vocabularies/contrib/funders/facets.py +36 -0
- invenio_vocabularies/contrib/funders/funders.py +72 -0
- invenio_vocabularies/contrib/funders/jsonschemas/__init__.py +9 -0
- invenio_vocabularies/contrib/funders/jsonschemas/funders/funder-v1.0.0.json +65 -0
- invenio_vocabularies/contrib/funders/mappings/__init__.py +9 -0
- invenio_vocabularies/contrib/funders/mappings/os-v1/__init__.py +9 -0
- invenio_vocabularies/contrib/funders/mappings/os-v1/funders/funder-v1.0.0.json +90 -0
- invenio_vocabularies/contrib/funders/mappings/os-v1/funders/funder-v2.0.0.json +156 -0
- invenio_vocabularies/contrib/funders/mappings/os-v2/__init__.py +9 -0
- invenio_vocabularies/contrib/funders/mappings/os-v2/funders/funder-v1.0.0.json +90 -0
- invenio_vocabularies/contrib/funders/mappings/os-v2/funders/funder-v2.0.0.json +156 -0
- invenio_vocabularies/contrib/funders/mappings/v7/__init__.py +9 -0
- invenio_vocabularies/contrib/funders/mappings/v7/funders/funder-v1.0.0.json +90 -0
- invenio_vocabularies/contrib/funders/models.py +13 -0
- invenio_vocabularies/contrib/funders/resources.py +16 -0
- invenio_vocabularies/contrib/funders/schema.py +88 -0
- invenio_vocabularies/contrib/funders/serializer.py +33 -0
- invenio_vocabularies/contrib/funders/services.py +15 -0
- invenio_vocabularies/contrib/names/__init__.py +19 -0
- invenio_vocabularies/contrib/names/api.py +13 -0
- invenio_vocabularies/contrib/names/components.py +24 -0
- invenio_vocabularies/contrib/names/config.py +75 -0
- invenio_vocabularies/contrib/names/datastreams.py +483 -0
- invenio_vocabularies/contrib/names/jsonschemas/__init__.py +9 -0
- invenio_vocabularies/contrib/names/jsonschemas/names/name-v1.0.0.json +68 -0
- invenio_vocabularies/contrib/names/mappings/__init__.py +9 -0
- invenio_vocabularies/contrib/names/mappings/os-v1/__init__.py +9 -0
- invenio_vocabularies/contrib/names/mappings/os-v1/names/name-v1.0.0.json +101 -0
- invenio_vocabularies/contrib/names/mappings/os-v1/names/name-v2.0.0.json +165 -0
- invenio_vocabularies/contrib/names/mappings/os-v2/__init__.py +9 -0
- invenio_vocabularies/contrib/names/mappings/os-v2/names/name-v1.0.0.json +101 -0
- invenio_vocabularies/contrib/names/mappings/os-v2/names/name-v2.0.0.json +165 -0
- invenio_vocabularies/contrib/names/mappings/v7/__init__.py +9 -0
- invenio_vocabularies/contrib/names/mappings/v7/names/name-v1.0.0.json +101 -0
- invenio_vocabularies/contrib/names/models.py +13 -0
- invenio_vocabularies/contrib/names/names.py +80 -0
- invenio_vocabularies/contrib/names/permissions.py +30 -0
- invenio_vocabularies/contrib/names/resources.py +54 -0
- invenio_vocabularies/contrib/names/s3client.py +50 -0
- invenio_vocabularies/contrib/names/schema.py +121 -0
- invenio_vocabularies/contrib/names/services.py +64 -0
- invenio_vocabularies/contrib/subjects/__init__.py +22 -0
- invenio_vocabularies/contrib/subjects/api.py +14 -0
- invenio_vocabularies/contrib/subjects/config.py +90 -0
- invenio_vocabularies/contrib/subjects/datastreams.py +63 -0
- invenio_vocabularies/contrib/subjects/euroscivoc/__init__.py +9 -0
- invenio_vocabularies/contrib/subjects/euroscivoc/datastreams.py +101 -0
- invenio_vocabularies/contrib/subjects/facets.py +23 -0
- invenio_vocabularies/contrib/subjects/gemet/__init__.py +9 -0
- invenio_vocabularies/contrib/subjects/gemet/datastreams.py +140 -0
- invenio_vocabularies/contrib/subjects/jsonschemas/__init__.py +10 -0
- invenio_vocabularies/contrib/subjects/jsonschemas/subjects/subject-v1.0.0.json +69 -0
- invenio_vocabularies/contrib/subjects/mappings/__init__.py +9 -0
- invenio_vocabularies/contrib/subjects/mappings/os-v1/__init__.py +9 -0
- invenio_vocabularies/contrib/subjects/mappings/os-v1/subjects/subject-v1.0.0.json +96 -0
- invenio_vocabularies/contrib/subjects/mappings/os-v2/__init__.py +9 -0
- invenio_vocabularies/contrib/subjects/mappings/os-v2/subjects/subject-v1.0.0.json +96 -0
- invenio_vocabularies/contrib/subjects/mappings/v7/__init__.py +9 -0
- invenio_vocabularies/contrib/subjects/mappings/v7/subjects/subject-v1.0.0.json +96 -0
- invenio_vocabularies/contrib/subjects/mesh/__init__.py +9 -0
- invenio_vocabularies/contrib/subjects/mesh/datastreams.py +48 -0
- invenio_vocabularies/contrib/subjects/models.py +14 -0
- invenio_vocabularies/contrib/subjects/nvs/__init__.py +9 -0
- invenio_vocabularies/contrib/subjects/nvs/datastreams.py +114 -0
- invenio_vocabularies/contrib/subjects/resources.py +17 -0
- invenio_vocabularies/contrib/subjects/schema.py +101 -0
- invenio_vocabularies/contrib/subjects/services.py +30 -0
- invenio_vocabularies/contrib/subjects/subjects.py +55 -0
- invenio_vocabularies/datastreams/__init__.py +18 -0
- invenio_vocabularies/datastreams/datastreams.py +239 -0
- invenio_vocabularies/datastreams/errors.py +29 -0
- invenio_vocabularies/datastreams/factories.py +86 -0
- invenio_vocabularies/datastreams/readers.py +448 -0
- invenio_vocabularies/datastreams/tasks.py +115 -0
- invenio_vocabularies/datastreams/transformers.py +130 -0
- invenio_vocabularies/datastreams/writers.py +222 -0
- invenio_vocabularies/datastreams/xml.py +34 -0
- invenio_vocabularies/ext.py +179 -0
- invenio_vocabularies/factories.py +193 -0
- invenio_vocabularies/fixtures.py +52 -0
- invenio_vocabularies/jobs.py +207 -0
- invenio_vocabularies/proxies.py +27 -0
- invenio_vocabularies/records/__init__.py +9 -0
- invenio_vocabularies/records/api.py +53 -0
- invenio_vocabularies/records/jsonschemas/__init__.py +9 -0
- invenio_vocabularies/records/jsonschemas/vocabularies/definitions-v1.0.0.json +30 -0
- invenio_vocabularies/records/jsonschemas/vocabularies/vocabulary-v1.0.0.json +55 -0
- invenio_vocabularies/records/mappings/__init__.py +9 -0
- invenio_vocabularies/records/mappings/os-v1/__init__.py +9 -0
- invenio_vocabularies/records/mappings/os-v1/vocabularies/vocabulary-v1.0.0.json +109 -0
- invenio_vocabularies/records/mappings/os-v2/__init__.py +9 -0
- invenio_vocabularies/records/mappings/os-v2/vocabularies/vocabulary-v1.0.0.json +109 -0
- invenio_vocabularies/records/mappings/v7/__init__.py +9 -0
- invenio_vocabularies/records/mappings/v7/vocabularies/vocabulary-v1.0.0.json +109 -0
- invenio_vocabularies/records/models.py +90 -0
- invenio_vocabularies/records/pidprovider.py +118 -0
- invenio_vocabularies/records/systemfields/__init__.py +16 -0
- invenio_vocabularies/records/systemfields/pid.py +125 -0
- invenio_vocabularies/records/systemfields/relations.py +51 -0
- invenio_vocabularies/resources/__init__.py +23 -0
- invenio_vocabularies/resources/config.py +105 -0
- invenio_vocabularies/resources/resource.py +156 -0
- invenio_vocabularies/resources/schema.py +21 -0
- invenio_vocabularies/resources/serializer.py +39 -0
- invenio_vocabularies/services/__init__.py +19 -0
- invenio_vocabularies/services/components.py +58 -0
- invenio_vocabularies/services/config.py +173 -0
- invenio_vocabularies/services/custom_fields/__init__.py +17 -0
- invenio_vocabularies/services/custom_fields/subject.py +82 -0
- invenio_vocabularies/services/custom_fields/vocabulary.py +96 -0
- invenio_vocabularies/services/facets.py +114 -0
- invenio_vocabularies/services/generators.py +38 -0
- invenio_vocabularies/services/permissions.py +30 -0
- invenio_vocabularies/services/querystr.py +57 -0
- invenio_vocabularies/services/results.py +110 -0
- invenio_vocabularies/services/schema.py +163 -0
- invenio_vocabularies/services/service.py +189 -0
- invenio_vocabularies/services/tasks.py +38 -0
- invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/subjects.html +23 -0
- invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/vocabularies-list.html +12 -0
- invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/vocabulary-details.html +71 -0
- invenio_vocabularies/translations/ar/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ar/LC_MESSAGES/messages.po +277 -0
- invenio_vocabularies/translations/bg/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/bg/LC_MESSAGES/messages.po +275 -0
- invenio_vocabularies/translations/ca/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ca/LC_MESSAGES/messages.po +276 -0
- invenio_vocabularies/translations/cs/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/cs/LC_MESSAGES/messages.po +281 -0
- invenio_vocabularies/translations/da/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/da/LC_MESSAGES/messages.po +271 -0
- invenio_vocabularies/translations/de/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/de/LC_MESSAGES/messages.po +293 -0
- invenio_vocabularies/translations/el/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/el/LC_MESSAGES/messages.po +275 -0
- invenio_vocabularies/translations/es/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/es/LC_MESSAGES/messages.po +281 -0
- invenio_vocabularies/translations/et/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/et/LC_MESSAGES/messages.po +276 -0
- invenio_vocabularies/translations/fa/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/fa/LC_MESSAGES/messages.po +275 -0
- invenio_vocabularies/translations/fr/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/fr/LC_MESSAGES/messages.po +279 -0
- invenio_vocabularies/translations/hr/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/hr/LC_MESSAGES/messages.po +275 -0
- invenio_vocabularies/translations/hu/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/hu/LC_MESSAGES/messages.po +280 -0
- invenio_vocabularies/translations/it/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/it/LC_MESSAGES/messages.po +277 -0
- invenio_vocabularies/translations/ja/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ja/LC_MESSAGES/messages.po +275 -0
- invenio_vocabularies/translations/ka/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ka/LC_MESSAGES/messages.po +275 -0
- invenio_vocabularies/translations/ko/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ko/LC_MESSAGES/messages.po +275 -0
- invenio_vocabularies/translations/lt/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/lt/LC_MESSAGES/messages.po +275 -0
- invenio_vocabularies/translations/messages.pot +270 -0
- invenio_vocabularies/translations/no/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/no/LC_MESSAGES/messages.po +275 -0
- invenio_vocabularies/translations/pl/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/pl/LC_MESSAGES/messages.po +275 -0
- invenio_vocabularies/translations/pt/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/pt/LC_MESSAGES/messages.po +275 -0
- invenio_vocabularies/translations/ro/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ro/LC_MESSAGES/messages.po +280 -0
- invenio_vocabularies/translations/ru/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ru/LC_MESSAGES/messages.po +276 -0
- invenio_vocabularies/translations/sk/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/sk/LC_MESSAGES/messages.po +276 -0
- invenio_vocabularies/translations/sv/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/sv/LC_MESSAGES/messages.po +280 -0
- invenio_vocabularies/translations/tr/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/tr/LC_MESSAGES/messages.po +277 -0
- invenio_vocabularies/translations/uk/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/uk/LC_MESSAGES/messages.po +275 -0
- invenio_vocabularies/translations/zh_CN/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/zh_CN/LC_MESSAGES/messages.po +276 -0
- invenio_vocabularies/translations/zh_TW/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/zh_TW/LC_MESSAGES/messages.po +275 -0
- invenio_vocabularies/views.py +53 -0
- invenio_vocabularies/webpack.py +51 -0
- invenio_vocabularies-9.1.2.dist-info/METADATA +517 -0
- invenio_vocabularies-9.1.2.dist-info/RECORD +337 -0
- invenio_vocabularies-9.1.2.dist-info/WHEEL +6 -0
- invenio_vocabularies-9.1.2.dist-info/entry_points.txt +73 -0
- invenio_vocabularies-9.1.2.dist-info/licenses/AUTHORS.rst +13 -0
- invenio_vocabularies-9.1.2.dist-info/licenses/LICENSE +21 -0
- invenio_vocabularies-9.1.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
#
|
|
3
|
+
# Copyright (C) 2021-2024 CERN.
|
|
4
|
+
#
|
|
5
|
+
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
6
|
+
# modify it under the terms of the MIT License; see LICENSE file for more
|
|
7
|
+
# details.
|
|
8
|
+
|
|
9
|
+
"""Data stream factory."""
|
|
10
|
+
|
|
11
|
+
from flask import current_app
|
|
12
|
+
|
|
13
|
+
from .datastreams import DataStream
|
|
14
|
+
from .errors import FactoryError
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class OptionsConfigMixin:
    """Mixin that resolves the available options from the Flask app config.

    Subclasses set ``CONFIG_VAR`` to the name of the config entry to read.
    """

    # Name of the config key to look up; overridden by concrete factories.
    CONFIG_VAR = None

    @classmethod
    def options(cls):
        """Return the options stored in the app config under ``CONFIG_VAR``.

        :returns: the configured value, or an empty dict when the key is unset.
        """
        app_config = current_app.config
        return app_config.get(cls.CONFIG_VAR, {})
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class Factory:
    """Base factory that instantiates a configured option by its type name.

    Concrete factories must provide an ``options()`` classmethod (e.g. through
    ``OptionsConfigMixin``) returning a mapping of type name to callable.
    """

    # Human-readable factory name, passed to ``FactoryError`` on failure.
    FACTORY_NAME = None

    @classmethod
    def create(cls, config):
        """Create an instance from a config mapping.

        :param config: mapping with a ``type`` key selecting the option and an
            optional ``args`` mapping passed to it as keyword arguments.
        :returns: the result of calling the selected option with ``args``.
        :raises FactoryError: if ``type`` is missing from ``config`` or is not
            one of the available options.
        """
        # Read the type defensively: a missing "type" must still surface as a
        # FactoryError rather than fail on an unbound local in the handler.
        type_ = config.get("type")
        args = config.get("args", {})
        try:
            option = cls.options()[type_]
        except KeyError as err:
            raise FactoryError(name=cls.FACTORY_NAME, key=type_) from err
        # Instantiate outside the ``try`` so that a KeyError raised by the
        # option itself is not misreported as an unknown type.
        return option(**args)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class WriterFactory(Factory, OptionsConfigMixin):
    """Factory for data stream writers.

    Options are read from the ``VOCABULARIES_DATASTREAM_WRITERS`` config entry.
    """

    CONFIG_VAR = "VOCABULARIES_DATASTREAM_WRITERS"
    FACTORY_NAME = "Writer"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class ReaderFactory(Factory, OptionsConfigMixin):
    """Factory for data stream readers.

    Options are read from the ``VOCABULARIES_DATASTREAM_READERS`` config entry.
    """

    CONFIG_VAR = "VOCABULARIES_DATASTREAM_READERS"
    FACTORY_NAME = "Reader"
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class TransformerFactory(Factory, OptionsConfigMixin):
    """Factory for data stream transformers.

    Options are read from the ``VOCABULARIES_DATASTREAM_TRANSFORMERS`` config
    entry.
    """

    CONFIG_VAR = "VOCABULARIES_DATASTREAM_TRANSFORMERS"
    FACTORY_NAME = "Transformer"
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class DataStreamFactory:
    """Builds ``DataStream`` objects from reader/writer/transformer configs."""

    @classmethod
    def create(cls, readers_config, writers_config, transformers_config=None, **kwargs):
        """Assemble a data stream from its component configurations.

        :param readers_config: iterable of reader configs, each passed to
            ``ReaderFactory.create``.
        :param writers_config: iterable of writer configs, each passed to
            ``WriterFactory.create``.
        :param transformers_config: optional iterable of transformer configs,
            each passed to ``TransformerFactory.create``.
        :param kwargs: extra keyword arguments forwarded to ``DataStream``.
        :returns: the constructed ``DataStream``.
        """
        readers = [ReaderFactory.create(conf) for conf in readers_config]
        writers = [WriterFactory.create(conf) for conf in writers_config]
        # A missing or empty transformers config yields no transformers.
        transformers = [
            TransformerFactory.create(conf) for conf in (transformers_config or [])
        ]
        return DataStream(
            readers=readers, writers=writers, transformers=transformers, **kwargs
        )
|
|
@@ -0,0 +1,448 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
#
|
|
3
|
+
# Copyright (C) 2021-2024 CERN.
|
|
4
|
+
# Copyright (C) 2024 University of Münster.
|
|
5
|
+
#
|
|
6
|
+
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
7
|
+
# modify it under the terms of the MIT License; see LICENSE file for more
|
|
8
|
+
# details.
|
|
9
|
+
|
|
10
|
+
"""Readers module."""
|
|
11
|
+
|
|
12
|
+
import csv
|
|
13
|
+
import gzip
|
|
14
|
+
import io
|
|
15
|
+
import json
|
|
16
|
+
import re
|
|
17
|
+
import tarfile
|
|
18
|
+
import zipfile
|
|
19
|
+
from abc import ABC, abstractmethod
|
|
20
|
+
from json.decoder import JSONDecodeError
|
|
21
|
+
|
|
22
|
+
import requests
|
|
23
|
+
import yaml
|
|
24
|
+
from lxml import etree
|
|
25
|
+
from lxml.html import fromstring
|
|
26
|
+
from lxml.html import parse as html_parse
|
|
27
|
+
|
|
28
|
+
from .errors import ReaderError
|
|
29
|
+
from .xml import etree_to_dict
|
|
30
|
+
|
|
31
|
+
# Extras dependencies
|
|
32
|
+
# "oaipmh"
|
|
33
|
+
try:
|
|
34
|
+
import oaipmh_scythe
|
|
35
|
+
except ImportError:
|
|
36
|
+
oaipmh_scythe = None
|
|
37
|
+
|
|
38
|
+
# "rdf"
|
|
39
|
+
try:
|
|
40
|
+
import rdflib
|
|
41
|
+
except ImportError:
|
|
42
|
+
rdflib = None
|
|
43
|
+
|
|
44
|
+
# "sparql"
|
|
45
|
+
try:
|
|
46
|
+
import SPARQLWrapper as sparql
|
|
47
|
+
except ImportError:
|
|
48
|
+
sparql = None
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class BaseReader(ABC):
    """Abstract base for readers that yield entries from an item or an origin."""

    def __init__(self, origin=None, mode="r", *args, **kwargs):
        """Constructor.

        :param origin: Data source (e.g. filepath).
            Can be none in case of piped readers.
        :param mode: Mode used to open ``origin`` when no item is given.
        """
        self._origin = origin
        self._mode = mode

    @abstractmethod
    def _iter(self, fp, *args, **kwargs):
        """Yield data objects read from the given file pointer."""
        pass

    def read(self, item=None, *args, **kwargs):
        """Read from ``item`` if given, otherwise open and read the origin.

        :param item: object handed to ``_iter`` as the file pointer. A falsy
            item (e.g. ``None``) falls back to opening ``origin``.
        :param args: extra positional arguments forwarded to ``_iter``.
        :param kwargs: extra keyword arguments forwarded to ``_iter``.
        :returns: a generator over whatever ``_iter`` yields.
        """
        # The file pointer is passed positionally: ``fp=item, *args`` would
        # bind the first extra positional argument to ``fp`` as well and fail
        # with "got multiple values for argument 'fp'".
        if item:
            yield from self._iter(item, *args, **kwargs)
        else:
            with open(self._origin, self._mode) as file:
                yield from self._iter(file, *args, **kwargs)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class YamlReader(BaseReader):
    """Reader that yields the elements of a YAML document."""

    def _iter(self, fp, *args, **kwargs):
        """Load the YAML stream and yield each element of the loaded data.

        An empty document yields nothing.
        """
        entries = yaml.safe_load(fp)
        if not entries:
            return
        yield from entries
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class TarReader(BaseReader):
    """Reader that yields the files contained in a tar archive."""

    def __init__(self, *args, mode="r|gz", regex=None, **kwargs):
        """Constructor.

        :param mode: mode passed to ``tarfile.open``.
        :param regex: optional pattern; when given, only members whose name
            matches it (``re.search``) are yielded.
        """
        self._regex = re.compile(regex) if regex else None
        super().__init__(*args, mode=mode, **kwargs)

    def _iter(self, fp, *args, **kwargs):
        """Yield an extracted file object per matching regular file member."""
        for member in fp:
            match = not self._regex or self._regex.search(member.name)
            if member.isfile() and match:
                yield fp.extractfile(member)

    def read(self, item=None, *args, **kwargs):
        """Open a tar archive or use the given file pointer.

        :param item: an open ``tarfile.TarFile``, or a file object (e.g. a
            ``BytesIO``) to open as an archive. A falsy item falls back to
            opening ``origin``.
        :param args: extra positional arguments forwarded to ``_iter``.
        :param kwargs: extra keyword arguments forwarded to ``_iter``.
        """
        # The archive is passed positionally: ``fp=archive, *args`` would bind
        # the first extra positional argument to ``fp`` as well and fail with
        # "got multiple values for argument 'fp'".
        if item:
            if isinstance(item, tarfile.TarFile):
                yield from self._iter(item, *args, **kwargs)
            else:
                # If the item is not already a TarFile (e.g. if it is a
                # BytesIO), try to create a TarFile from the item.
                with tarfile.open(mode=self._mode, fileobj=item) as archive:
                    yield from self._iter(archive, *args, **kwargs)
        else:
            with tarfile.open(self._origin, self._mode) as archive:
                yield from self._iter(archive, *args, **kwargs)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class SimpleHTTPReader(BaseReader):
    """Simple HTTP Reader."""

    def __init__(self, origin, id=None, ids=None, content_type=None, *args, **kwargs):
        """Constructor.

        :param origin: URL to query. When IDs are configured it is used as a
            template and formatted with ``id=<value>`` for each of them.
        :param id: Single identifier; only used when ``ids`` is empty.
        :param ids: List of identifiers; takes precedence over ``id``.
        :param content_type: Value sent in the ``Accept`` request header.
        """
        self._ids = ids if ids else ([id] if id else None)
        self.content_type = content_type
        super().__init__(origin, *args, **kwargs)

    def _iter(self, url, *args, **kwargs):
        """Query the URL (once, or once per ID) and yield each response body.

        Responses whose status code is not 200 are reported and skipped, so
        that error payloads are never handed downstream as if they were data.
        """
        base_url = url
        headers = {"Accept": self.content_type}

        if not self._ids:
            # If there are no IDs, query the base URL
            targets = [base_url]
        else:
            # Lazily build one URL per ID from the template.
            targets = (base_url.format(id=id_) for id_ in self._ids)

        for target in targets:
            resp = requests.get(target, headers=headers)
            if resp.status_code != 200:
                # Same reporting as the single-URL case; a failed lookup must
                # not be yielded as content.
                print(f"Failed to fetch URL {target}: {resp.status_code}")
                continue
            yield resp.content

    def read(self, item=None, *args, **kwargs):
        """Chooses between item and origin as url."""
        url = item if item else self._origin
        yield from self._iter(url=url, *args, **kwargs)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
class ZipReader(BaseReader):
    """Reader that yields the files contained in a ZIP archive."""

    def __init__(self, *args, options=None, regex=None, **kwargs):
        """Constructor.

        :param options: Keyword arguments forwarded to ``zipfile.ZipFile``.
        :param regex: Optional pattern; when given, only members whose
            filename matches it (``re.search``) are yielded.
        """
        self._options = options if options else {}
        self._regex = None
        if regex:
            self._regex = re.compile(regex)
        super().__init__(*args, **kwargs)

    def _iter(self, fp, *args, **kwargs):
        """Yield an open file object for every selected archive member."""
        for member in fp.infolist():
            if self._regex and not self._regex.search(member.filename):
                continue
            if member.is_dir():
                continue
            yield fp.open(member)

    def read(self, item=None, *args, **kwargs):
        """Iterate over ``item`` if given, otherwise over the origin archive.

        ``item`` may already be a ``zipfile.ZipFile``; anything else is
        passed to ``zipfile.ZipFile`` to be opened as an archive.
        """
        # https://docs.python.org/3/library/zipfile.html
        if not item:
            with zipfile.ZipFile(self._origin, **self._options) as archive:
                yield from self._iter(fp=archive, *args, **kwargs)
        elif isinstance(item, zipfile.ZipFile):
            yield from self._iter(fp=item, *args, **kwargs)
        else:
            # Not a ZipFile yet (e.g. a BytesIO): wrap it in one.
            with zipfile.ZipFile(item, **self._options) as archive:
                yield from self._iter(fp=archive, *args, **kwargs)
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
class JsonReader(BaseReader):
    """JSON object reader."""

    def _iter(self, fp, *args, **kwargs):
        """Reads (loads) a json object and yields its items.

        A top-level list is yielded element by element; any other value is
        yielded as a single entry.

        :raises ReaderError: if the content of ``fp`` is not valid JSON.
        """
        try:
            entries = json.load(fp)
        except JSONDecodeError as err:
            # Not every stream has a name (e.g. BytesIO); reading ``fp.name``
            # blindly would replace the decode error with an AttributeError.
            name = getattr(fp, "name", "<unnamed stream>")
            raise ReaderError(f"Cannot decode JSON file {name}: {str(err)}") from err

        if isinstance(entries, list):
            yield from entries
        else:
            yield entries  # just one entry
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
class JsonLinesReader(BaseReader):
    """JSON Lines reader."""

    def _iter(self, fp, *args, **kwargs):
        """Decode every line of ``fp`` as JSON and yield the entries.

        A line holding a list is yielded element by element; any other value
        is yielded as a single entry.

        :raises ReaderError: if a line is not valid JSON. The message carries
            the zero-based index of the offending line.
        """
        for idx, line in enumerate(fp):
            try:
                data = json.loads(line)
            except JSONDecodeError as err:
                # Not every stream has a name (e.g. BytesIO); reading
                # ``fp.name`` blindly would replace the decode error with an
                # AttributeError.
                name = getattr(fp, "name", "<unnamed stream>")
                raise ReaderError(
                    f"Cannot decode JSON line {name}:{idx}: {str(err)}"
                ) from err

            if isinstance(data, list):
                yield from data
            else:
                yield data  # just one entry
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
class GzipReader(BaseReader):
    """Reader that opens gzip-compressed content."""

    def _iter(self, fp, *args, **kwargs):
        """Yield a single file object opened with ``gzip.open`` on ``fp``.

        Raw ``bytes`` are wrapped in a ``BytesIO`` first; any other input is
        handed to ``gzip.open`` unchanged.
        """
        source = io.BytesIO(fp) if isinstance(fp, bytes) else fp

        with gzip.open(source) as gp:
            yield gp
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
class CSVReader(BaseReader):
    """Reader yielding the rows of a CSV file."""

    def __init__(self, *args, csv_options=None, as_dict=True, **kwargs):
        """Constructor.

        :param csv_options: Keyword arguments forwarded to the csv reader.
        :param as_dict: Use ``csv.DictReader`` when true, ``csv.reader``
            otherwise.
        """
        self.as_dict = as_dict
        self.csv_options = csv_options if csv_options else {}
        super().__init__(*args, **kwargs)

    def _iter(self, fp, *args, **kwargs):
        """Yield every row produced by the configured csv reader."""
        make_reader = csv.DictReader if self.as_dict else csv.reader
        yield from make_reader(fp, **self.csv_options)
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
class XMLReader(BaseReader):
    """Reader converting XML content into a dictionary."""

    def __init__(self, root_element=None, *args, **kwargs):
        """Constructor.

        :param root_element: Optional key to extract from the parsed
            dictionary; when unset the whole dictionary is yielded.
        """
        self.root_element = root_element
        super().__init__(*args, **kwargs)

    def _iter(self, fp, *args, **kwargs):
        """Parse ``fp`` and yield one dictionary.

        :raises ReaderError: if ``root_element`` is set but missing (or
            empty) in the parsed content.
        """
        try:
            xml_dict = etree_to_dict(fromstring(fp))
        except Exception:
            # NOTE: We parse HTML, to skip XML validation and strip XML
            # namespaces
            html_root = html_parse(fp).getroot()
            xml_dict = etree_to_dict(html_root)["html"]["body"]

        if not self.root_element:
            yield xml_dict
            return

        record = xml_dict.get(self.root_element)
        if not record:
            raise ReaderError(
                f"Root element '{self.root_element}' not found in XML entry."
            )

        yield record
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
class OAIPMHReader(BaseReader):
    """OAIPMH reader."""

    def __init__(
        self,
        *args,
        base_url=None,
        metadata_prefix=None,
        set=None,
        from_date=None,
        until_date=None,
        verb=None,
        **kwargs,
    ):
        """Constructor.

        :param base_url: URL handed to ``oaipmh_scythe.Scythe`` by ``read``.
        :param metadata_prefix: Metadata prefix sent with the requests.
            Defaults to ``"oai_dc"`` when not given.
        :param set: Value sent as ``set_`` with the list requests.
        :param from_date: Value sent as ``from_`` with the list requests.
        :param until_date: Value sent as ``until`` with the list requests.
        :param verb: ``"ListRecords"`` (the default when not given) harvests
            with ``list_records``; any other value lists the identifiers and
            fetches every record individually with ``get_record``.
        """
        self._base_url = base_url
        # Compare the argument itself against None: ``x if not None else y``
        # always evaluates to ``x`` and would never apply the default.
        self._metadata_prefix = (
            metadata_prefix if metadata_prefix is not None else "oai_dc"
        )
        self._set = set
        self._until = until_date
        self._from = from_date
        self._verb = verb if verb is not None else "ListRecords"
        super().__init__(*args, **kwargs)

    def _iter(self, scythe, *args, **kwargs):
        """Read and parse an OAIPMH stream to dict.

        Every harvested record is yielded as ``{"record": <record>}``.

        :raises ReaderError: if the request matches no records.
        """

        class OAIRecord(oaipmh_scythe.models.Record):
            """An XML unpacking implementation for more complicated formats."""

            def get_metadata(self):
                """Extract and return the record's metadata as a dictionary."""
                return xml_to_dict(
                    self.xml.find(f".//{self._oai_namespace}metadata").getchildren()[0],
                )

        if self._verb == "ListRecords":
            scythe.class_mapping["ListRecords"] = OAIRecord
            try:
                records = scythe.list_records(
                    from_=self._from,
                    until=self._until,
                    metadata_prefix=self._metadata_prefix,
                    set_=self._set,
                    ignore_deleted=True,
                )
                for record in records:
                    yield {"record": record}
            except oaipmh_scythe.NoRecordsMatch:
                raise ReaderError("No records found in OAI-PMH request.")
        else:
            scythe.class_mapping["GetRecord"] = OAIRecord
            try:
                headers = scythe.list_identifiers(
                    from_=self._from,
                    until=self._until,
                    metadata_prefix=self._metadata_prefix,
                    set_=self._set,
                    ignore_deleted=True,
                )
                for header in headers:
                    record = scythe.get_record(
                        identifier=header.identifier,
                        metadata_prefix=self._metadata_prefix,
                    )
                    yield {"record": record}
            except oaipmh_scythe.NoRecordsMatch:
                raise ReaderError("No records found in OAI-PMH request.")

    def read(self, item=None, *args, **kwargs):
        """Harvest from ``base_url``; being fed an ``item`` is not supported.

        :raises NotImplementedError: if a truthy ``item`` is passed.
        """
        if item:
            raise NotImplementedError(
                "OAIPMHReader does not support being chained after another reader"
            )
        else:
            with oaipmh_scythe.Scythe(self._base_url) as scythe:
                yield from self._iter(scythe=scythe, *args, **kwargs)
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
def xml_to_dict(tree: etree._Element):
    """Wrap the serialized form of an XML tree in a dictionary.

    Args:
        tree: The root element of the XML tree to be wrapped.

    Returns:
        A dictionary with the single key "record", whose value is the output
        of ``etree.tostring(tree)``.
    """
    return {"record": etree.tostring(tree)}
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
class RDFReader(BaseReader):
    """Base Reader class to fetch and process RDF data."""

    @property
    def skos_core(self):
        """Namespace object for the SKOS Core vocabulary."""
        return rdflib.Namespace("http://www.w3.org/2004/02/skos/core#")

    def _iter(self, rdf_graph):
        """Yield one dictionary per subject typed as a SKOS ``Concept``.

        Each dictionary carries the subject together with the whole graph.
        """
        concept_pattern = (None, rdflib.RDF.type, self.skos_core.Concept)
        for subject, _, _ in rdf_graph.triples(concept_pattern):
            yield {"subject": subject, "rdf_graph": rdf_graph}

    def read(self, item=None, *args, **kwargs):
        """Parse ``item`` as RDF/XML and yield its subjects one at a time.

        :param item: A ``gzip.GzipFile`` or raw ``bytes``; the content is
            decoded as UTF-8 before parsing.
        :raises ReaderError: if ``item`` is of any other type.
        """
        if isinstance(item, gzip.GzipFile):
            raw = item.read()
        elif isinstance(item, bytes):
            raw = item
        else:
            raise ReaderError("Unsupported content type")

        rdf_content = raw.decode("utf-8")

        graph = rdflib.Graph()
        graph.parse(io.StringIO(rdf_content), format="xml")

        yield from self._iter(graph)
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
class SPARQLReader(BaseReader):
    """Generic reader class to fetch and process RDF data from a SPARQL endpoint."""

    def __init__(self, origin, query, mode="r", client_params=None, *args, **kwargs):
        """Set up the reader for one endpoint and one query.

        :param origin: The SPARQL endpoint from which to fetch the RDF data.
        :param query: The SPARQL query to execute.
        :param mode: Mode of operation (default is 'r' for reading).
        :param client_params: Additional client parameters to pass to the SPARQL client.
        """
        self._origin = origin
        self._query = query
        self._client_params = client_params if client_params else {}

        super().__init__(origin=origin, mode=mode, *args, **kwargs)

    def _iter(self, fp, *args, **kwargs):
        """Not supported: the whole result set is fetched by ``read``."""
        raise NotImplementedError(
            "SPARQLReader downloads one result set from SPARQL and therefore does not iterate through items"
        )

    def read(self, item=None, *args, **kwargs):
        """Run the query against the endpoint and yield each result binding.

        :raises NotImplementedError: if a truthy ``item`` is passed.
        """
        if item:
            raise NotImplementedError(
                "SPARQLReader does not support being chained after another reader"
            )

        # Only pass ``agent`` when one is configured, to avoid overwriting
        # SPARQLWrapper's default value for the user agent string.
        user_agent = self._client_params.get("user_agent")
        client_kwargs = {"agent": user_agent} if user_agent else {}
        sparql_client = sparql.SPARQLWrapper(self._origin, **client_kwargs)

        sparql_client.setQuery(self._query)
        sparql_client.setReturnFormat(sparql.JSON)

        response = sparql_client.query().convert()
        yield from response["results"]["bindings"]
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
#
|
|
3
|
+
# Copyright (C) 2022-2024 CERN.
|
|
4
|
+
#
|
|
5
|
+
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
6
|
+
# modify it under the terms of the MIT License; see LICENSE file for more
|
|
7
|
+
# details.
|
|
8
|
+
|
|
9
|
+
"""Data Streams Celery tasks."""
|
|
10
|
+
|
|
11
|
+
from celery import shared_task
|
|
12
|
+
from flask import current_app
|
|
13
|
+
from invenio_access.permissions import system_identity
|
|
14
|
+
from invenio_jobs.logging.jobs import EMPTY_JOB_CTX, job_context
|
|
15
|
+
from invenio_jobs.proxies import current_runs_service
|
|
16
|
+
|
|
17
|
+
from ..datastreams import StreamEntry
|
|
18
|
+
from ..datastreams.factories import WriterFactory
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@shared_task(ignore_result=True)
def write_entry(writer_config, entry, subtask_run_id=None):
    """Write a single entry and, when possible, report the outcome.

    :param writer_config: writer configuration as accepted by the
        WriterFactory.
    :param entry: dictionary, StreamEntry is not serializable.
    :param subtask_run_id: id of the subtask run to update. The run is only
        updated when this is set and the job context provides a job id.
    """
    ctx = job_context.get()
    job_id = None if ctx is EMPTY_JOB_CTX else ctx.get("job_id", None)
    report_progress = bool(subtask_run_id and job_id)

    if report_progress:
        subtask_run = current_runs_service.get(
            system_identity, job_id=job_id, run_id=subtask_run_id
        )
        current_runs_service.start_processing_subtask(
            system_identity, subtask_run.id, job_id=job_id
        )

    writer = WriterFactory.create(config=writer_config)
    try:
        result = writer.write(StreamEntry(entry))
        outcome = {
            "errored_entries_count": 1 if result.errors else 0,
            "inserted_entries_count": 1 if result.op_type == "create" else 0,
            "updated_entries_count": 1 if result.op_type == "update" else 0,
        }
        if report_progress:
            current_runs_service.finalize_subtask(
                system_identity,
                subtask_run_id,
                job_id,
                success=not result.errors,
                **outcome,
            )
    except Exception as exc:
        # Log the failure and mark the subtask as failed instead of letting
        # the exception escape the task.
        current_app.logger.error(f"Error writing entry {entry}: {exc}")
        if report_progress:
            current_runs_service.finalize_subtask(
                system_identity,
                subtask_run_id,
                job_id,
                success=False,
                errored_entries_count=1,
            )
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@shared_task(ignore_result=True)
def write_many_entry(writer_config, entries, subtask_run_id=None):
    """Write a batch of entries and, when possible, report the outcome.

    :param writer_config: writer configuration as accepted by the
        WriterFactory.
    :param entries: list of dictionaries, StreamEntry is not serializable.
    :param subtask_run_id: id of the subtask run to update. The run is only
        updated when this is set and the job context provides a job id.
    """
    ctx = job_context.get()
    job_id = None if ctx is EMPTY_JOB_CTX else ctx.get("job_id", None)
    report_progress = bool(subtask_run_id and job_id)

    if report_progress:
        subtask_run = current_runs_service.get(
            system_identity, job_id=job_id, run_id=subtask_run_id
        )
        current_runs_service.start_processing_subtask(
            system_identity, subtask_run.id, job_id=job_id
        )

    writer = WriterFactory.create(config=writer_config)
    stream_entries = [StreamEntry(raw) for raw in entries]
    try:
        written = writer.write_many(stream_entries)
        # Three separate passes over the result, one per counter.
        errored_entries_count = sum(1 for item in written if item.errors)
        inserted_count = sum(1 for item in written if item.op_type == "create")
        updated_count = sum(1 for item in written if item.op_type == "update")
        if report_progress:
            current_runs_service.finalize_subtask(
                system_identity,
                subtask_run_id,
                job_id,
                success=True,
                errored_entries_count=errored_entries_count,
                inserted_entries_count=inserted_count,
                updated_entries_count=updated_count,
            )
    except Exception as exc:
        # Log the failure and mark the subtask as failed instead of letting
        # the exception escape the task.
        current_app.logger.error(
            f"Error writing entries {entries}: {exc}. The errorred entries count might be incorrect as an entire batch might have failed"
        )
        if report_progress:
            current_runs_service.finalize_subtask(
                system_identity,
                subtask_run_id,
                job_id,
                success=False,
            )
|