invenio-vocabularies 9.1.2__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invenio_vocabularies/__init__.py +16 -0
- invenio_vocabularies/administration/__init__.py +10 -0
- invenio_vocabularies/administration/views/__init__.py +10 -0
- invenio_vocabularies/administration/views/vocabularies.py +43 -0
- invenio_vocabularies/alembic/17c703ce1eb7_create_names_table.py +54 -0
- invenio_vocabularies/alembic/3ba812d80559_add_internal_name_id.py +36 -0
- invenio_vocabularies/alembic/4a9a4fd235f8_create_vocabulary_schemes.py +37 -0
- invenio_vocabularies/alembic/4f365fced43f_create_vocabularies_tables.py +92 -0
- invenio_vocabularies/alembic/55a700f897b6_add_names_and_afiliations_pid_column.py +96 -0
- invenio_vocabularies/alembic/6312f33645c1_create_affiliations_table.py +54 -0
- invenio_vocabularies/alembic/676dd587542d_create_funders_vocabulary_table.py +58 -0
- invenio_vocabularies/alembic/8ff82dfb0be8_create_vocabularies_branch.py +28 -0
- invenio_vocabularies/alembic/__init__.py +9 -0
- invenio_vocabularies/alembic/af2457652217_drop_unique_constraint_from_internal_id.py +37 -0
- invenio_vocabularies/alembic/e1146238edd3_create_awards_table.py +56 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/.eslintrc.yml +11 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/.prettierrc +1 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/index.js +7 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/package.json +25 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/AwardResults.js +95 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/CustomAwardForm.js +139 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FunderDropdown.js +87 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingField.js +244 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingField.test.js +1 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingFieldItem.js +152 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingModal.js +246 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/NoAwardResults.js +37 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/index.js +8 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/index.js +7 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/index.js +7 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/index.js +7 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/i18next.js +36 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/_generatedTranslations.js +66 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ar/messages.po +96 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ar/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/bg/messages.po +94 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/bg/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ca/messages.po +94 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ca/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/cs/messages.po +97 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/cs/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/da/messages.po +94 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/da/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/de/messages.po +98 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/de/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/el/messages.po +94 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/el/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/en/messages.po +88 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/en/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/es/messages.po +96 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/es/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/et/messages.po +95 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/et/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/fa/messages.po +94 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/fa/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/fr/messages.po +96 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/fr/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/hr/messages.po +94 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/hr/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/hu/messages.po +96 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/hu/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/index.js +24 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/it/messages.po +96 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/it/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ja/messages.po +94 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ja/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ka/messages.po +94 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ka/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ko/messages.po +90 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ko/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/lt/messages.po +94 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/lt/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/no/messages.po +94 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/no/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/pl/messages.po +94 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/pl/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/pt/messages.po +94 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/pt/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ro/messages.po +95 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ro/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ru/messages.po +95 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ru/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/sk/messages.po +94 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/sk/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/sv/messages.po +98 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/sv/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/tr/messages.po +96 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/tr/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/uk/messages.po +94 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/uk/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/zh_CN/messages.po +96 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/zh_CN/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/zh_TW/messages.po +94 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/zh_TW/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/package.json +19 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/translations.pot +88 -0
- invenio_vocabularies/cli.py +175 -0
- invenio_vocabularies/config.py +231 -0
- invenio_vocabularies/contrib/__init__.py +9 -0
- invenio_vocabularies/contrib/affiliations/__init__.py +20 -0
- invenio_vocabularies/contrib/affiliations/affiliations.py +61 -0
- invenio_vocabularies/contrib/affiliations/api.py +13 -0
- invenio_vocabularies/contrib/affiliations/config.py +79 -0
- invenio_vocabularies/contrib/affiliations/datastreams.py +301 -0
- invenio_vocabularies/contrib/affiliations/facets.py +36 -0
- invenio_vocabularies/contrib/affiliations/jsonschemas/__init__.py +9 -0
- invenio_vocabularies/contrib/affiliations/jsonschemas/affiliations/affiliation-v1.0.0.json +63 -0
- invenio_vocabularies/contrib/affiliations/mappings/__init__.py +10 -0
- invenio_vocabularies/contrib/affiliations/mappings/os-v1/__init__.py +9 -0
- invenio_vocabularies/contrib/affiliations/mappings/os-v1/affiliations/affiliation-v1.0.0.json +112 -0
- invenio_vocabularies/contrib/affiliations/mappings/os-v1/affiliations/affiliation-v2.0.0.json +171 -0
- invenio_vocabularies/contrib/affiliations/mappings/os-v2/__init__.py +9 -0
- invenio_vocabularies/contrib/affiliations/mappings/os-v2/affiliations/affiliation-v1.0.0.json +112 -0
- invenio_vocabularies/contrib/affiliations/mappings/os-v2/affiliations/affiliation-v2.0.0.json +171 -0
- invenio_vocabularies/contrib/affiliations/mappings/v7/__init__.py +9 -0
- invenio_vocabularies/contrib/affiliations/mappings/v7/affiliations/affiliation-v1.0.0.json +112 -0
- invenio_vocabularies/contrib/affiliations/models.py +13 -0
- invenio_vocabularies/contrib/affiliations/resources.py +16 -0
- invenio_vocabularies/contrib/affiliations/schema.py +71 -0
- invenio_vocabularies/contrib/affiliations/services.py +15 -0
- invenio_vocabularies/contrib/awards/__init__.py +19 -0
- invenio_vocabularies/contrib/awards/api.py +13 -0
- invenio_vocabularies/contrib/awards/awards.py +96 -0
- invenio_vocabularies/contrib/awards/config.py +59 -0
- invenio_vocabularies/contrib/awards/datastreams.py +372 -0
- invenio_vocabularies/contrib/awards/jsonschemas/__init__.py +9 -0
- invenio_vocabularies/contrib/awards/jsonschemas/awards/award-v1.0.0.json +91 -0
- invenio_vocabularies/contrib/awards/mappings/__init__.py +9 -0
- invenio_vocabularies/contrib/awards/mappings/os-v1/__init__.py +9 -0
- invenio_vocabularies/contrib/awards/mappings/os-v1/awards/award-v1.0.0.json +147 -0
- invenio_vocabularies/contrib/awards/mappings/os-v2/__init__.py +9 -0
- invenio_vocabularies/contrib/awards/mappings/os-v2/awards/award-v1.0.0.json +147 -0
- invenio_vocabularies/contrib/awards/mappings/v7/__init__.py +9 -0
- invenio_vocabularies/contrib/awards/mappings/v7/awards/award-v1.0.0.json +147 -0
- invenio_vocabularies/contrib/awards/models.py +13 -0
- invenio_vocabularies/contrib/awards/resources.py +16 -0
- invenio_vocabularies/contrib/awards/schema.py +119 -0
- invenio_vocabularies/contrib/awards/serializer.py +47 -0
- invenio_vocabularies/contrib/awards/services.py +15 -0
- invenio_vocabularies/contrib/common/__init__.py +9 -0
- invenio_vocabularies/contrib/common/openaire/__init__.py +9 -0
- invenio_vocabularies/contrib/common/openaire/datastreams.py +84 -0
- invenio_vocabularies/contrib/common/ror/__init__.py +9 -0
- invenio_vocabularies/contrib/common/ror/datastreams.py +230 -0
- invenio_vocabularies/contrib/funders/__init__.py +19 -0
- invenio_vocabularies/contrib/funders/api.py +13 -0
- invenio_vocabularies/contrib/funders/config.py +78 -0
- invenio_vocabularies/contrib/funders/datastreams.py +97 -0
- invenio_vocabularies/contrib/funders/facets.py +36 -0
- invenio_vocabularies/contrib/funders/funders.py +72 -0
- invenio_vocabularies/contrib/funders/jsonschemas/__init__.py +9 -0
- invenio_vocabularies/contrib/funders/jsonschemas/funders/funder-v1.0.0.json +65 -0
- invenio_vocabularies/contrib/funders/mappings/__init__.py +9 -0
- invenio_vocabularies/contrib/funders/mappings/os-v1/__init__.py +9 -0
- invenio_vocabularies/contrib/funders/mappings/os-v1/funders/funder-v1.0.0.json +90 -0
- invenio_vocabularies/contrib/funders/mappings/os-v1/funders/funder-v2.0.0.json +156 -0
- invenio_vocabularies/contrib/funders/mappings/os-v2/__init__.py +9 -0
- invenio_vocabularies/contrib/funders/mappings/os-v2/funders/funder-v1.0.0.json +90 -0
- invenio_vocabularies/contrib/funders/mappings/os-v2/funders/funder-v2.0.0.json +156 -0
- invenio_vocabularies/contrib/funders/mappings/v7/__init__.py +9 -0
- invenio_vocabularies/contrib/funders/mappings/v7/funders/funder-v1.0.0.json +90 -0
- invenio_vocabularies/contrib/funders/models.py +13 -0
- invenio_vocabularies/contrib/funders/resources.py +16 -0
- invenio_vocabularies/contrib/funders/schema.py +88 -0
- invenio_vocabularies/contrib/funders/serializer.py +33 -0
- invenio_vocabularies/contrib/funders/services.py +15 -0
- invenio_vocabularies/contrib/names/__init__.py +19 -0
- invenio_vocabularies/contrib/names/api.py +13 -0
- invenio_vocabularies/contrib/names/components.py +24 -0
- invenio_vocabularies/contrib/names/config.py +75 -0
- invenio_vocabularies/contrib/names/datastreams.py +483 -0
- invenio_vocabularies/contrib/names/jsonschemas/__init__.py +9 -0
- invenio_vocabularies/contrib/names/jsonschemas/names/name-v1.0.0.json +68 -0
- invenio_vocabularies/contrib/names/mappings/__init__.py +9 -0
- invenio_vocabularies/contrib/names/mappings/os-v1/__init__.py +9 -0
- invenio_vocabularies/contrib/names/mappings/os-v1/names/name-v1.0.0.json +101 -0
- invenio_vocabularies/contrib/names/mappings/os-v1/names/name-v2.0.0.json +165 -0
- invenio_vocabularies/contrib/names/mappings/os-v2/__init__.py +9 -0
- invenio_vocabularies/contrib/names/mappings/os-v2/names/name-v1.0.0.json +101 -0
- invenio_vocabularies/contrib/names/mappings/os-v2/names/name-v2.0.0.json +165 -0
- invenio_vocabularies/contrib/names/mappings/v7/__init__.py +9 -0
- invenio_vocabularies/contrib/names/mappings/v7/names/name-v1.0.0.json +101 -0
- invenio_vocabularies/contrib/names/models.py +13 -0
- invenio_vocabularies/contrib/names/names.py +80 -0
- invenio_vocabularies/contrib/names/permissions.py +30 -0
- invenio_vocabularies/contrib/names/resources.py +54 -0
- invenio_vocabularies/contrib/names/s3client.py +50 -0
- invenio_vocabularies/contrib/names/schema.py +121 -0
- invenio_vocabularies/contrib/names/services.py +64 -0
- invenio_vocabularies/contrib/subjects/__init__.py +22 -0
- invenio_vocabularies/contrib/subjects/api.py +14 -0
- invenio_vocabularies/contrib/subjects/config.py +90 -0
- invenio_vocabularies/contrib/subjects/datastreams.py +63 -0
- invenio_vocabularies/contrib/subjects/euroscivoc/__init__.py +9 -0
- invenio_vocabularies/contrib/subjects/euroscivoc/datastreams.py +101 -0
- invenio_vocabularies/contrib/subjects/facets.py +23 -0
- invenio_vocabularies/contrib/subjects/gemet/__init__.py +9 -0
- invenio_vocabularies/contrib/subjects/gemet/datastreams.py +140 -0
- invenio_vocabularies/contrib/subjects/jsonschemas/__init__.py +10 -0
- invenio_vocabularies/contrib/subjects/jsonschemas/subjects/subject-v1.0.0.json +69 -0
- invenio_vocabularies/contrib/subjects/mappings/__init__.py +9 -0
- invenio_vocabularies/contrib/subjects/mappings/os-v1/__init__.py +9 -0
- invenio_vocabularies/contrib/subjects/mappings/os-v1/subjects/subject-v1.0.0.json +96 -0
- invenio_vocabularies/contrib/subjects/mappings/os-v2/__init__.py +9 -0
- invenio_vocabularies/contrib/subjects/mappings/os-v2/subjects/subject-v1.0.0.json +96 -0
- invenio_vocabularies/contrib/subjects/mappings/v7/__init__.py +9 -0
- invenio_vocabularies/contrib/subjects/mappings/v7/subjects/subject-v1.0.0.json +96 -0
- invenio_vocabularies/contrib/subjects/mesh/__init__.py +9 -0
- invenio_vocabularies/contrib/subjects/mesh/datastreams.py +48 -0
- invenio_vocabularies/contrib/subjects/models.py +14 -0
- invenio_vocabularies/contrib/subjects/nvs/__init__.py +9 -0
- invenio_vocabularies/contrib/subjects/nvs/datastreams.py +114 -0
- invenio_vocabularies/contrib/subjects/resources.py +17 -0
- invenio_vocabularies/contrib/subjects/schema.py +101 -0
- invenio_vocabularies/contrib/subjects/services.py +30 -0
- invenio_vocabularies/contrib/subjects/subjects.py +55 -0
- invenio_vocabularies/datastreams/__init__.py +18 -0
- invenio_vocabularies/datastreams/datastreams.py +239 -0
- invenio_vocabularies/datastreams/errors.py +29 -0
- invenio_vocabularies/datastreams/factories.py +86 -0
- invenio_vocabularies/datastreams/readers.py +448 -0
- invenio_vocabularies/datastreams/tasks.py +115 -0
- invenio_vocabularies/datastreams/transformers.py +130 -0
- invenio_vocabularies/datastreams/writers.py +222 -0
- invenio_vocabularies/datastreams/xml.py +34 -0
- invenio_vocabularies/ext.py +179 -0
- invenio_vocabularies/factories.py +193 -0
- invenio_vocabularies/fixtures.py +52 -0
- invenio_vocabularies/jobs.py +207 -0
- invenio_vocabularies/proxies.py +27 -0
- invenio_vocabularies/records/__init__.py +9 -0
- invenio_vocabularies/records/api.py +53 -0
- invenio_vocabularies/records/jsonschemas/__init__.py +9 -0
- invenio_vocabularies/records/jsonschemas/vocabularies/definitions-v1.0.0.json +30 -0
- invenio_vocabularies/records/jsonschemas/vocabularies/vocabulary-v1.0.0.json +55 -0
- invenio_vocabularies/records/mappings/__init__.py +9 -0
- invenio_vocabularies/records/mappings/os-v1/__init__.py +9 -0
- invenio_vocabularies/records/mappings/os-v1/vocabularies/vocabulary-v1.0.0.json +109 -0
- invenio_vocabularies/records/mappings/os-v2/__init__.py +9 -0
- invenio_vocabularies/records/mappings/os-v2/vocabularies/vocabulary-v1.0.0.json +109 -0
- invenio_vocabularies/records/mappings/v7/__init__.py +9 -0
- invenio_vocabularies/records/mappings/v7/vocabularies/vocabulary-v1.0.0.json +109 -0
- invenio_vocabularies/records/models.py +90 -0
- invenio_vocabularies/records/pidprovider.py +118 -0
- invenio_vocabularies/records/systemfields/__init__.py +16 -0
- invenio_vocabularies/records/systemfields/pid.py +125 -0
- invenio_vocabularies/records/systemfields/relations.py +51 -0
- invenio_vocabularies/resources/__init__.py +23 -0
- invenio_vocabularies/resources/config.py +105 -0
- invenio_vocabularies/resources/resource.py +156 -0
- invenio_vocabularies/resources/schema.py +21 -0
- invenio_vocabularies/resources/serializer.py +39 -0
- invenio_vocabularies/services/__init__.py +19 -0
- invenio_vocabularies/services/components.py +58 -0
- invenio_vocabularies/services/config.py +173 -0
- invenio_vocabularies/services/custom_fields/__init__.py +17 -0
- invenio_vocabularies/services/custom_fields/subject.py +82 -0
- invenio_vocabularies/services/custom_fields/vocabulary.py +96 -0
- invenio_vocabularies/services/facets.py +114 -0
- invenio_vocabularies/services/generators.py +38 -0
- invenio_vocabularies/services/permissions.py +30 -0
- invenio_vocabularies/services/querystr.py +57 -0
- invenio_vocabularies/services/results.py +110 -0
- invenio_vocabularies/services/schema.py +163 -0
- invenio_vocabularies/services/service.py +189 -0
- invenio_vocabularies/services/tasks.py +38 -0
- invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/subjects.html +23 -0
- invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/vocabularies-list.html +12 -0
- invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/vocabulary-details.html +71 -0
- invenio_vocabularies/translations/ar/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ar/LC_MESSAGES/messages.po +277 -0
- invenio_vocabularies/translations/bg/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/bg/LC_MESSAGES/messages.po +275 -0
- invenio_vocabularies/translations/ca/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ca/LC_MESSAGES/messages.po +276 -0
- invenio_vocabularies/translations/cs/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/cs/LC_MESSAGES/messages.po +281 -0
- invenio_vocabularies/translations/da/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/da/LC_MESSAGES/messages.po +271 -0
- invenio_vocabularies/translations/de/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/de/LC_MESSAGES/messages.po +293 -0
- invenio_vocabularies/translations/el/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/el/LC_MESSAGES/messages.po +275 -0
- invenio_vocabularies/translations/es/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/es/LC_MESSAGES/messages.po +281 -0
- invenio_vocabularies/translations/et/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/et/LC_MESSAGES/messages.po +276 -0
- invenio_vocabularies/translations/fa/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/fa/LC_MESSAGES/messages.po +275 -0
- invenio_vocabularies/translations/fr/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/fr/LC_MESSAGES/messages.po +279 -0
- invenio_vocabularies/translations/hr/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/hr/LC_MESSAGES/messages.po +275 -0
- invenio_vocabularies/translations/hu/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/hu/LC_MESSAGES/messages.po +280 -0
- invenio_vocabularies/translations/it/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/it/LC_MESSAGES/messages.po +277 -0
- invenio_vocabularies/translations/ja/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ja/LC_MESSAGES/messages.po +275 -0
- invenio_vocabularies/translations/ka/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ka/LC_MESSAGES/messages.po +275 -0
- invenio_vocabularies/translations/ko/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ko/LC_MESSAGES/messages.po +275 -0
- invenio_vocabularies/translations/lt/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/lt/LC_MESSAGES/messages.po +275 -0
- invenio_vocabularies/translations/messages.pot +270 -0
- invenio_vocabularies/translations/no/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/no/LC_MESSAGES/messages.po +275 -0
- invenio_vocabularies/translations/pl/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/pl/LC_MESSAGES/messages.po +275 -0
- invenio_vocabularies/translations/pt/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/pt/LC_MESSAGES/messages.po +275 -0
- invenio_vocabularies/translations/ro/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ro/LC_MESSAGES/messages.po +280 -0
- invenio_vocabularies/translations/ru/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ru/LC_MESSAGES/messages.po +276 -0
- invenio_vocabularies/translations/sk/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/sk/LC_MESSAGES/messages.po +276 -0
- invenio_vocabularies/translations/sv/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/sv/LC_MESSAGES/messages.po +280 -0
- invenio_vocabularies/translations/tr/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/tr/LC_MESSAGES/messages.po +277 -0
- invenio_vocabularies/translations/uk/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/uk/LC_MESSAGES/messages.po +275 -0
- invenio_vocabularies/translations/zh_CN/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/zh_CN/LC_MESSAGES/messages.po +276 -0
- invenio_vocabularies/translations/zh_TW/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/zh_TW/LC_MESSAGES/messages.po +275 -0
- invenio_vocabularies/views.py +53 -0
- invenio_vocabularies/webpack.py +51 -0
- invenio_vocabularies-9.1.2.dist-info/METADATA +517 -0
- invenio_vocabularies-9.1.2.dist-info/RECORD +337 -0
- invenio_vocabularies-9.1.2.dist-info/WHEEL +6 -0
- invenio_vocabularies-9.1.2.dist-info/entry_points.txt +73 -0
- invenio_vocabularies-9.1.2.dist-info/licenses/AUTHORS.rst +13 -0
- invenio_vocabularies-9.1.2.dist-info/licenses/LICENSE +21 -0
- invenio_vocabularies-9.1.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,483 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
#
|
|
3
|
+
# Copyright (C) 2021-2025 CERN.
|
|
4
|
+
#
|
|
5
|
+
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
6
|
+
# modify it under the terms of the MIT License; see LICENSE file for more
|
|
7
|
+
# details.
|
|
8
|
+
|
|
9
|
+
"""Names datastreams, transformers, writers and readers."""
|
|
10
|
+
|
|
11
|
+
import csv
|
|
12
|
+
import io
|
|
13
|
+
import tarfile
|
|
14
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
15
|
+
from contextvars import copy_context
|
|
16
|
+
from datetime import datetime, timedelta
|
|
17
|
+
from itertools import islice
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
|
|
20
|
+
import regex as re
|
|
21
|
+
from flask import current_app
|
|
22
|
+
from invenio_access.permissions import system_identity
|
|
23
|
+
from werkzeug.utils import cached_property
|
|
24
|
+
|
|
25
|
+
from invenio_vocabularies.contrib.names.s3client import S3OrcidClient
|
|
26
|
+
|
|
27
|
+
from ...datastreams.errors import TransformerError
|
|
28
|
+
from ...datastreams.readers import BaseReader, SimpleHTTPReader
|
|
29
|
+
from ...datastreams.transformers import BaseTransformer
|
|
30
|
+
from ...datastreams.writers import ServiceWriter
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class OrcidDataSyncReader(BaseReader):
|
|
34
|
+
"""ORCiD Data Sync Reader."""
|
|
35
|
+
|
|
36
|
+
def __init__(self, origin=None, mode="r", since=None, *args, **kwargs):
|
|
37
|
+
"""Constructor.
|
|
38
|
+
|
|
39
|
+
:param origin: Data source (e.g. filepath).
|
|
40
|
+
Can be none in case of piped readers.
|
|
41
|
+
"""
|
|
42
|
+
super().__init__(origin=origin, mode=mode, *args, **kwargs)
|
|
43
|
+
self.s3_client = S3OrcidClient()
|
|
44
|
+
self.since = since
|
|
45
|
+
|
|
46
|
+
def _fetch_orcid_data(self, app, orcid_to_sync, bucket):
|
|
47
|
+
"""Fetches a single ORCiD record from S3."""
|
|
48
|
+
# The ORCiD file key is located in a folder which name corresponds to the last three digits of the ORCiD
|
|
49
|
+
suffix = orcid_to_sync[-3:]
|
|
50
|
+
key = f"{suffix}/{orcid_to_sync}.xml"
|
|
51
|
+
app.logger.debug(f"Fetching ORCiD record: {key} from bucket: {bucket}")
|
|
52
|
+
try:
|
|
53
|
+
# Potential improvement: use the a XML jax parser to avoid loading the whole file in memory
|
|
54
|
+
# and choose the sections we need to read (probably the summary)
|
|
55
|
+
return self.s3_client.read_file(f"s3://{bucket}/{key}")
|
|
56
|
+
except Exception:
|
|
57
|
+
app.logger.exception(f"Failed to fetch ORCiD record: {key}")
|
|
58
|
+
|
|
59
|
+
def _process_lambda_file(self, fileobj):
|
|
60
|
+
"""Process the ORCiD lambda file and returns a list of ORCiDs to sync.
|
|
61
|
+
|
|
62
|
+
The decoded fileobj looks like the following:
|
|
63
|
+
orcid, path, date_created, last_modified
|
|
64
|
+
0000-0001-5109-3700, http://orcid.org/0000-0001-5109-3700, 2014-08-02 15:00:00.000,2021-08-02 15:00:00.000
|
|
65
|
+
|
|
66
|
+
Yield ORCiDs to sync until the last sync date is reached.
|
|
67
|
+
"""
|
|
68
|
+
date_format = "%Y-%m-%d %H:%M:%S.%f"
|
|
69
|
+
date_format_no_millis = "%Y-%m-%d %H:%M:%S"
|
|
70
|
+
|
|
71
|
+
if self.since:
|
|
72
|
+
last_sync = datetime.strptime(self.since, date_format)
|
|
73
|
+
else:
|
|
74
|
+
last_sync = datetime.now() - timedelta(
|
|
75
|
+
**current_app.config["VOCABULARIES_ORCID_SYNC_SINCE"]
|
|
76
|
+
)
|
|
77
|
+
try:
|
|
78
|
+
content = io.TextIOWrapper(fileobj, encoding="utf-8")
|
|
79
|
+
csv_reader = csv.DictReader(content)
|
|
80
|
+
|
|
81
|
+
for row in csv_reader: # Skip the header line
|
|
82
|
+
orcid = row["orcid"]
|
|
83
|
+
|
|
84
|
+
# Lambda file is ordered by last modified date
|
|
85
|
+
last_modified_str = row["last_modified"]
|
|
86
|
+
try:
|
|
87
|
+
last_modified_date = datetime.strptime(
|
|
88
|
+
last_modified_str, date_format
|
|
89
|
+
)
|
|
90
|
+
except Exception:
|
|
91
|
+
last_modified_date = datetime.strptime(
|
|
92
|
+
last_modified_str, date_format_no_millis
|
|
93
|
+
)
|
|
94
|
+
|
|
95
|
+
if last_modified_date < last_sync:
|
|
96
|
+
current_app.logger.debug(
|
|
97
|
+
f"Skipping ORCiD {orcid}: last modified {last_modified_date} is older than cutoff {last_sync}"
|
|
98
|
+
)
|
|
99
|
+
current_app.logger.info(
|
|
100
|
+
"Reached cutoff date. No more recent records to process."
|
|
101
|
+
)
|
|
102
|
+
break
|
|
103
|
+
current_app.logger.debug(f"Yielding ORCiD {orcid} for sync.")
|
|
104
|
+
yield orcid
|
|
105
|
+
finally:
|
|
106
|
+
fileobj.close()
|
|
107
|
+
|
|
108
|
+
def _iter(self, orcids):
|
|
109
|
+
"""Iterates over the ORCiD records yielding each one."""
|
|
110
|
+
with ThreadPoolExecutor(
|
|
111
|
+
max_workers=current_app.config["VOCABULARIES_ORCID_SYNC_MAX_WORKERS"]
|
|
112
|
+
) as executor:
|
|
113
|
+
app = current_app._get_current_object()
|
|
114
|
+
# futures is a dictionary where the key is the ORCID value and the item is the Future object
|
|
115
|
+
# Flask does not propagate app/request context to new threads, so `copy_context().run`
|
|
116
|
+
# ensures the current instantianted contextvars (such as job_context) is preserved in each thread.
|
|
117
|
+
futures = {
|
|
118
|
+
orcid: executor.submit(
|
|
119
|
+
copy_context().run, # Required to pass the context to the thread
|
|
120
|
+
self._fetch_orcid_data,
|
|
121
|
+
app, # Pass the Flask app to the thread
|
|
122
|
+
orcid,
|
|
123
|
+
current_app.config["VOCABULARIES_ORCID_SUMMARIES_BUCKET"],
|
|
124
|
+
)
|
|
125
|
+
for orcid in orcids
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
for orcid in list(futures.keys()):
|
|
129
|
+
try:
|
|
130
|
+
result = futures[orcid].result()
|
|
131
|
+
if result:
|
|
132
|
+
current_app.logger.debug(
|
|
133
|
+
f"Successfully fetched ORCiD record: {orcid}"
|
|
134
|
+
)
|
|
135
|
+
yield result
|
|
136
|
+
except Exception:
|
|
137
|
+
current_app.logger.exception(
|
|
138
|
+
f"Error processing ORCiD record: {orcid}"
|
|
139
|
+
)
|
|
140
|
+
finally:
|
|
141
|
+
# Explicitly release memory, as we don't need the future anymore.
|
|
142
|
+
# This is mostly required because as long as we keep a reference to the future
|
|
143
|
+
# (in the above futures dict), the garbage collector won't collect it
|
|
144
|
+
# and it will keep the memory allocated.
|
|
145
|
+
del futures[orcid]
|
|
146
|
+
|
|
147
|
+
def read(self, item=None, *args, **kwargs):
    """Stream the ORCiD lambda file, work out which ORCiDs to sync and yield their data.

    The tar archive is downloaded through ``self.s3_client``, every member
    that can be extracted as a file is handed to ``_process_lambda_file`` and
    the resulting ORCiDs are fetched in batches of 100 via ``_iter``.

    :param item: unused by this reader.
    :returns: generator of whatever ``_iter`` yields for each ORCiD.
    """
    # Log before the download starts, so the message marks the beginning of
    # the fetch rather than its completion.
    current_app.logger.info("Fetching ORCiD lambda file")
    # Read the file from S3
    tar_content = self.s3_client.read_file(
        "s3://orcid-lambda-file/last_modified.csv.tar"
    )
    # Open the tar file and process it
    with tarfile.open(fileobj=io.BytesIO(tar_content)) as tar:
        # Iterate over each member (file or directory) in the tar file
        for member in tar.getmembers():
            extracted_file = tar.extractfile(member)
            if not extracted_file:
                # `extractfile` returns None for members that are not
                # regular files or links (e.g. directories).
                continue

            current_app.logger.info(f"Processing lambda file: {member.name}")
            # The context manager closes the member even if processing
            # raises; a second close on an already closed file is a no-op.
            with extracted_file:
                # De-duplicate the ORCiDs to sync before fetching them
                orcids_to_sync = set(self._process_lambda_file(extracted_file))

            # Process ORCiDs in smaller batches
            for orcid_batch in self._chunked_iter(orcids_to_sync, batch_size=100):
                yield from self._iter(orcid_batch)
|
|
173
|
+
|
|
174
|
+
def _chunked_iter(self, iterable, batch_size):
    """Split ``iterable`` into consecutive lists of at most ``batch_size`` items.

    :param iterable: any iterable; it is consumed lazily.
    :param batch_size: maximum number of items per yielded list.
    :returns: generator of non-empty lists; the last one may be shorter.
    """
    source = iter(iterable)
    while True:
        batch = list(islice(source, batch_size))
        if not batch:
            # Source exhausted (or nothing could be taken): stop iterating.
            return
        current_app.logger.debug(f"Processing batch of size {len(batch)}.")
        yield batch
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
class OrcidHTTPReader(SimpleHTTPReader):
    """HTTP reader bound to the ORCiD website (sandbox or production)."""

    def __init__(self, *args, test_mode=True, **kwargs):
        """Select the ORCiD origin URL and delegate to the base reader.

        :param test_mode: when truthy (the default) the sandbox host is used,
            otherwise the production host.

        Remaining positional and keyword arguments are forwarded unchanged.
        """
        # NOTE(review): the `{id}` placeholder is presumably filled in by
        # SimpleHTTPReader - confirm against the base class.
        origin = (
            "https://sandbox.orcid.org/{id}"
            if test_mode
            else "https://orcid.org/{id}"
        )
        super().__init__(origin, *args, **kwargs)
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
# Default pattern used by OrcidTransformer._is_valid_name: the pattern is
# searched in the full name (flags `re.UNICODE | re.V1`) and any match makes
# the name invalid.
# NOTE(review): `\p{...}` properties and the `--` operator are not supported by
# the standard-library `re`; presumably `re` here is the third-party `regex`
# module, where `--` is set difference, i.e. the characters `,.()-'` would
# remain allowed in names - confirm against the file's imports.
DEFAULT_NAMES_EXCLUDE_REGEX = r"[\p{P}\p{S}\p{Nd}\p{No}\p{Emoji}--,.()\-']"
"""Regex to filter out names with punctuation, symbols, numbers and emojis."""
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
class OrcidOrgToAffiliationMapper:
    """Default ORCiD Org ID to affiliation ID mapper.

    Instances are callables: ``mapper(org_scheme, org_id)`` returns the
    affiliation ID to link to, or ``None`` when no mapping is known.
    """

    def __init__(self, org_ids_mapping=None, org_ids_mapping_file=None):
        """Constructor.

        :param org_ids_mapping: optional dict ``{(org_scheme, org_id): aff_id}``;
            only used when no mapping file is given or configured.
        :param org_ids_mapping_file: optional path to a CSV file with the
            columns ``org_scheme, org_id, aff_id``. When omitted, the
            ``VOCABULARIES_ORCID_ORG_IDS_MAPPING_PATH`` setting is used.
        """
        self._org_ids_mapping = org_ids_mapping
        self._org_ids_mapping_file = org_ids_mapping_file

    @cached_property
    def org_ids_mapping(self):
        """Mapping of ORCiD org IDs to affiliation IDs.

        Loaded once per instance. A mapping file (explicit argument first,
        then the app config) takes precedence over the in-memory mapping
        passed to the constructor.

        :returns: dict keyed by ``(org_scheme, org_id)`` tuples.
        :raises ValueError: if a non-blank CSV row does not have exactly
            three columns.
        """
        org_ids_mapping_file = self._org_ids_mapping_file or current_app.config.get(
            "VOCABULARIES_ORCID_ORG_IDS_MAPPING_PATH"
        )
        if not org_ids_mapping_file:
            return self._org_ids_mapping or {}

        org_ids_mapping_file = Path(org_ids_mapping_file)
        # If the path is relative, prepend the instance path
        if not org_ids_mapping_file.is_absolute():
            org_ids_mapping_file = (
                Path(current_app.instance_path) / org_ids_mapping_file
            )

        result = {}
        # `newline=""` lets the csv module handle line endings itself, as
        # recommended by its documentation.
        with open(org_ids_mapping_file, newline="") as fin:
            reader = csv.reader(fin)
            first_row = True
            for row in reader:
                if not row:
                    # Blank line: nothing to map.
                    continue
                try:
                    org_scheme, org_id, aff_id = row
                except ValueError as err:
                    raise ValueError(
                        f"Invalid row at line {reader.line_num} of "
                        f"{org_ids_mapping_file}: expected 3 columns "
                        f"(org_scheme, org_id, aff_id), got {len(row)}."
                    ) from err

                if first_row:
                    first_row = False
                    # The first row may be a header; skip it if so.
                    if org_scheme.lower() == "org_scheme":
                        continue

                result[(org_scheme, org_id)] = aff_id

        return result

    def __call__(self, org_scheme, org_id):
        """Map an ORCiD org ID to an affiliation ID.

        :param org_scheme: disambiguation source of the organization.
        :param org_id: organization identifier within that scheme.
        :returns: the affiliation ID, or ``None`` if there is no mapping.
        """
        # By default we know that ROR IDs are linkable: keep only the last
        # path segment of the identifier.
        if org_scheme == "ROR":
            return org_id.split("/")[-1]
        # Otherwise see if we have a mapping from other schemes to an affiliation ID
        return self.org_ids_mapping.get((org_scheme, org_id))
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
class OrcidTransformer(BaseTransformer):
    """Transforms an ORCiD record into a names record."""

    def __init__(
        self,
        *args,
        names_exclude_regex=DEFAULT_NAMES_EXCLUDE_REGEX,
        org_id_to_affiliation_id_func=None,
        **kwargs,
    ) -> None:
        """Constructor.

        :param names_exclude_regex: pattern searched in the full name; a match
            marks the name as invalid. A falsy value disables the check.
        :param org_id_to_affiliation_id_func: callable taking
            ``(org_scheme, org_id)`` and returning an affiliation ID (or a
            falsy value); defaults to ``OrcidOrgToAffiliationMapper()``.

        Extra positional and keyword arguments are accepted but not forwarded
        to the base class constructor.
        """
        self._names_exclude_regex = names_exclude_regex
        self._org_id_to_affiliation_id_func = (
            org_id_to_affiliation_id_func or OrcidOrgToAffiliationMapper()
        )
        super().__init__()

    def org_id_to_affiliation_id(self, org_scheme, org_id):
        """Convert an ORCiD org ID to a linkable affiliation ID."""
        return self._org_id_to_affiliation_id_func(org_scheme, org_id)

    def apply(self, stream_entry, **kwargs):
        """Applies the transformation to the stream entry.

        ``stream_entry.entry`` is replaced by a dict with the keys ``id``,
        ``given_name``, ``family_name``, ``identifiers`` and ``affiliations``.
        Validation problems (missing name, missing family name, invalid
        characters) are combined into a single message appended to
        ``stream_entry.errors``; the entry is transformed regardless.

        The record must provide ``person`` and ``orcid-identifier.path``;
        both are read with plain indexing.

        :returns: the same ``stream_entry`` object.
        """
        current_app.logger.debug("Applying transformation to stream entry.")
        record = stream_entry.entry
        person = record["person"]
        orcid_id = record["orcid-identifier"]["path"]
        errors = []

        name = person.get("name", None)
        family_name = name.get("family-name", None) if name else None
        given_names = name.get("given-names", None) if name else None

        if name is None:
            errors.append(
                TransformerError(
                    f"Name not found in ORCiD entry for ORCiD ID: {orcid_id}."
                )
            )
        if family_name is None:
            errors.append(
                TransformerError(
                    f"Family name not found in ORCiD entry for ORCiD ID: {orcid_id}."
                )
            )
        # Only non-blank string parts take part in the full name.
        full_name = " ".join(
            p.strip()
            for p in (given_names, family_name)
            if isinstance(p, str) and p.strip()
        )
        if not self._is_valid_name(full_name):
            errors.append(
                TransformerError(
                    f"Invalid characters in name for ORCiD ID: {orcid_id}."
                )
            )

        if errors:
            all_errors = "\n".join(str(e) for e in errors)
            error_message = (
                f"ORCiD entry with ORCiD ID: #{orcid_id} failed transformation. "
                f"See errors:\n{all_errors}"
            )
            stream_entry.errors.append(error_message)

        entry = {
            "id": orcid_id,
            "given_name": given_names,
            "family_name": family_name,
            "identifiers": [{"scheme": "orcid", "identifier": orcid_id}],
            "affiliations": self._extract_affiliations(record),
        }

        stream_entry.entry = entry
        current_app.logger.debug(f"Transformed entry: {entry}")
        return stream_entry

    def _is_valid_name(self, name):
        """Check whether the name passes the regex.

        An empty name is always invalid; with no exclude regex configured
        every non-empty name is valid.
        """
        if not name:
            return False
        if not self._names_exclude_regex:
            return True
        return not bool(re.search(self._names_exclude_regex, name, re.UNICODE | re.V1))

    def _extract_affiliations(self, record):
        """Extract affiliations from the ORCiD record.

        Only employments without an ``end-date`` are considered. Each result
        item is ``{"name": ...}`` plus ``"id"`` when the organization could be
        mapped to an affiliation ID. Duplicates (same ID, or same name among
        the items without an ID) are dropped.

        Extraction is best effort: on any error the traceback is logged and
        the affiliations collected so far are returned.

        :returns: list of affiliation dicts (possibly empty).
        """
        current_app.logger.debug("Extracting affiliations from ORCiD record.")
        result = []
        try:
            # Treat a key that is present with a None value like a missing
            # key, so that case is not reported as an extraction error.
            # NOTE(review): presumably the XML-to-dict conversion yields None
            # for empty elements - confirm against the XML reader.
            activities = record.get("activities-summary") or {}
            employments = (
                (activities.get("employments") or {}).get("affiliation-group") or []
            )

            # If there are single values, the XML to dict, doesn't wrap them in a list
            if isinstance(employments, dict):
                employments = [employments]

            # Remove the "employment-summary" nesting
            employments = [
                employment.get("employment-summary", {}) for employment in employments
            ]

            for employment in employments:
                terminated = employment.get("end-date")
                if terminated:
                    continue

                org = employment["organization"]
                aff_id = self._extract_affiliation_id(org)

                # Skip adding if the ID already exists in result
                if aff_id and any(aff.get("id") == aff_id for aff in result):
                    continue

                # Skip adding if the name exists in result with no ID
                if any(
                    aff.get("name") == org["name"] and "id" not in aff for aff in result
                ):
                    continue

                aff = {"name": org["name"]}
                if aff_id:
                    aff["id"] = aff_id

                result.append(aff)
        except Exception:
            # Keep the best-effort behaviour, but record the traceback so the
            # failure can be diagnosed (same style as the reader's `_iter`).
            current_app.logger.exception("Error extracting affiliations.")
        return result

    def _extract_affiliation_id(self, org):
        """Extract the affiliation ID from an ORCiD organization.

        :param org: organization dict of an employment summary.
        :returns: the mapped affiliation ID, or ``None`` when the organization
            has no disambiguated identifier/source or no mapping exists.
        """
        dis_org = org.get("disambiguated-organization")
        if not dis_org:
            return

        aff_id = None
        org_id = dis_org.get("disambiguated-organization-identifier")
        org_scheme = dis_org.get("disambiguation-source")
        if org_id and org_scheme:
            aff_id = self.org_id_to_affiliation_id(org_scheme, org_id)
        return aff_id
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
class NamesServiceWriter(ServiceWriter):
    """Service writer that targets the ``names`` service by default."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to the base writer.

        ``service_or_name`` falls back to ``"names"`` when the caller does
        not pass it as a keyword argument.
        """
        kwargs.setdefault("service_or_name", "names")
        super().__init__(*args, **kwargs)

    def _entry_id(self, entry):
        """Return the value stored under the entry's ``"id"`` key."""
        return entry["id"]
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
# ORCiD reader classes, keyed by the "type" name under which the datastream
# configurations in this module refer to them.
VOCABULARIES_DATASTREAM_READERS = {
    "orcid-http": OrcidHTTPReader,
    "orcid-data-sync": OrcidDataSyncReader,
}


# ORCiD transformer classes, keyed by their "type" name.
VOCABULARIES_DATASTREAM_TRANSFORMERS = {
    "orcid": OrcidTransformer,
}
"""ORCiD Data Streams transformers."""
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
# Writer classes, keyed by the "type" name under which the datastream
# configurations in this module refer to them.
VOCABULARIES_DATASTREAM_WRITERS = {
    "names-service": NamesServiceWriter,
}
"""ORCiD Data Streams writers."""
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
# Default ORCiD datastream: a "tar" reader configured with the regex `\.xml$`
# and an "xml" reader rooted at the "record" element, the "orcid" transformer,
# and the "names-service" writer running with the system identity.
# NOTE(review): presumably the readers are applied in list order (tar members
# feeding the XML reader) - confirm against the datastreams factory.
DATASTREAM_CONFIG = {
    "readers": [
        {
            "type": "tar",
            "args": {
                "regex": "\\.xml$",
            },
        },
        {
            "type": "xml",
            "args": {
                "root_element": "record",
            },
        },
    ],
    "transformers": [{"type": "orcid"}],
    "writers": [
        {
            "type": "names-service",
            "args": {
                "identity": system_identity,
            },
        }
    ],
}
"""ORCiD Data Stream configuration.

An origin is required for the reader.
"""
|
|
451
|
+
|
|
452
|
+
# TODO: Used on the jobs and should be set as a "PRESET" (naming to be defined)
# Sync-oriented variant of the ORCiD datastream: records come from the
# "orcid-data-sync" reader followed by the "xml" reader, and the
# "names-service" writer (with `update` enabled) is wrapped in an "async"
# writer.
# NOTE(review): `batch_size` and `write_many` are presumably consumed by the
# datastream runner to write entries in bulk - confirm against the caller.
ORCID_PRESET_DATASTREAM_CONFIG = {
    "readers": [
        {
            "type": "orcid-data-sync",
        },
        {
            "type": "xml",
            "args": {
                "root_element": "record",
            },
        },
    ],
    "transformers": [{"type": "orcid"}],
    "writers": [
        {
            "type": "async",
            "args": {
                "writer": {
                    "type": "names-service",
                    "args": {"update": True},
                }
            },
        }
    ],
    "batch_size": 1000,
    "write_many": True,
}
"""ORCiD Data Stream configuration.

An origin is required for the reader.
"""
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
+
"$id": "local://names/name-v1.0.0.json",
|
|
4
|
+
"description": "Names vocabulary.",
|
|
5
|
+
"type": "object",
|
|
6
|
+
"additionalProperties": false,
|
|
7
|
+
"properties": {
|
|
8
|
+
"$schema": {
|
|
9
|
+
"$ref": "local://definitions-v1.0.0.json#/$schema"
|
|
10
|
+
},
|
|
11
|
+
"tags": {
|
|
12
|
+
"$ref": "local://vocabularies/definitions-v1.0.0.json#/tags"
|
|
13
|
+
},
|
|
14
|
+
"scheme": {
|
|
15
|
+
"description": "Identifier of the name scheme.",
|
|
16
|
+
"$ref": "local://definitions-v1.0.0.json#/identifier"
|
|
17
|
+
},
|
|
18
|
+
"name": {
|
|
19
|
+
"type": "string"
|
|
20
|
+
},
|
|
21
|
+
"given_name": {
|
|
22
|
+
"type": "string"
|
|
23
|
+
},
|
|
24
|
+
"family_name": {
|
|
25
|
+
"type": "string"
|
|
26
|
+
},
|
|
27
|
+
"identifiers": {
|
|
28
|
+
"description": "Identifiers for the person.",
|
|
29
|
+
"type": "array",
|
|
30
|
+
"items": {
|
|
31
|
+
"$ref": "local://definitions-v1.0.0.json#/identifiers_with_scheme"
|
|
32
|
+
},
|
|
33
|
+
"uniqueItems": true
|
|
34
|
+
},
|
|
35
|
+
"props": {
|
|
36
|
+
"type": "object",
|
|
37
|
+
"patternProperties": {
|
|
38
|
+
"^.*$": {
|
|
39
|
+
"oneOf": [
|
|
40
|
+
{
|
|
41
|
+
"type": "string"
|
|
42
|
+
},
|
|
43
|
+
{
|
|
44
|
+
"type": "boolean"
|
|
45
|
+
}
|
|
46
|
+
]
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
},
|
|
50
|
+
"affiliations": {
|
|
51
|
+
"description": "Affiliations of the person.",
|
|
52
|
+
"type": "array",
|
|
53
|
+
"items": {
|
|
54
|
+
"type": "object",
|
|
55
|
+
"additionalProperties": false,
|
|
56
|
+
"properties": {
|
|
57
|
+
"id": {
|
|
58
|
+
"$ref": "local://definitions-v1.0.0.json#/identifier"
|
|
59
|
+
},
|
|
60
|
+
"name": {
|
|
61
|
+
"type": "string"
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
},
|
|
65
|
+
"uniqueItems": true
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
}
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
{
|
|
2
|
+
"mappings": {
|
|
3
|
+
"dynamic": "strict",
|
|
4
|
+
"properties": {
|
|
5
|
+
"$schema": {
|
|
6
|
+
"type": "keyword",
|
|
7
|
+
"index": "false"
|
|
8
|
+
},
|
|
9
|
+
"created": {
|
|
10
|
+
"type": "date"
|
|
11
|
+
},
|
|
12
|
+
"updated": {
|
|
13
|
+
"type": "date"
|
|
14
|
+
},
|
|
15
|
+
"indexed_at": {
|
|
16
|
+
"type": "date"
|
|
17
|
+
},
|
|
18
|
+
"uuid": {
|
|
19
|
+
"type": "keyword"
|
|
20
|
+
},
|
|
21
|
+
"version_id": {
|
|
22
|
+
"type": "integer"
|
|
23
|
+
},
|
|
24
|
+
"id": {
|
|
25
|
+
"type": "keyword"
|
|
26
|
+
},
|
|
27
|
+
"tags": {
|
|
28
|
+
"type": "keyword"
|
|
29
|
+
},
|
|
30
|
+
"name_sort": {
|
|
31
|
+
"type": "keyword"
|
|
32
|
+
},
|
|
33
|
+
"name": {
|
|
34
|
+
"type": "text",
|
|
35
|
+
"copy_to": "name_sort",
|
|
36
|
+
"fields": {
|
|
37
|
+
"suggest": {
|
|
38
|
+
"type": "search_as_you_type"
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
},
|
|
42
|
+
"given_name": {
|
|
43
|
+
"type": "text"
|
|
44
|
+
},
|
|
45
|
+
"family_name": {
|
|
46
|
+
"type": "text"
|
|
47
|
+
},
|
|
48
|
+
"identifiers": {
|
|
49
|
+
"properties": {
|
|
50
|
+
"identifier": {
|
|
51
|
+
"type": "keyword",
|
|
52
|
+
"fields": {
|
|
53
|
+
"suggest": {
|
|
54
|
+
"type": "search_as_you_type"
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
},
|
|
58
|
+
"scheme": {
|
|
59
|
+
"type": "keyword"
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
},
|
|
63
|
+
"affiliations": {
|
|
64
|
+
"type": "object",
|
|
65
|
+
"properties": {
|
|
66
|
+
"@v": {
|
|
67
|
+
"type": "keyword"
|
|
68
|
+
},
|
|
69
|
+
"id": {
|
|
70
|
+
"type": "keyword"
|
|
71
|
+
},
|
|
72
|
+
"name": {
|
|
73
|
+
"type": "keyword",
|
|
74
|
+
"fields": {
|
|
75
|
+
"suggest": {
|
|
76
|
+
"type": "search_as_you_type"
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
},
|
|
82
|
+
"pid": {
|
|
83
|
+
"type": "object",
|
|
84
|
+
"properties": {
|
|
85
|
+
"pk": {
|
|
86
|
+
"type": "integer"
|
|
87
|
+
},
|
|
88
|
+
"pid_type": {
|
|
89
|
+
"type": "keyword"
|
|
90
|
+
},
|
|
91
|
+
"obj_type": {
|
|
92
|
+
"type": "keyword"
|
|
93
|
+
},
|
|
94
|
+
"status": {
|
|
95
|
+
"type": "keyword"
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
}
|