invenio-vocabularies 1.2.0__py2.py3-none-any.whl → 6.3.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of invenio-vocabularies might be problematic. Click here for more details.
- invenio_vocabularies/__init__.py +2 -2
- invenio_vocabularies/administration/__init__.py +10 -0
- invenio_vocabularies/administration/views/__init__.py +10 -0
- invenio_vocabularies/administration/views/vocabularies.py +45 -0
- invenio_vocabularies/alembic/4a9a4fd235f8_create_vocabulary_schemes.py +4 -4
- invenio_vocabularies/alembic/4f365fced43f_create_vocabularies_tables.py +2 -2
- invenio_vocabularies/alembic/55a700f897b6_add_names_and_afiliations_pid_column.py +96 -0
- invenio_vocabularies/alembic/676dd587542d_create_funders_vocabulary_table.py +1 -1
- invenio_vocabularies/alembic/e1146238edd3_create_awards_table.py +1 -1
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/.eslintrc.yml +11 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/.prettierrc +1 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/index.js +7 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/package.json +25 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/AwardResults.js +95 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/CustomAwardForm.js +139 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FunderDropdown.js +87 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingField.js +223 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingField.test.js +1 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingFieldItem.js +152 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingModal.js +270 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/NoAwardResults.js +37 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/index.js +8 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/index.js +7 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/index.js +7 -0
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/index.js +7 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/i18next-scanner.config.js +63 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/i18next.js +36 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/index.js +1 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/package.json +53 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/scripts/compileCatalog.js +39 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/scripts/initCatalog.js +19 -0
- invenio_vocabularies/cli.py +31 -44
- invenio_vocabularies/config.py +74 -7
- invenio_vocabularies/contrib/affiliations/affiliations.py +22 -6
- invenio_vocabularies/contrib/affiliations/api.py +1 -2
- invenio_vocabularies/contrib/affiliations/config.py +10 -5
- invenio_vocabularies/contrib/affiliations/datastreams.py +186 -0
- invenio_vocabularies/contrib/affiliations/facets.py +36 -0
- invenio_vocabularies/contrib/affiliations/jsonschemas/affiliations/affiliation-v1.0.0.json +38 -7
- invenio_vocabularies/contrib/affiliations/mappings/os-v1/affiliations/affiliation-v1.0.0.json +22 -1
- invenio_vocabularies/contrib/affiliations/mappings/os-v1/affiliations/affiliation-v2.0.0.json +171 -0
- invenio_vocabularies/contrib/affiliations/mappings/os-v2/affiliations/affiliation-v1.0.0.json +22 -1
- invenio_vocabularies/contrib/affiliations/mappings/os-v2/affiliations/affiliation-v2.0.0.json +171 -0
- invenio_vocabularies/contrib/affiliations/mappings/v7/affiliations/affiliation-v1.0.0.json +22 -1
- invenio_vocabularies/contrib/affiliations/schema.py +23 -5
- invenio_vocabularies/contrib/affiliations/services.py +1 -2
- invenio_vocabularies/contrib/awards/awards.py +18 -6
- invenio_vocabularies/contrib/awards/config.py +1 -3
- invenio_vocabularies/contrib/awards/datastreams.py +246 -3
- invenio_vocabularies/contrib/awards/jsonschemas/awards/award-v1.0.0.json +41 -0
- invenio_vocabularies/contrib/awards/mappings/os-v1/awards/award-v1.0.0.json +53 -1
- invenio_vocabularies/contrib/awards/mappings/os-v2/awards/award-v1.0.0.json +53 -1
- invenio_vocabularies/contrib/awards/mappings/v7/awards/award-v1.0.0.json +53 -1
- invenio_vocabularies/contrib/awards/schema.py +27 -35
- invenio_vocabularies/contrib/awards/serializer.py +9 -1
- invenio_vocabularies/contrib/awards/services.py +1 -2
- invenio_vocabularies/contrib/common/__init__.py +9 -0
- invenio_vocabularies/contrib/common/openaire/__init__.py +9 -0
- invenio_vocabularies/contrib/common/openaire/datastreams.py +84 -0
- invenio_vocabularies/contrib/common/ror/__init__.py +9 -0
- invenio_vocabularies/contrib/common/ror/datastreams.py +220 -0
- invenio_vocabularies/contrib/funders/config.py +12 -5
- invenio_vocabularies/contrib/funders/datastreams.py +40 -62
- invenio_vocabularies/contrib/funders/facets.py +13 -5
- invenio_vocabularies/contrib/funders/funders.py +4 -2
- invenio_vocabularies/contrib/funders/jsonschemas/funders/funder-v1.0.0.json +36 -1
- invenio_vocabularies/contrib/funders/mappings/os-v1/funders/funder-v1.0.0.json +22 -1
- invenio_vocabularies/contrib/funders/mappings/os-v1/funders/funder-v2.0.0.json +156 -0
- invenio_vocabularies/contrib/funders/mappings/os-v2/funders/funder-v1.0.0.json +22 -1
- invenio_vocabularies/contrib/funders/mappings/os-v2/funders/funder-v2.0.0.json +156 -0
- invenio_vocabularies/contrib/funders/mappings/v7/funders/funder-v1.0.0.json +22 -1
- invenio_vocabularies/contrib/funders/schema.py +8 -0
- invenio_vocabularies/contrib/funders/serializer.py +2 -1
- invenio_vocabularies/contrib/names/config.py +5 -3
- invenio_vocabularies/contrib/names/datastreams.py +177 -38
- invenio_vocabularies/contrib/names/jsonschemas/names/name-v1.0.0.json +2 -6
- invenio_vocabularies/contrib/names/mappings/os-v1/names/name-v1.0.0.json +3 -0
- invenio_vocabularies/contrib/names/mappings/os-v1/names/name-v2.0.0.json +150 -0
- invenio_vocabularies/contrib/names/mappings/os-v2/names/name-v1.0.0.json +3 -0
- invenio_vocabularies/contrib/names/mappings/os-v2/names/name-v2.0.0.json +150 -0
- invenio_vocabularies/contrib/names/mappings/v7/names/name-v1.0.0.json +3 -0
- invenio_vocabularies/contrib/names/names.py +29 -13
- invenio_vocabularies/contrib/names/permissions.py +20 -0
- invenio_vocabularies/contrib/names/s3client.py +44 -0
- invenio_vocabularies/contrib/names/schema.py +31 -4
- invenio_vocabularies/contrib/subjects/config.py +9 -3
- invenio_vocabularies/contrib/subjects/datastreams.py +61 -0
- invenio_vocabularies/contrib/subjects/euroscivoc/__init__.py +9 -0
- invenio_vocabularies/contrib/subjects/euroscivoc/datastreams.py +171 -0
- invenio_vocabularies/contrib/subjects/jsonschemas/subjects/subject-v1.0.0.json +31 -0
- invenio_vocabularies/contrib/subjects/mappings/os-v1/subjects/subject-v1.0.0.json +35 -0
- invenio_vocabularies/contrib/subjects/mappings/os-v2/subjects/subject-v1.0.0.json +35 -0
- invenio_vocabularies/contrib/subjects/mappings/v7/subjects/subject-v1.0.0.json +35 -0
- invenio_vocabularies/contrib/subjects/mesh/__init__.py +9 -0
- invenio_vocabularies/contrib/subjects/mesh/datastreams.py +43 -0
- invenio_vocabularies/contrib/subjects/schema.py +47 -5
- invenio_vocabularies/contrib/subjects/subjects.py +10 -0
- invenio_vocabularies/datastreams/datastreams.py +61 -13
- invenio_vocabularies/datastreams/factories.py +1 -2
- invenio_vocabularies/datastreams/readers.py +138 -29
- invenio_vocabularies/datastreams/tasks.py +37 -0
- invenio_vocabularies/datastreams/transformers.py +17 -27
- invenio_vocabularies/datastreams/writers.py +116 -14
- invenio_vocabularies/datastreams/xml.py +34 -0
- invenio_vocabularies/ext.py +59 -5
- invenio_vocabularies/factories.py +137 -0
- invenio_vocabularies/jobs.py +133 -0
- invenio_vocabularies/proxies.py +2 -2
- invenio_vocabularies/records/jsonschemas/vocabularies/definitions-v1.0.0.json +7 -0
- invenio_vocabularies/records/jsonschemas/vocabularies/vocabulary-v1.0.0.json +1 -4
- invenio_vocabularies/records/mappings/os-v1/vocabularies/vocabulary-v1.0.0.json +3 -3
- invenio_vocabularies/records/mappings/os-v2/vocabularies/vocabulary-v1.0.0.json +3 -3
- invenio_vocabularies/records/mappings/v7/vocabularies/vocabulary-v1.0.0.json +3 -3
- invenio_vocabularies/records/models.py +8 -10
- invenio_vocabularies/records/pidprovider.py +1 -2
- invenio_vocabularies/records/systemfields/relations.py +2 -2
- invenio_vocabularies/resources/__init__.py +9 -1
- invenio_vocabularies/resources/config.py +105 -0
- invenio_vocabularies/resources/resource.py +31 -41
- invenio_vocabularies/resources/schema.py +2 -1
- invenio_vocabularies/services/__init__.py +5 -2
- invenio_vocabularies/services/config.py +179 -0
- invenio_vocabularies/services/custom_fields/__init__.py +6 -2
- invenio_vocabularies/services/custom_fields/subject.py +82 -0
- invenio_vocabularies/services/custom_fields/vocabulary.py +19 -9
- invenio_vocabularies/services/facets.py +67 -37
- invenio_vocabularies/services/permissions.py +3 -1
- invenio_vocabularies/services/results.py +110 -0
- invenio_vocabularies/services/schema.py +39 -2
- invenio_vocabularies/services/service.py +46 -94
- invenio_vocabularies/services/tasks.py +1 -1
- invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/subjects.html +23 -0
- invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/vocabularies-list.html +12 -0
- invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/vocabulary-details.html +71 -0
- invenio_vocabularies/translations/af/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/af/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/ar/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ar/LC_MESSAGES/messages.po +9 -8
- invenio_vocabularies/translations/bg/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/bg/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/ca/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ca/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/cs/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/cs/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/da/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/da/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/de/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/de/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/de_AT/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/de_AT/LC_MESSAGES/messages.po +139 -0
- invenio_vocabularies/translations/de_DE/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/de_DE/LC_MESSAGES/messages.po +139 -0
- invenio_vocabularies/translations/el/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/el/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/en/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/en_AT/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/en_AT/LC_MESSAGES/messages.po +139 -0
- invenio_vocabularies/translations/en_HU/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/en_HU/LC_MESSAGES/messages.po +139 -0
- invenio_vocabularies/translations/es/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/es/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/es_CU/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/es_CU/LC_MESSAGES/messages.po +139 -0
- invenio_vocabularies/translations/es_MX/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/es_MX/LC_MESSAGES/messages.po +139 -0
- invenio_vocabularies/translations/et/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/et/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/et_EE/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/et_EE/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/fa/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/fa/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/fa_IR/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/fa_IR/LC_MESSAGES/messages.po +139 -0
- invenio_vocabularies/translations/fr/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/fr/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/fr_CI/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/fr_CI/LC_MESSAGES/messages.po +139 -0
- invenio_vocabularies/translations/fr_FR/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/fr_FR/LC_MESSAGES/messages.po +139 -0
- invenio_vocabularies/translations/gl/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/gl/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/hi_IN/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/hi_IN/LC_MESSAGES/messages.po +139 -0
- invenio_vocabularies/translations/hr/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/hr/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/hu/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/hu/LC_MESSAGES/messages.po +4 -4
- invenio_vocabularies/translations/hu_HU/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/hu_HU/LC_MESSAGES/messages.po +139 -0
- invenio_vocabularies/translations/it/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/it/LC_MESSAGES/messages.po +4 -3
- invenio_vocabularies/translations/ja/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ja/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/ka/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ka/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/lt/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/lt/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/messages.pot +95 -48
- invenio_vocabularies/translations/ne/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ne/LC_MESSAGES/messages.po +139 -0
- invenio_vocabularies/translations/no/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/no/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/pl/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/pl/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/pt/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/pt/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/ro/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ro/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/ru/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ru/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/rw/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/rw/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/sk/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/sk/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/sv/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/sv/LC_MESSAGES/messages.po +4 -3
- invenio_vocabularies/translations/sv_SE/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/sv_SE/LC_MESSAGES/messages.po +139 -0
- invenio_vocabularies/translations/tr/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/tr/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/uk/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/uk/LC_MESSAGES/messages.po +17 -13
- invenio_vocabularies/translations/uk_UA/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/uk_UA/LC_MESSAGES/messages.po +139 -0
- invenio_vocabularies/translations/zh_CN/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/zh_CN/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/translations/zh_TW/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/zh_TW/LC_MESSAGES/messages.po +1 -1
- invenio_vocabularies/views.py +12 -26
- invenio_vocabularies/webpack.py +51 -0
- invenio_vocabularies-6.3.1.dist-info/METADATA +346 -0
- invenio_vocabularies-6.3.1.dist-info/RECORD +306 -0
- {invenio_vocabularies-1.2.0.dist-info → invenio_vocabularies-6.3.1.dist-info}/WHEEL +1 -1
- {invenio_vocabularies-1.2.0.dist-info → invenio_vocabularies-6.3.1.dist-info}/entry_points.txt +20 -0
- invenio_vocabularies-1.2.0.dist-info/METADATA +0 -133
- invenio_vocabularies-1.2.0.dist-info/RECORD +0 -220
- {invenio_vocabularies-1.2.0.dist-info → invenio_vocabularies-6.3.1.dist-info}/AUTHORS.rst +0 -0
- {invenio_vocabularies-1.2.0.dist-info → invenio_vocabularies-6.3.1.dist-info}/LICENSE +0 -0
- {invenio_vocabularies-1.2.0.dist-info → invenio_vocabularies-6.3.1.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
#
|
|
3
3
|
# Copyright (C) 2021-2022 CERN.
|
|
4
|
+
# Copyright (C) 2024 California Institute of Technology.
|
|
4
5
|
#
|
|
5
6
|
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
6
7
|
# modify it under the terms of the MIT License; see LICENSE file for more
|
|
@@ -43,6 +44,8 @@ class FunderSchema(BaseVocabularySchema):
|
|
|
43
44
|
required=True, validate=validate.Length(min=1, error=_("Name cannot be blank."))
|
|
44
45
|
)
|
|
45
46
|
country = SanitizedUnicode()
|
|
47
|
+
country_name = SanitizedUnicode()
|
|
48
|
+
location_name = SanitizedUnicode()
|
|
46
49
|
identifiers = IdentifierSet(
|
|
47
50
|
fields.Nested(
|
|
48
51
|
partial(
|
|
@@ -57,6 +60,11 @@ class FunderSchema(BaseVocabularySchema):
|
|
|
57
60
|
validate=validate.Length(min=1, error=_("PID cannot be blank."))
|
|
58
61
|
)
|
|
59
62
|
|
|
63
|
+
acronym = SanitizedUnicode()
|
|
64
|
+
aliases = fields.List(SanitizedUnicode())
|
|
65
|
+
status = SanitizedUnicode()
|
|
66
|
+
types = fields.List(SanitizedUnicode())
|
|
67
|
+
|
|
60
68
|
@validates_schema
|
|
61
69
|
def validate_id(self, data, **kwargs):
|
|
62
70
|
"""Validates ID."""
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
#
|
|
3
|
-
# Copyright (C) 2022 CERN.
|
|
3
|
+
# Copyright (C) 2022-2024 CERN.
|
|
4
4
|
#
|
|
5
5
|
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
6
6
|
# modify it under the terms of the MIT License; see LICENSE file for more
|
|
@@ -29,4 +29,5 @@ class FunderL10NItemSchema(Schema):
|
|
|
29
29
|
props = fields.Dict(dump_only=True)
|
|
30
30
|
name = fields.String(dump_only=True)
|
|
31
31
|
country = fields.String(dump_only=True)
|
|
32
|
+
country_name = fields.String(dump_only=True)
|
|
32
33
|
identifiers = fields.List(fields.Nested(IdentifierSchema), dump_only=True)
|
|
@@ -28,12 +28,14 @@ class NamesSearchOptions(SearchOptions):
|
|
|
28
28
|
|
|
29
29
|
suggest_parser_cls = SuggestQueryParser.factory(
|
|
30
30
|
fields=[
|
|
31
|
-
"name^100",
|
|
32
|
-
"family_name^100",
|
|
33
31
|
"given_name^100",
|
|
32
|
+
"name^70",
|
|
33
|
+
"family_name^50",
|
|
34
34
|
"identifiers.identifier^20",
|
|
35
|
-
"affiliations.name^
|
|
35
|
+
"affiliations.name^20",
|
|
36
36
|
],
|
|
37
|
+
type="most_fields", # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html#multi-match-types
|
|
38
|
+
fuzziness="AUTO",
|
|
37
39
|
)
|
|
38
40
|
|
|
39
41
|
sort_default = "bestmatch"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
#
|
|
3
|
-
# Copyright (C) 2021-
|
|
3
|
+
# Copyright (C) 2021-2024 CERN.
|
|
4
4
|
#
|
|
5
5
|
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
6
6
|
# modify it under the terms of the MIT License; see LICENSE file for more
|
|
@@ -8,18 +8,123 @@
|
|
|
8
8
|
|
|
9
9
|
"""Names datastreams, transformers, writers and readers."""
|
|
10
10
|
|
|
11
|
+
import csv
|
|
12
|
+
import io
|
|
13
|
+
import tarfile
|
|
14
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
15
|
+
from datetime import timedelta
|
|
16
|
+
|
|
17
|
+
import arrow
|
|
18
|
+
import regex as re
|
|
19
|
+
from flask import current_app
|
|
11
20
|
from invenio_access.permissions import system_identity
|
|
12
|
-
from invenio_pidstore.errors import PIDDoesNotExistError
|
|
13
21
|
from invenio_records.dictutils import dict_lookup
|
|
14
|
-
from marshmallow import ValidationError
|
|
15
22
|
|
|
16
|
-
from
|
|
17
|
-
|
|
18
|
-
from ...datastreams.
|
|
23
|
+
from invenio_vocabularies.contrib.names.s3client import S3OrcidClient
|
|
24
|
+
|
|
25
|
+
from ...datastreams.errors import TransformerError
|
|
26
|
+
from ...datastreams.readers import BaseReader, SimpleHTTPReader
|
|
19
27
|
from ...datastreams.transformers import BaseTransformer
|
|
20
28
|
from ...datastreams.writers import ServiceWriter
|
|
21
29
|
|
|
22
30
|
|
|
31
|
+
class OrcidDataSyncReader(BaseReader):
|
|
32
|
+
"""ORCiD Data Sync Reader."""
|
|
33
|
+
|
|
34
|
+
def __init__(self, origin=None, mode="r", since=None, *args, **kwargs):
|
|
35
|
+
"""Constructor.
|
|
36
|
+
|
|
37
|
+
:param origin: Data source (e.g. filepath).
|
|
38
|
+
Can be none in case of piped readers.
|
|
39
|
+
"""
|
|
40
|
+
super().__init__(origin=origin, mode=mode, *args, **kwargs)
|
|
41
|
+
self.s3_client = S3OrcidClient()
|
|
42
|
+
self.since = since
|
|
43
|
+
|
|
44
|
+
def _fetch_orcid_data(self, orcid_to_sync, bucket):
|
|
45
|
+
"""Fetches a single ORCiD record from S3."""
|
|
46
|
+
# The ORCiD file key is located in a folder which name corresponds to the last three digits of the ORCiD
|
|
47
|
+
suffix = orcid_to_sync[-3:]
|
|
48
|
+
key = f"{suffix}/{orcid_to_sync}.xml"
|
|
49
|
+
try:
|
|
50
|
+
return self.s3_client.read_file(f"s3://{bucket}/{key}")
|
|
51
|
+
except Exception as e:
|
|
52
|
+
# TODO: log
|
|
53
|
+
return None
|
|
54
|
+
|
|
55
|
+
def _process_lambda_file(self, fileobj):
|
|
56
|
+
"""Process the ORCiD lambda file and returns a list of ORCiDs to sync.
|
|
57
|
+
|
|
58
|
+
The decoded fileobj looks like the following:
|
|
59
|
+
orcid, path, date_created, last_modified
|
|
60
|
+
0000-0001-5109-3700, http://orcid.org/0000-0001-5109-3700, 2014-08-02 15:00:00.000,2021-08-02 15:00:00.000
|
|
61
|
+
|
|
62
|
+
Yield ORCiDs to sync until the last sync date is reached.
|
|
63
|
+
"""
|
|
64
|
+
date_format = "YYYY-MM-DD HH:mm:ss.SSSSSS"
|
|
65
|
+
date_format_no_millis = "YYYY-MM-DD HH:mm:ss"
|
|
66
|
+
time_shift = current_app.config["VOCABULARIES_ORCID_SYNC_SINCE"]
|
|
67
|
+
if self.since:
|
|
68
|
+
time_shift = self.since
|
|
69
|
+
last_sync = arrow.now() - timedelta(**time_shift)
|
|
70
|
+
|
|
71
|
+
file_content = fileobj.read().decode("utf-8")
|
|
72
|
+
|
|
73
|
+
csv_reader = csv.DictReader(file_content.splitlines())
|
|
74
|
+
|
|
75
|
+
for row in csv_reader: # Skip the header line
|
|
76
|
+
orcid = row["orcid"]
|
|
77
|
+
|
|
78
|
+
# Lambda file is ordered by last modified date
|
|
79
|
+
last_modified_str = row["last_modified"]
|
|
80
|
+
try:
|
|
81
|
+
last_modified_date = arrow.get(last_modified_str, date_format)
|
|
82
|
+
except arrow.parser.ParserError:
|
|
83
|
+
last_modified_date = arrow.get(last_modified_str, date_format_no_millis)
|
|
84
|
+
|
|
85
|
+
if last_modified_date < last_sync:
|
|
86
|
+
break
|
|
87
|
+
yield orcid
|
|
88
|
+
|
|
89
|
+
def _iter(self, orcids):
|
|
90
|
+
"""Iterates over the ORCiD records yielding each one."""
|
|
91
|
+
with ThreadPoolExecutor(
|
|
92
|
+
max_workers=current_app.config["VOCABULARIES_ORCID_SYNC_MAX_WORKERS"]
|
|
93
|
+
) as executor:
|
|
94
|
+
futures = [
|
|
95
|
+
executor.submit(
|
|
96
|
+
self._fetch_orcid_data,
|
|
97
|
+
orcid,
|
|
98
|
+
current_app.config["VOCABULARIES_ORCID_SUMMARIES_BUCKET"],
|
|
99
|
+
)
|
|
100
|
+
for orcid in orcids
|
|
101
|
+
]
|
|
102
|
+
for future in as_completed(futures):
|
|
103
|
+
result = future.result()
|
|
104
|
+
if result is not None:
|
|
105
|
+
yield result
|
|
106
|
+
|
|
107
|
+
def read(self, item=None, *args, **kwargs):
|
|
108
|
+
"""Streams the ORCiD lambda file, process it to get the ORCiDS to sync and yields it's data."""
|
|
109
|
+
# Read the file from S3
|
|
110
|
+
tar_content = self.s3_client.read_file(
|
|
111
|
+
"s3://orcid-lambda-file/last_modified.csv.tar"
|
|
112
|
+
)
|
|
113
|
+
|
|
114
|
+
orcids_to_sync = []
|
|
115
|
+
# Opens tar file and process it
|
|
116
|
+
with tarfile.open(fileobj=io.BytesIO(tar_content)) as tar:
|
|
117
|
+
# Iterate over each member (file or directory) in the tar file
|
|
118
|
+
for member in tar.getmembers():
|
|
119
|
+
# Extract the file
|
|
120
|
+
extracted_file = tar.extractfile(member)
|
|
121
|
+
if extracted_file:
|
|
122
|
+
# Process the file and get the ORCiDs to sync
|
|
123
|
+
orcids_to_sync.extend(self._process_lambda_file(extracted_file))
|
|
124
|
+
|
|
125
|
+
yield from self._iter(orcids_to_sync)
|
|
126
|
+
|
|
127
|
+
|
|
23
128
|
class OrcidHTTPReader(SimpleHTTPReader):
|
|
24
129
|
"""ORCiD HTTP Reader."""
|
|
25
130
|
|
|
@@ -33,20 +138,43 @@ class OrcidHTTPReader(SimpleHTTPReader):
|
|
|
33
138
|
super().__init__(origin, *args, **kwargs)
|
|
34
139
|
|
|
35
140
|
|
|
141
|
+
DEFAULT_NAMES_EXCLUDE_REGEX = r"[\p{P}\p{S}\p{Nd}\p{No}\p{Emoji}--,.()\-']"
|
|
142
|
+
"""Regex to filter out names with punctuations, symbols, decimal numbers and emojis."""
|
|
143
|
+
|
|
144
|
+
|
|
36
145
|
class OrcidTransformer(BaseTransformer):
|
|
37
146
|
"""Transforms an ORCiD record into a names record."""
|
|
38
147
|
|
|
148
|
+
def __init__(
|
|
149
|
+
self, *args, names_exclude_regex=DEFAULT_NAMES_EXCLUDE_REGEX, **kwargs
|
|
150
|
+
) -> None:
|
|
151
|
+
"""Constructor."""
|
|
152
|
+
self._names_exclude_regex = names_exclude_regex
|
|
153
|
+
super().__init__()
|
|
154
|
+
|
|
155
|
+
def _is_valid_name(self, name):
|
|
156
|
+
"""Check whether the name passes the regex."""
|
|
157
|
+
if not self._names_exclude_regex:
|
|
158
|
+
return True
|
|
159
|
+
return not bool(re.search(self._names_exclude_regex, name, re.UNICODE | re.V1))
|
|
160
|
+
|
|
39
161
|
def apply(self, stream_entry, **kwargs):
|
|
40
162
|
"""Applies the transformation to the stream entry."""
|
|
41
163
|
record = stream_entry.entry
|
|
42
164
|
person = record["person"]
|
|
43
|
-
orcid_id = record["orcid-identifier"]["
|
|
165
|
+
orcid_id = record["orcid-identifier"]["path"]
|
|
44
166
|
|
|
45
167
|
name = person.get("name")
|
|
46
168
|
if name is None:
|
|
47
169
|
raise TransformerError(f"Name not found in ORCiD entry.")
|
|
170
|
+
if name.get("family-name") is None:
|
|
171
|
+
raise TransformerError(f"Family name not found in ORCiD entry.")
|
|
172
|
+
|
|
173
|
+
if not self._is_valid_name(name["given-names"] + name["family-name"]):
|
|
174
|
+
raise TransformerError(f"Invalid characters in name.")
|
|
48
175
|
|
|
49
176
|
entry = {
|
|
177
|
+
"id": orcid_id,
|
|
50
178
|
"given_name": name.get("given-names"),
|
|
51
179
|
"family_name": name.get("family-name"),
|
|
52
180
|
"identifiers": [{"scheme": "orcid", "identifier": orcid_id}],
|
|
@@ -79,45 +207,19 @@ class OrcidTransformer(BaseTransformer):
|
|
|
79
207
|
class NamesServiceWriter(ServiceWriter):
|
|
80
208
|
"""Names service writer."""
|
|
81
209
|
|
|
82
|
-
def __init__(self, *args,
|
|
210
|
+
def __init__(self, *args, **kwargs):
|
|
83
211
|
"""Constructor."""
|
|
84
212
|
service_or_name = kwargs.pop("service_or_name", "names")
|
|
85
213
|
super().__init__(service_or_name=service_or_name, *args, **kwargs)
|
|
86
|
-
self._scheme_id = scheme_id
|
|
87
214
|
|
|
88
215
|
def _entry_id(self, entry):
|
|
89
216
|
"""Get the id from an entry."""
|
|
90
|
-
|
|
91
|
-
if identifier.get("scheme") == self._scheme_id:
|
|
92
|
-
return identifier["identifier"]
|
|
93
|
-
|
|
94
|
-
def _resolve(self, id_):
|
|
95
|
-
"""Resolve an entry given an id."""
|
|
96
|
-
return self._service.resolve(self._identity, id_=id_, id_type=self._scheme_id)
|
|
97
|
-
|
|
98
|
-
def write(self, stream_entry, *args, **kwargs):
|
|
99
|
-
"""Writes the input entry using a given service."""
|
|
100
|
-
entry = stream_entry.entry
|
|
101
|
-
try:
|
|
102
|
-
vocab_id = self._entry_id(entry)
|
|
103
|
-
# it is resolved before creation to avoid duplicates since
|
|
104
|
-
# the pid is recidv2 not e.g. the orcid
|
|
105
|
-
current = self._resolve(vocab_id)
|
|
106
|
-
if not self._update:
|
|
107
|
-
raise WriterError([f"Vocabulary entry already exists: {entry}"])
|
|
108
|
-
updated = dict(current.to_dict(), **entry)
|
|
109
|
-
return StreamEntry(
|
|
110
|
-
self._service.update(self._identity, current.id, updated)
|
|
111
|
-
)
|
|
112
|
-
except PIDDoesNotExistError:
|
|
113
|
-
return StreamEntry(self._service.create(self._identity, entry))
|
|
114
|
-
|
|
115
|
-
except ValidationError as err:
|
|
116
|
-
raise WriterError([{"ValidationError": err.messages}])
|
|
217
|
+
return entry["id"]
|
|
117
218
|
|
|
118
219
|
|
|
119
220
|
VOCABULARIES_DATASTREAM_READERS = {
|
|
120
221
|
"orcid-http": OrcidHTTPReader,
|
|
222
|
+
"orcid-data-sync": OrcidDataSyncReader,
|
|
121
223
|
}
|
|
122
224
|
|
|
123
225
|
|
|
@@ -138,10 +240,15 @@ DATASTREAM_CONFIG = {
|
|
|
138
240
|
{
|
|
139
241
|
"type": "tar",
|
|
140
242
|
"args": {
|
|
141
|
-
"regex": "
|
|
243
|
+
"regex": "\\.xml$",
|
|
244
|
+
},
|
|
245
|
+
},
|
|
246
|
+
{
|
|
247
|
+
"type": "xml",
|
|
248
|
+
"args": {
|
|
249
|
+
"root_element": "record",
|
|
142
250
|
},
|
|
143
251
|
},
|
|
144
|
-
{"type": "xml"},
|
|
145
252
|
],
|
|
146
253
|
"transformers": [{"type": "orcid"}],
|
|
147
254
|
"writers": [
|
|
@@ -157,3 +264,35 @@ DATASTREAM_CONFIG = {
|
|
|
157
264
|
|
|
158
265
|
An origin is required for the reader.
|
|
159
266
|
"""
|
|
267
|
+
|
|
268
|
+
# TODO: Used on the jobs and should be set as a "PRESET" (naming to be defined)
|
|
269
|
+
ORCID_PRESET_DATASTREAM_CONFIG = {
|
|
270
|
+
"readers": [
|
|
271
|
+
{
|
|
272
|
+
"type": "orcid-data-sync",
|
|
273
|
+
},
|
|
274
|
+
{
|
|
275
|
+
"type": "xml",
|
|
276
|
+
"args": {
|
|
277
|
+
"root_element": "record",
|
|
278
|
+
},
|
|
279
|
+
},
|
|
280
|
+
],
|
|
281
|
+
"transformers": [{"type": "orcid"}],
|
|
282
|
+
"writers": [
|
|
283
|
+
{
|
|
284
|
+
"type": "async",
|
|
285
|
+
"args": {
|
|
286
|
+
"writer": {
|
|
287
|
+
"type": "names-service",
|
|
288
|
+
}
|
|
289
|
+
},
|
|
290
|
+
}
|
|
291
|
+
],
|
|
292
|
+
"batch_size": 1000,
|
|
293
|
+
"write_many": True,
|
|
294
|
+
}
|
|
295
|
+
"""ORCiD Data Stream configuration.
|
|
296
|
+
|
|
297
|
+
An origin is required for the reader.
|
|
298
|
+
"""
|
|
@@ -8,12 +8,8 @@
|
|
|
8
8
|
"$schema": {
|
|
9
9
|
"$ref": "local://definitions-v1.0.0.json#/$schema"
|
|
10
10
|
},
|
|
11
|
-
"
|
|
12
|
-
"
|
|
13
|
-
"$ref": "local://definitions-v1.0.0.json#/identifier"
|
|
14
|
-
},
|
|
15
|
-
"pid": {
|
|
16
|
-
"$ref": "local://definitions-v1.0.0.json#/internal-pid"
|
|
11
|
+
"tags": {
|
|
12
|
+
"$ref": "local://vocabularies/definitions-v1.0.0.json#/tags"
|
|
17
13
|
},
|
|
18
14
|
"scheme": {
|
|
19
15
|
"description": "Identifier of the name scheme.",
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
{
|
|
2
|
+
"settings": {
|
|
3
|
+
"analysis": {
|
|
4
|
+
"char_filter": {
|
|
5
|
+
"strip_special_chars": {
|
|
6
|
+
"type": "pattern_replace",
|
|
7
|
+
"pattern": "[\\p{Punct}\\p{S}]",
|
|
8
|
+
"replacement": ""
|
|
9
|
+
}
|
|
10
|
+
},
|
|
11
|
+
"analyzer": {
|
|
12
|
+
"accent_edge_analyzer": {
|
|
13
|
+
"tokenizer": "standard",
|
|
14
|
+
"type": "custom",
|
|
15
|
+
"char_filter": ["strip_special_chars"],
|
|
16
|
+
"filter": [
|
|
17
|
+
"lowercasepreserveoriginal",
|
|
18
|
+
"asciifoldingpreserveoriginal",
|
|
19
|
+
"edgegrams"
|
|
20
|
+
]
|
|
21
|
+
},
|
|
22
|
+
"accent_analyzer": {
|
|
23
|
+
"tokenizer": "standard",
|
|
24
|
+
"type": "custom",
|
|
25
|
+
"char_filter": ["strip_special_chars"],
|
|
26
|
+
"filter": [
|
|
27
|
+
"lowercasepreserveoriginal",
|
|
28
|
+
"asciifoldingpreserveoriginal"
|
|
29
|
+
]
|
|
30
|
+
}
|
|
31
|
+
},
|
|
32
|
+
"normalizer": {
|
|
33
|
+
"accent_normalizer": {
|
|
34
|
+
"type": "custom",
|
|
35
|
+
"char_filter": ["strip_special_chars"],
|
|
36
|
+
"filter": [
|
|
37
|
+
"lowercase",
|
|
38
|
+
"asciifolding"
|
|
39
|
+
]
|
|
40
|
+
}
|
|
41
|
+
},
|
|
42
|
+
"filter": {
|
|
43
|
+
"lowercasepreserveoriginal": {
|
|
44
|
+
"type": "lowercase",
|
|
45
|
+
"preserve_original": true
|
|
46
|
+
},
|
|
47
|
+
"asciifoldingpreserveoriginal": {
|
|
48
|
+
"type": "asciifolding",
|
|
49
|
+
"preserve_original": true
|
|
50
|
+
},
|
|
51
|
+
"edgegrams": {
|
|
52
|
+
"type": "edge_ngram",
|
|
53
|
+
"min_gram": 2,
|
|
54
|
+
"max_gram": 20
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
},
|
|
59
|
+
"mappings": {
|
|
60
|
+
"dynamic": "strict",
|
|
61
|
+
"properties": {
|
|
62
|
+
"$schema": {
|
|
63
|
+
"type": "keyword",
|
|
64
|
+
"index": "false"
|
|
65
|
+
},
|
|
66
|
+
"created": {
|
|
67
|
+
"type": "date"
|
|
68
|
+
},
|
|
69
|
+
"updated": {
|
|
70
|
+
"type": "date"
|
|
71
|
+
},
|
|
72
|
+
"indexed_at": {
|
|
73
|
+
"type": "date"
|
|
74
|
+
},
|
|
75
|
+
"uuid": {
|
|
76
|
+
"type": "keyword"
|
|
77
|
+
},
|
|
78
|
+
"version_id": {
|
|
79
|
+
"type": "integer"
|
|
80
|
+
},
|
|
81
|
+
"id": {
|
|
82
|
+
"type": "keyword"
|
|
83
|
+
},
|
|
84
|
+
"tags": {
|
|
85
|
+
"type": "keyword"
|
|
86
|
+
},
|
|
87
|
+
"name_sort": {
|
|
88
|
+
"type": "keyword"
|
|
89
|
+
},
|
|
90
|
+
"name": {
|
|
91
|
+
"type": "text",
|
|
92
|
+
"analyzer": "accent_edge_analyzer",
|
|
93
|
+
"search_analyzer": "accent_analyzer",
|
|
94
|
+
"copy_to": "name_sort"
|
|
95
|
+
},
|
|
96
|
+
"given_name": {
|
|
97
|
+
"type": "text",
|
|
98
|
+
"analyzer": "accent_edge_analyzer",
|
|
99
|
+
"search_analyzer": "accent_analyzer"
|
|
100
|
+
},
|
|
101
|
+
"family_name": {
|
|
102
|
+
"type": "text"
|
|
103
|
+
},
|
|
104
|
+
"identifiers": {
|
|
105
|
+
"properties": {
|
|
106
|
+
"identifier": {
|
|
107
|
+
"type": "keyword",
|
|
108
|
+
"normalizer": "accent_normalizer"
|
|
109
|
+
},
|
|
110
|
+
"scheme": {
|
|
111
|
+
"type": "keyword"
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
},
|
|
115
|
+
"affiliations": {
|
|
116
|
+
"type": "object",
|
|
117
|
+
"properties": {
|
|
118
|
+
"@v": {
|
|
119
|
+
"type": "keyword"
|
|
120
|
+
},
|
|
121
|
+
"id": {
|
|
122
|
+
"type": "keyword"
|
|
123
|
+
},
|
|
124
|
+
"name": {
|
|
125
|
+
"type": "text",
|
|
126
|
+
"analyzer": "accent_edge_analyzer",
|
|
127
|
+
"search_analyzer": "accent_analyzer"
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
},
|
|
131
|
+
"pid": {
|
|
132
|
+
"type": "object",
|
|
133
|
+
"properties": {
|
|
134
|
+
"pk": {
|
|
135
|
+
"type": "integer"
|
|
136
|
+
},
|
|
137
|
+
"pid_type": {
|
|
138
|
+
"type": "keyword"
|
|
139
|
+
},
|
|
140
|
+
"obj_type": {
|
|
141
|
+
"type": "keyword"
|
|
142
|
+
},
|
|
143
|
+
"status": {
|
|
144
|
+
"type": "keyword"
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
}
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
{
|
|
2
|
+
"settings": {
|
|
3
|
+
"analysis": {
|
|
4
|
+
"char_filter": {
|
|
5
|
+
"strip_special_chars": {
|
|
6
|
+
"type": "pattern_replace",
|
|
7
|
+
"pattern": "[\\p{Punct}\\p{S}]",
|
|
8
|
+
"replacement": ""
|
|
9
|
+
}
|
|
10
|
+
},
|
|
11
|
+
"analyzer": {
|
|
12
|
+
"accent_edge_analyzer": {
|
|
13
|
+
"tokenizer": "standard",
|
|
14
|
+
"type": "custom",
|
|
15
|
+
"char_filter": ["strip_special_chars"],
|
|
16
|
+
"filter": [
|
|
17
|
+
"lowercasepreserveoriginal",
|
|
18
|
+
"asciifoldingpreserveoriginal",
|
|
19
|
+
"edgegrams"
|
|
20
|
+
]
|
|
21
|
+
},
|
|
22
|
+
"accent_analyzer": {
|
|
23
|
+
"tokenizer": "standard",
|
|
24
|
+
"type": "custom",
|
|
25
|
+
"char_filter": ["strip_special_chars"],
|
|
26
|
+
"filter": [
|
|
27
|
+
"lowercasepreserveoriginal",
|
|
28
|
+
"asciifoldingpreserveoriginal"
|
|
29
|
+
]
|
|
30
|
+
}
|
|
31
|
+
},
|
|
32
|
+
"normalizer": {
|
|
33
|
+
"accent_normalizer": {
|
|
34
|
+
"type": "custom",
|
|
35
|
+
"char_filter": ["strip_special_chars"],
|
|
36
|
+
"filter": [
|
|
37
|
+
"lowercase",
|
|
38
|
+
"asciifolding"
|
|
39
|
+
]
|
|
40
|
+
}
|
|
41
|
+
},
|
|
42
|
+
"filter": {
|
|
43
|
+
"lowercasepreserveoriginal": {
|
|
44
|
+
"type": "lowercase",
|
|
45
|
+
"preserve_original": true
|
|
46
|
+
},
|
|
47
|
+
"asciifoldingpreserveoriginal": {
|
|
48
|
+
"type": "asciifolding",
|
|
49
|
+
"preserve_original": true
|
|
50
|
+
},
|
|
51
|
+
"edgegrams": {
|
|
52
|
+
"type": "edge_ngram",
|
|
53
|
+
"min_gram": 2,
|
|
54
|
+
"max_gram": 20
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
},
|
|
59
|
+
"mappings": {
|
|
60
|
+
"dynamic": "strict",
|
|
61
|
+
"properties": {
|
|
62
|
+
"$schema": {
|
|
63
|
+
"type": "keyword",
|
|
64
|
+
"index": "false"
|
|
65
|
+
},
|
|
66
|
+
"created": {
|
|
67
|
+
"type": "date"
|
|
68
|
+
},
|
|
69
|
+
"updated": {
|
|
70
|
+
"type": "date"
|
|
71
|
+
},
|
|
72
|
+
"indexed_at": {
|
|
73
|
+
"type": "date"
|
|
74
|
+
},
|
|
75
|
+
"uuid": {
|
|
76
|
+
"type": "keyword"
|
|
77
|
+
},
|
|
78
|
+
"version_id": {
|
|
79
|
+
"type": "integer"
|
|
80
|
+
},
|
|
81
|
+
"id": {
|
|
82
|
+
"type": "keyword"
|
|
83
|
+
},
|
|
84
|
+
"tags": {
|
|
85
|
+
"type": "keyword"
|
|
86
|
+
},
|
|
87
|
+
"name_sort": {
|
|
88
|
+
"type": "keyword"
|
|
89
|
+
},
|
|
90
|
+
"name": {
|
|
91
|
+
"type": "text",
|
|
92
|
+
"analyzer": "accent_edge_analyzer",
|
|
93
|
+
"search_analyzer": "accent_analyzer",
|
|
94
|
+
"copy_to": "name_sort"
|
|
95
|
+
},
|
|
96
|
+
"given_name": {
|
|
97
|
+
"type": "text",
|
|
98
|
+
"analyzer": "accent_edge_analyzer",
|
|
99
|
+
"search_analyzer": "accent_analyzer"
|
|
100
|
+
},
|
|
101
|
+
"family_name": {
|
|
102
|
+
"type": "text"
|
|
103
|
+
},
|
|
104
|
+
"identifiers": {
|
|
105
|
+
"properties": {
|
|
106
|
+
"identifier": {
|
|
107
|
+
"type": "keyword",
|
|
108
|
+
"normalizer": "accent_normalizer"
|
|
109
|
+
},
|
|
110
|
+
"scheme": {
|
|
111
|
+
"type": "keyword"
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
},
|
|
115
|
+
"affiliations": {
|
|
116
|
+
"type": "object",
|
|
117
|
+
"properties": {
|
|
118
|
+
"@v": {
|
|
119
|
+
"type": "keyword"
|
|
120
|
+
},
|
|
121
|
+
"id": {
|
|
122
|
+
"type": "keyword"
|
|
123
|
+
},
|
|
124
|
+
"name": {
|
|
125
|
+
"type": "text",
|
|
126
|
+
"analyzer": "accent_edge_analyzer",
|
|
127
|
+
"search_analyzer": "accent_analyzer"
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
},
|
|
131
|
+
"pid": {
|
|
132
|
+
"type": "object",
|
|
133
|
+
"properties": {
|
|
134
|
+
"pk": {
|
|
135
|
+
"type": "integer"
|
|
136
|
+
},
|
|
137
|
+
"pid_type": {
|
|
138
|
+
"type": "keyword"
|
|
139
|
+
},
|
|
140
|
+
"obj_type": {
|
|
141
|
+
"type": "keyword"
|
|
142
|
+
},
|
|
143
|
+
"status": {
|
|
144
|
+
"type": "keyword"
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
}
|