invenio-vocabularies 4.4.0__py2.py3-none-any.whl → 5.0.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of invenio-vocabularies might be problematic. Click here for more details.

Files changed (36)
  1. invenio_vocabularies/__init__.py +1 -1
  2. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/package.json +0 -6
  3. invenio_vocabularies/config.py +7 -1
  4. invenio_vocabularies/contrib/affiliations/affiliations.py +1 -0
  5. invenio_vocabularies/contrib/affiliations/config.py +12 -1
  6. invenio_vocabularies/contrib/affiliations/mappings/os-v1/affiliations/affiliation-v2.0.0.json +171 -0
  7. invenio_vocabularies/contrib/affiliations/mappings/os-v2/affiliations/affiliation-v2.0.0.json +171 -0
  8. invenio_vocabularies/contrib/common/ror/datastreams.py +34 -32
  9. invenio_vocabularies/contrib/funders/config.py +3 -1
  10. invenio_vocabularies/contrib/funders/funders.py +1 -0
  11. invenio_vocabularies/contrib/funders/mappings/os-v1/funders/funder-v2.0.0.json +140 -0
  12. invenio_vocabularies/contrib/funders/mappings/os-v2/funders/funder-v2.0.0.json +140 -0
  13. invenio_vocabularies/contrib/names/config.py +5 -3
  14. invenio_vocabularies/contrib/names/mappings/os-v1/names/name-v2.0.0.json +150 -0
  15. invenio_vocabularies/contrib/names/mappings/os-v2/names/name-v2.0.0.json +150 -0
  16. invenio_vocabularies/contrib/names/names.py +1 -0
  17. invenio_vocabularies/contrib/subjects/config.py +9 -3
  18. invenio_vocabularies/contrib/subjects/datastreams.py +55 -0
  19. invenio_vocabularies/contrib/subjects/jsonschemas/subjects/subject-v1.0.0.json +12 -0
  20. invenio_vocabularies/contrib/subjects/mappings/os-v1/subjects/subject-v1.0.0.json +18 -0
  21. invenio_vocabularies/contrib/subjects/mappings/os-v2/subjects/subject-v1.0.0.json +18 -0
  22. invenio_vocabularies/contrib/subjects/mappings/v7/subjects/subject-v1.0.0.json +18 -0
  23. invenio_vocabularies/contrib/subjects/mesh/datastreams.py +43 -0
  24. invenio_vocabularies/contrib/subjects/schema.py +20 -2
  25. invenio_vocabularies/factories.py +13 -0
  26. invenio_vocabularies/services/config.py +1 -1
  27. invenio_vocabularies/services/service.py +1 -1
  28. invenio_vocabularies/translations/messages.pot +95 -48
  29. invenio_vocabularies/webpack.py +1 -1
  30. {invenio_vocabularies-4.4.0.dist-info → invenio_vocabularies-5.0.0.dist-info}/METADATA +5 -1
  31. {invenio_vocabularies-4.4.0.dist-info → invenio_vocabularies-5.0.0.dist-info}/RECORD +36 -28
  32. {invenio_vocabularies-4.4.0.dist-info → invenio_vocabularies-5.0.0.dist-info}/AUTHORS.rst +0 -0
  33. {invenio_vocabularies-4.4.0.dist-info → invenio_vocabularies-5.0.0.dist-info}/LICENSE +0 -0
  34. {invenio_vocabularies-4.4.0.dist-info → invenio_vocabularies-5.0.0.dist-info}/WHEEL +0 -0
  35. {invenio_vocabularies-4.4.0.dist-info → invenio_vocabularies-5.0.0.dist-info}/entry_points.txt +0 -0
  36. {invenio_vocabularies-4.4.0.dist-info → invenio_vocabularies-5.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,140 @@
1
+ {
2
+ "settings": {
3
+ "analysis": {
4
+ "char_filter": {
5
+ "strip_special_chars": {
6
+ "type": "pattern_replace",
7
+ "pattern": "[\\p{Punct}\\p{S}]",
8
+ "replacement": ""
9
+ }
10
+ },
11
+ "analyzer": {
12
+ "accent_edge_analyzer": {
13
+ "tokenizer": "standard",
14
+ "type": "custom",
15
+ "char_filter": ["strip_special_chars"],
16
+ "filter": [
17
+ "lowercase",
18
+ "asciifolding",
19
+ "edgegrams"
20
+ ]
21
+ },
22
+ "accent_analyzer": {
23
+ "tokenizer": "standard",
24
+ "type": "custom",
25
+ "char_filter": ["strip_special_chars"],
26
+ "filter": [
27
+ "lowercase",
28
+ "asciifolding"
29
+ ]
30
+ }
31
+ },
32
+ "filter": {
33
+ "lowercase": {
34
+ "type": "lowercase",
35
+ "preserve_original": true
36
+ },
37
+ "asciifolding": {
38
+ "type": "asciifolding",
39
+ "preserve_original": true
40
+ },
41
+ "edgegrams": {
42
+ "type": "edge_ngram",
43
+ "min_gram": 2,
44
+ "max_gram": 20
45
+ }
46
+ }
47
+ }
48
+ },
49
+ "mappings": {
50
+ "dynamic_templates": [
51
+ {
52
+ "i18n_title": {
53
+ "path_match": "title.*",
54
+ "match_mapping_type": "string",
55
+ "mapping": {
56
+ "type": "text",
57
+ "analyzer": "accent_edge_analyzer",
58
+ "search_analyzer": "accent_analyzer"
59
+ }
60
+ }
61
+ }
62
+ ],
63
+ "dynamic": "strict",
64
+ "properties": {
65
+ "$schema": {
66
+ "type": "keyword",
67
+ "index": "false"
68
+ },
69
+ "created": {
70
+ "type": "date"
71
+ },
72
+ "updated": {
73
+ "type": "date"
74
+ },
75
+ "indexed_at": {
76
+ "type": "date"
77
+ },
78
+ "uuid": {
79
+ "type": "keyword"
80
+ },
81
+ "version_id": {
82
+ "type": "integer"
83
+ },
84
+ "identifiers": {
85
+ "properties": {
86
+ "identifier": {
87
+ "type": "keyword"
88
+ },
89
+ "scheme": {
90
+ "type": "keyword"
91
+ }
92
+ }
93
+ },
94
+ "name_sort": {
95
+ "type": "keyword"
96
+ },
97
+ "name": {
98
+ "type": "text",
99
+ "analyzer": "accent_edge_analyzer",
100
+ "search_analyzer": "accent_analyzer",
101
+ "copy_to": "name_sort"
102
+ },
103
+ "country": {
104
+ "type": "text"
105
+ },
106
+ "country_name": {
107
+ "type": "text"
108
+ },
109
+ "location_name": {
110
+ "type": "text"
111
+ },
112
+ "acronym": {
113
+ "type": "text",
114
+ "analyzer": "accent_edge_analyzer",
115
+ "search_analyzer": "accent_analyzer"
116
+ },
117
+ "status": {
118
+ "type": "keyword"
119
+ },
120
+ "aliases": {
121
+ "type": "text",
122
+ "analyzer": "accent_edge_analyzer",
123
+ "search_analyzer": "accent_analyzer"
124
+ },
125
+ "types": {
126
+ "type": "keyword"
127
+ },
128
+ "id": {
129
+ "type": "keyword"
130
+ },
131
+ "title": {
132
+ "type": "object",
133
+ "dynamic": "true"
134
+ },
135
+ "tags": {
136
+ "type": "keyword"
137
+ }
138
+ }
139
+ }
140
+ }
@@ -28,12 +28,14 @@ class NamesSearchOptions(SearchOptions):
28
28
 
29
29
  suggest_parser_cls = SuggestQueryParser.factory(
30
30
  fields=[
31
- "name^100",
32
- "family_name^100",
33
31
  "given_name^100",
32
+ "name^70",
33
+ "family_name^50",
34
34
  "identifiers.identifier^20",
35
- "affiliations.name^10",
35
+ "affiliations.name^20",
36
36
  ],
37
+ type="most_fields", # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html#multi-match-types
38
+ fuzziness="AUTO",
37
39
  )
38
40
 
39
41
  sort_default = "bestmatch"
@@ -0,0 +1,150 @@
1
+ {
2
+ "settings": {
3
+ "analysis": {
4
+ "char_filter": {
5
+ "strip_special_chars": {
6
+ "type": "pattern_replace",
7
+ "pattern": "[\\p{Punct}\\p{S}]",
8
+ "replacement": ""
9
+ }
10
+ },
11
+ "analyzer": {
12
+ "accent_edge_analyzer": {
13
+ "tokenizer": "standard",
14
+ "type": "custom",
15
+ "char_filter": ["strip_special_chars"],
16
+ "filter": [
17
+ "lowercase",
18
+ "asciifolding",
19
+ "edgegrams"
20
+ ]
21
+ },
22
+ "accent_analyzer": {
23
+ "tokenizer": "standard",
24
+ "type": "custom",
25
+ "char_filter": ["strip_special_chars"],
26
+ "filter": [
27
+ "lowercase",
28
+ "asciifolding"
29
+ ]
30
+ }
31
+ },
32
+ "normalizer": {
33
+ "accent_normalizer": {
34
+ "type": "custom",
35
+ "char_filter": ["strip_special_chars"],
36
+ "filter": [
37
+ "lowercase",
38
+ "asciifolding"
39
+ ]
40
+ }
41
+ },
42
+ "filter": {
43
+ "lowercase": {
44
+ "type": "lowercase",
45
+ "preserve_original": true
46
+ },
47
+ "asciifolding": {
48
+ "type": "asciifolding",
49
+ "preserve_original": true
50
+ },
51
+ "edgegrams": {
52
+ "type": "edge_ngram",
53
+ "min_gram": 2,
54
+ "max_gram": 20
55
+ }
56
+ }
57
+ }
58
+ },
59
+ "mappings": {
60
+ "dynamic": "strict",
61
+ "properties": {
62
+ "$schema": {
63
+ "type": "keyword",
64
+ "index": "false"
65
+ },
66
+ "created": {
67
+ "type": "date"
68
+ },
69
+ "updated": {
70
+ "type": "date"
71
+ },
72
+ "indexed_at": {
73
+ "type": "date"
74
+ },
75
+ "uuid": {
76
+ "type": "keyword"
77
+ },
78
+ "version_id": {
79
+ "type": "integer"
80
+ },
81
+ "id": {
82
+ "type": "keyword"
83
+ },
84
+ "tags": {
85
+ "type": "keyword"
86
+ },
87
+ "name_sort": {
88
+ "type": "keyword"
89
+ },
90
+ "name": {
91
+ "type": "text",
92
+ "analyzer": "accent_edge_analyzer",
93
+ "search_analyzer": "accent_analyzer",
94
+ "copy_to": "name_sort"
95
+ },
96
+ "given_name": {
97
+ "type": "text",
98
+ "analyzer": "accent_edge_analyzer",
99
+ "search_analyzer": "accent_analyzer"
100
+ },
101
+ "family_name": {
102
+ "type": "text"
103
+ },
104
+ "identifiers": {
105
+ "properties": {
106
+ "identifier": {
107
+ "type": "keyword",
108
+ "normalizer": "accent_normalizer"
109
+ },
110
+ "scheme": {
111
+ "type": "keyword"
112
+ }
113
+ }
114
+ },
115
+ "affiliations": {
116
+ "type": "object",
117
+ "properties": {
118
+ "@v": {
119
+ "type": "keyword"
120
+ },
121
+ "id": {
122
+ "type": "keyword"
123
+ },
124
+ "name": {
125
+ "type": "text",
126
+ "analyzer": "accent_edge_analyzer",
127
+ "search_analyzer": "accent_analyzer"
128
+ }
129
+ }
130
+ },
131
+ "pid": {
132
+ "type": "object",
133
+ "properties": {
134
+ "pk": {
135
+ "type": "integer"
136
+ },
137
+ "pid_type": {
138
+ "type": "keyword"
139
+ },
140
+ "obj_type": {
141
+ "type": "keyword"
142
+ },
143
+ "status": {
144
+ "type": "keyword"
145
+ }
146
+ }
147
+ }
148
+ }
149
+ }
150
+ }
@@ -0,0 +1,150 @@
1
+ {
2
+ "settings": {
3
+ "analysis": {
4
+ "char_filter": {
5
+ "strip_special_chars": {
6
+ "type": "pattern_replace",
7
+ "pattern": "[\\p{Punct}\\p{S}]",
8
+ "replacement": ""
9
+ }
10
+ },
11
+ "analyzer": {
12
+ "accent_edge_analyzer": {
13
+ "tokenizer": "standard",
14
+ "type": "custom",
15
+ "char_filter": ["strip_special_chars"],
16
+ "filter": [
17
+ "lowercase",
18
+ "asciifolding",
19
+ "edgegrams"
20
+ ]
21
+ },
22
+ "accent_analyzer": {
23
+ "tokenizer": "standard",
24
+ "type": "custom",
25
+ "char_filter": ["strip_special_chars"],
26
+ "filter": [
27
+ "lowercase",
28
+ "asciifolding"
29
+ ]
30
+ }
31
+ },
32
+ "normalizer": {
33
+ "accent_normalizer": {
34
+ "type": "custom",
35
+ "char_filter": ["strip_special_chars"],
36
+ "filter": [
37
+ "lowercase",
38
+ "asciifolding"
39
+ ]
40
+ }
41
+ },
42
+ "filter": {
43
+ "lowercase": {
44
+ "type": "lowercase",
45
+ "preserve_original": true
46
+ },
47
+ "asciifolding": {
48
+ "type": "asciifolding",
49
+ "preserve_original": true
50
+ },
51
+ "edgegrams": {
52
+ "type": "edge_ngram",
53
+ "min_gram": 2,
54
+ "max_gram": 20
55
+ }
56
+ }
57
+ }
58
+ },
59
+ "mappings": {
60
+ "dynamic": "strict",
61
+ "properties": {
62
+ "$schema": {
63
+ "type": "keyword",
64
+ "index": "false"
65
+ },
66
+ "created": {
67
+ "type": "date"
68
+ },
69
+ "updated": {
70
+ "type": "date"
71
+ },
72
+ "indexed_at": {
73
+ "type": "date"
74
+ },
75
+ "uuid": {
76
+ "type": "keyword"
77
+ },
78
+ "version_id": {
79
+ "type": "integer"
80
+ },
81
+ "id": {
82
+ "type": "keyword"
83
+ },
84
+ "tags": {
85
+ "type": "keyword"
86
+ },
87
+ "name_sort": {
88
+ "type": "keyword"
89
+ },
90
+ "name": {
91
+ "type": "text",
92
+ "analyzer": "accent_edge_analyzer",
93
+ "search_analyzer": "accent_analyzer",
94
+ "copy_to": "name_sort"
95
+ },
96
+ "given_name": {
97
+ "type": "text",
98
+ "analyzer": "accent_edge_analyzer",
99
+ "search_analyzer": "accent_analyzer"
100
+ },
101
+ "family_name": {
102
+ "type": "text"
103
+ },
104
+ "identifiers": {
105
+ "properties": {
106
+ "identifier": {
107
+ "type": "keyword",
108
+ "normalizer": "accent_normalizer"
109
+ },
110
+ "scheme": {
111
+ "type": "keyword"
112
+ }
113
+ }
114
+ },
115
+ "affiliations": {
116
+ "type": "object",
117
+ "properties": {
118
+ "@v": {
119
+ "type": "keyword"
120
+ },
121
+ "id": {
122
+ "type": "keyword"
123
+ },
124
+ "name": {
125
+ "type": "text",
126
+ "analyzer": "accent_edge_analyzer",
127
+ "search_analyzer": "accent_analyzer"
128
+ }
129
+ }
130
+ },
131
+ "pid": {
132
+ "type": "object",
133
+ "properties": {
134
+ "pk": {
135
+ "type": "integer"
136
+ },
137
+ "pid_type": {
138
+ "type": "keyword"
139
+ },
140
+ "obj_type": {
141
+ "type": "keyword"
142
+ },
143
+ "status": {
144
+ "type": "keyword"
145
+ }
146
+ }
147
+ }
148
+ }
149
+ }
150
+ }
@@ -49,6 +49,7 @@ record_type = RecordTypeFactory(
49
49
  },
50
50
  schema_version="1.0.0",
51
51
  schema_path="local://names/name-v1.0.0.json",
52
+ index_name="names-name-v2.0.0",
52
53
  record_relations=name_relations,
53
54
  record_dumper=SearchDumper(
54
55
  model_fields={"pid": ("id", str)},
@@ -2,6 +2,7 @@
2
2
  #
3
3
  # Copyright (C) 2021 CERN.
4
4
  # Copyright (C) 2021 Northwestern University.
5
+ # Copyright (C) 2024 University of Münster.
5
6
  #
6
7
  # Invenio-Vocabularies is free software; you can redistribute it and/or
7
8
  # modify it under the terms of the MIT License; see LICENSE file for more
@@ -9,13 +10,19 @@
9
10
 
10
11
  """Subjects configuration."""
11
12
 
13
+ from flask import current_app
12
14
  from invenio_i18n import lazy_gettext as _
13
15
  from invenio_records_resources.services import SearchOptions
14
16
  from invenio_records_resources.services.records.components import DataComponent
17
+ from werkzeug.local import LocalProxy
15
18
 
16
19
  from ...services.components import PIDComponent
17
20
  from ...services.querystr import FilteredSuggestQueryParser
18
21
 
22
+ subject_schemes = LocalProxy(
23
+ lambda: current_app.config["VOCABULARIES_SUBJECTS_SCHEMES"]
24
+ )
25
+
19
26
 
20
27
  class SubjectsSearchOptions(SearchOptions):
21
28
  """Search options."""
@@ -23,9 +30,8 @@ class SubjectsSearchOptions(SearchOptions):
23
30
  suggest_parser_cls = FilteredSuggestQueryParser.factory(
24
31
  filter_field="scheme",
25
32
  fields=[ # suggest fields
26
- "subject^100",
27
- "subject._2gram",
28
- "subject._3gram",
33
+ "title.*^100",
34
+ "synonyms^20",
29
35
  ],
30
36
  )
31
37
 
@@ -0,0 +1,55 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2024 University of Münster.
4
+ #
5
+ # Invenio-Vocabularies is free software; you can redistribute it and/or
6
+ # modify it under the terms of the MIT License; see LICENSE file for more
7
+ # details.
8
+
9
+ """Subjects datastreams, transformers, writers and readers."""
10
+
11
+ from invenio_access.permissions import system_identity
12
+ from invenio_i18n import lazy_gettext as _
13
+
14
+ from ...datastreams.writers import ServiceWriter
15
+ from .mesh.datastreams import VOCABULARIES_DATASTREAM_READERS as mesh_readers
16
+ from .mesh.datastreams import VOCABULARIES_DATASTREAM_TRANSFORMERS as mesh_transformers
17
+ from .mesh.datastreams import VOCABULARIES_DATASTREAM_WRITERS as mesh_writers
18
+
19
+
20
+ class SubjectsServiceWriter(ServiceWriter):
21
+ """Subjects Service Writer."""
22
+
23
+ def __init__(self, *args, **kwargs):
24
+ """Constructor."""
25
+ service_or_name = kwargs.pop("service_or_name", "subjects")
26
+ super().__init__(service_or_name=service_or_name, *args, **kwargs)
27
+
28
+ def _entry_id(self, entry):
29
+ """Get the id from an entry."""
30
+ return entry["id"]
31
+
32
+
33
+ VOCABULARIES_DATASTREAM_READERS = {**mesh_readers}
34
+ """Subjects Data Streams readers."""
35
+
36
+ VOCABULARIES_DATASTREAM_TRANSFORMERS = {**mesh_transformers}
37
+ """Subjects Data Streams transformers."""
38
+
39
+ VOCABULARIES_DATASTREAM_WRITERS = {
40
+ "subjects-service": SubjectsServiceWriter,
41
+ **mesh_writers,
42
+ }
43
+ """Subjects Data Streams writers."""
44
+
45
+ DATASTREAM_CONFIG = {
46
+ "readers": [
47
+ {"type": "yaml"},
48
+ ],
49
+ "writers": [
50
+ {
51
+ "type": "subjects-service",
52
+ }
53
+ ],
54
+ }
55
+ """Data Stream configuration."""
@@ -25,6 +25,18 @@
25
25
  "subject": {
26
26
  "description": "Human readable label.",
27
27
  "type": "string"
28
+ },
29
+ "title": {
30
+ "description": "Human readable label in different languages.",
31
+ "$ref": "local://vocabularies/definitions-v1.0.0.json#/title"
32
+ },
33
+ "synonyms": {
34
+ "description": "Synonyms of the subject label.",
35
+ "type": "array",
36
+ "items": {
37
+ "type": "string"
38
+ },
39
+ "uniqueItems": true
28
40
  }
29
41
  }
30
42
  }
@@ -1,5 +1,16 @@
1
1
  {
2
2
  "mappings": {
3
+ "dynamic_templates": [
4
+ {
5
+ "i18n_title": {
6
+ "path_match": "title.*",
7
+ "match_mapping_type": "string",
8
+ "mapping": {
9
+ "type": "search_as_you_type"
10
+ }
11
+ }
12
+ }
13
+ ],
3
14
  "dynamic": "strict",
4
15
  "properties": {
5
16
  "$schema": {
@@ -56,6 +67,13 @@
56
67
  }
57
68
  }
58
69
  },
70
+ "title": {
71
+ "type": "object",
72
+ "dynamic": "true"
73
+ },
74
+ "synonyms": {
75
+ "type": "text"
76
+ },
59
77
  "tags": {
60
78
  "type": "keyword"
61
79
  }
@@ -1,5 +1,16 @@
1
1
  {
2
2
  "mappings": {
3
+ "dynamic_templates": [
4
+ {
5
+ "i18n_title": {
6
+ "path_match": "title.*",
7
+ "match_mapping_type": "string",
8
+ "mapping": {
9
+ "type": "search_as_you_type"
10
+ }
11
+ }
12
+ }
13
+ ],
3
14
  "dynamic": "strict",
4
15
  "properties": {
5
16
  "$schema": {
@@ -56,6 +67,13 @@
56
67
  }
57
68
  }
58
69
  },
70
+ "title": {
71
+ "type": "object",
72
+ "dynamic": "true"
73
+ },
74
+ "synonyms": {
75
+ "type": "text"
76
+ },
59
77
  "tags": {
60
78
  "type": "keyword"
61
79
  }
@@ -1,5 +1,16 @@
1
1
  {
2
2
  "mappings": {
3
+ "dynamic_templates": [
4
+ {
5
+ "i18n_title": {
6
+ "path_match": "title.*",
7
+ "match_mapping_type": "string",
8
+ "mapping": {
9
+ "type": "search_as_you_type"
10
+ }
11
+ }
12
+ }
13
+ ],
3
14
  "dynamic": "strict",
4
15
  "properties": {
5
16
  "$schema": {
@@ -56,6 +67,13 @@
56
67
  }
57
68
  }
58
69
  },
70
+ "title": {
71
+ "type": "object",
72
+ "dynamic": "true"
73
+ },
74
+ "synonyms": {
75
+ "type": "text"
76
+ },
59
77
  "tags": {
60
78
  "type": "keyword"
61
79
  }