oarepo-runtime 1.7.2__py3-none-any.whl → 1.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,26 @@
1
- from .icu import ICUField, ICUSortField, ICUSuggestField
1
+ from .icu import (
2
+ FulltextIndexField,
3
+ ICUField,
4
+ ICUSearchField,
5
+ ICUSortField,
6
+ ICUSuggestField,
7
+ )
2
8
  from .mapping import MappingSystemFieldMixin, SystemFieldDumperExt
3
- from .selectors import FirstItemSelector, PathSelector, Selector, FilteredSelector, MultiSelector
9
+ from .selectors import (
10
+ FilteredSelector,
11
+ FirstItemSelector,
12
+ MultiSelector,
13
+ PathSelector,
14
+ Selector,
15
+ )
4
16
  from .synthetic import SyntheticSystemField
5
17
 
6
18
  __all__ = (
7
19
  "ICUField",
8
20
  "ICUSuggestField",
9
21
  "ICUSortField",
22
+ "ICUSearchField",
23
+ "FulltextIndexField",
10
24
  "MappingSystemFieldMixin",
11
25
  "SystemFieldDumperExt",
12
26
  "SyntheticSystemField",
@@ -37,11 +37,18 @@ class ICUField(MappingSystemFieldMixin, SystemField):
37
37
  ret = []
38
38
  for l in lookup_key(data, f"{self.source_field}"):
39
39
  if isinstance(l.value, str):
40
+ # take a single string value as always being in the language provided
40
41
  ret.append(l.value)
41
42
  elif isinstance(l.value, dict):
43
+ # expected to be {"cs": "", "en": ""}
42
44
  val = l.value.get(language)
43
45
  if val:
44
46
  ret.append(val)
47
+ elif "lang" in l.value:
48
+ # for [{"lang": "", "value": ""}, ...] we get each item separately
49
+ # that's why we do not iterate over l.value
50
+ if l.value["lang"] == language:
51
+ ret.append(l.value["value"])
45
52
  return ret
46
53
 
47
54
  def search_dump(self, data, record):
@@ -132,23 +139,24 @@ class ICUSuggestField(ICUField):
132
139
  }
133
140
 
134
141
 
135
- class ICUSearchField(ICUField):
136
- """
137
- A field that adds stemming-aware search field
138
- """
142
+ class ICUSearchAnalyzerMixin:
139
143
 
140
144
  default_stemming_analyzers = {
141
145
  "stemming_analyzer_cs": {
142
146
  "tokenizer": "standard",
143
- "filter": ["stemming_filter_cs"],
147
+ "filter": ["stemming_filter_cs", "lowercase"],
144
148
  },
145
149
  "stemming_analyzer_en": {
146
150
  "tokenizer": "standard",
147
- "filter": ["stemming_filter_en"],
151
+ "filter": ["stemming_filter_en", "lowercase"],
148
152
  },
149
153
  "ascii_folding_analyzer": {
150
154
  "tokenizer": "standard",
151
- "filter": ["ascii_folding_filter"],
155
+ "filter": ["ascii_folding_filter", "lowercase"],
156
+ },
157
+ "lowercase_analyzer": {
158
+ "tokenizer": "standard",
159
+ "filter": ["lowercase"],
152
160
  },
153
161
  }
154
162
 
@@ -166,6 +174,25 @@ class ICUSearchField(ICUField):
166
174
  "ascii_folding_filter": {"type": "asciifolding", "preserve_original": True},
167
175
  }
168
176
 
177
+ @property
178
+ def mapping_settings(self):
179
+ return {
180
+ "analysis": {
181
+ "analyzer": current_app.config.get(
182
+ "OAREPO_ICU_SEARCH_ANALYZERS", self.default_stemming_analyzers
183
+ ),
184
+ "filter": current_app.config.get(
185
+ "OAREPO_ICU_SEARCH_FILTERS", self.default_stemming_filters
186
+ ),
187
+ }
188
+ }
189
+
190
+
191
+ class ICUSearchField(ICUSearchAnalyzerMixin, ICUField):
192
+ """
193
+ A field that adds stemming-aware search field
194
+ """
195
+
169
196
  def __init__(self, source_field, key=None):
170
197
  super().__init__(source_field=source_field, key=key)
171
198
 
@@ -187,6 +214,11 @@ class ICUSearchField(ICUField):
187
214
  "analyzer": f"stemming_analyzer_{lang}",
188
215
  "boost": 0.5,
189
216
  },
217
+ "lowercase": {
218
+ "type": "text",
219
+ "boost": 0.8,
220
+ "analyzer": "lowercase_analyzer",
221
+ },
190
222
  "ascii_folded": {
191
223
  "type": "text",
192
224
  "analyzer": "ascii_folding_analyzer",
@@ -200,15 +232,59 @@ class ICUSearchField(ICUField):
200
232
  },
201
233
  }
202
234
 
235
+ def get_values(self, data, language):
236
+ return super().get_values(data, language=language)
237
+
238
+
239
+ class FulltextIndexField(ICUSearchAnalyzerMixin, ICUField):
240
+ """
241
+ A system field that makes the field searchable in OpenSearch,
242
+ regardless of whether it is indexed/analyzed or embedded in Nested.
243
+
244
+ It creates a top-level mapping field and copies
245
+ content of {source_field} into it. It also provides the correct mapping
246
+ for the field based on the current configuration of the application.
247
+
248
+ Unlike the ICU field, this field is single-language, and the language should
249
+ be provided when initializing the field.
250
+ It defaults to the BABEL_DEFAULT_LOCALE if not provided.
251
+ """
252
+
253
+ def __init__(self, *, source_field, key=None, language=None):
254
+ super().__init__(source_field=source_field, key=key)
255
+ self.language = language
256
+
203
257
  @property
204
- def mapping_settings(self):
205
- return {
206
- "analysis": {
207
- "analyzer": current_app.config.get(
208
- "OAREPO_ICU_SEARCH_ANALYZERS", self.default_stemming_analyzers
209
- ),
210
- "filter": current_app.config.get(
211
- "OAREPO_ICU_SEARCH_FILTERS", self.default_stemming_filters
212
- ),
258
+ def mapping(self):
259
+ language = self.language or current_app.config.get("BABEL_DEFAULT_LOCALE", "en")
260
+ mapping_settings = self.languages.get(language, None)
261
+ if mapping_settings:
262
+ mapping_settings = mapping_settings.get("search")
263
+ if not mapping_settings:
264
+ mapping_settings = {
265
+ "type": "text",
266
+ "boost": 1,
267
+ "fields": {
268
+ "stemmed": {
269
+ "type": "text",
270
+ "analyzer": f"stemming_analyzer_{language}",
271
+ "boost": 0.5,
272
+ },
273
+ "ascii_folded": {
274
+ "type": "text",
275
+ "analyzer": "ascii_folding_analyzer",
276
+ "boost": 0.3,
277
+ },
278
+ },
213
279
  }
214
- }
280
+
281
+ return {self.attr_name: mapping_settings}
282
+
283
+ def search_dump(self, data, record):
284
+ """Dump custom field."""
285
+ data[self.attr_name] = self.get_values(data, language=self.language)
286
+
287
+ @classmethod
288
+ def search_load(cls, data, record_cls):
289
+ """Load custom field."""
290
+ data.pop(cls.attr_name, None)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: oarepo-runtime
3
- Version: 1.7.2
3
+ Version: 1.8.0
4
4
  Summary: A set of runtime extensions of Invenio repository
5
5
  Description-Content-Type: text/markdown
6
6
  License-File: LICENSE
@@ -70,10 +70,10 @@ oarepo_runtime/records/relations/base.py,sha256=ESTwj0-eT8HRTJ8QcE5fmqzjOjBFHpQq
70
70
  oarepo_runtime/records/relations/internal.py,sha256=OTp8iJqyl80sWDk0Q0AK42l6UsxZDABspVU_GwWza9o,1556
71
71
  oarepo_runtime/records/relations/lookup.py,sha256=wi3jPfOedazOmhOMrgu50PUETc1jfSdpmjK0wvOFsEM,848
72
72
  oarepo_runtime/records/relations/pid_relation.py,sha256=eojw5uIo5zXmJGge_bj6Wj2njCRY5S4o4B_h_HFyaDY,3901
73
- oarepo_runtime/records/systemfields/__init__.py,sha256=LL1R64RUakA_4r0IkTq9MtwqD5eV-AQaj5u96zkWa74,533
73
+ oarepo_runtime/records/systemfields/__init__.py,sha256=fdOvVL7L-Q4pcNHPcixYw52ydwEd9mBJVVvZT2-6ON4,667
74
74
  oarepo_runtime/records/systemfields/featured_file.py,sha256=MbSaYR130_o5S9gEOblnChq-PVK4xGPGpSCrzwG3cwc,1720
75
75
  oarepo_runtime/records/systemfields/has_draftcheck.py,sha256=4JkMEefPLpqtPtlTgK3UT0KzTRgyw5_Qtkss2qcz5xk,1643
76
- oarepo_runtime/records/systemfields/icu.py,sha256=sSGAgi5WhsAY4cCBL7-7nMpvHAuctpW8Y8vRExHQUfk,6738
76
+ oarepo_runtime/records/systemfields/icu.py,sha256=id4yv80DG-8XWMaSeegpoF7JWciXTHq69U2Xqo_4lW8,9700
77
77
  oarepo_runtime/records/systemfields/mapping.py,sha256=tXOK_jkdY1pOUO7_VfChfDNB8UTi21GUXaidpugTnO8,1017
78
78
  oarepo_runtime/records/systemfields/owner.py,sha256=dYRVBinniW7ECHuSnTAjeN6x1KhhJtNR9vxmD1KswMs,3805
79
79
  oarepo_runtime/records/systemfields/record_status.py,sha256=U3kem4-JkNsT17e0iAl3HIAZ2MvO5lY_0U757aZvTKE,935
@@ -150,13 +150,13 @@ oarepo_runtime/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
150
150
  oarepo_runtime/utils/functools.py,sha256=gKS9YZtlIYcDvdNA9cmYO00yjiXBYV1jg8VpcRUyQyg,1324
151
151
  oarepo_runtime/utils/index.py,sha256=ArrUUXB-KowUcUksRKqcFpmqct4bn9alO1zd_kX2tmU,292
152
152
  oarepo_runtime/utils/path.py,sha256=V1NVyk3m12_YLbj7QHYvUpE1wScO78bYsX1LOLeXDkI,3108
153
- oarepo_runtime-1.7.2.dist-info/licenses/LICENSE,sha256=h2uWz0OaB3EN-J1ImdGJZzc7yvfQjvHVYdUhQ-H7ypY,1064
153
+ oarepo_runtime-1.8.0.dist-info/licenses/LICENSE,sha256=h2uWz0OaB3EN-J1ImdGJZzc7yvfQjvHVYdUhQ-H7ypY,1064
154
154
  tests/marshmallow_to_json/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
155
155
  tests/marshmallow_to_json/test_datacite_ui_schema.py,sha256=82iLj8nW45lZOUewpWbLX3mpSkpa9lxo-vK-Qtv_1bU,48552
156
156
  tests/marshmallow_to_json/test_simple_schema.py,sha256=izZN9p0v6kovtSZ6AdxBYmK_c6ZOti2_z_wPT_zXIr0,1500
157
157
  tests/pkg_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
158
- oarepo_runtime-1.7.2.dist-info/METADATA,sha256=zgNr97N1sEuZ23n7YI99fqBLKHIRKdQdxwp9vS9DtLo,4788
159
- oarepo_runtime-1.7.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
160
- oarepo_runtime-1.7.2.dist-info/entry_points.txt,sha256=k7O5LZUOGsVeSpB7ulU0txBUNp1CVQG7Q7TJIVTPbzU,491
161
- oarepo_runtime-1.7.2.dist-info/top_level.txt,sha256=bHhlkT1_RQC4IkfTQCqA3iN4KCB6cSFQlsXpQMSP-bE,21
162
- oarepo_runtime-1.7.2.dist-info/RECORD,,
158
+ oarepo_runtime-1.8.0.dist-info/METADATA,sha256=2RIMYE7SkyjV-UjfAVIT-CBXIOk8ImbsxDrIweni-Mw,4788
159
+ oarepo_runtime-1.8.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
160
+ oarepo_runtime-1.8.0.dist-info/entry_points.txt,sha256=k7O5LZUOGsVeSpB7ulU0txBUNp1CVQG7Q7TJIVTPbzU,491
161
+ oarepo_runtime-1.8.0.dist-info/top_level.txt,sha256=bHhlkT1_RQC4IkfTQCqA3iN4KCB6cSFQlsXpQMSP-bE,21
162
+ oarepo_runtime-1.8.0.dist-info/RECORD,,