searchkick 4.4.0 → 5.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,491 +1,552 @@
1
1
  module Searchkick
2
- module IndexOptions
3
- def index_options
4
- options = @options
5
- language = options[:language]
6
- language = language.call if language.respond_to?(:call)
2
+ class IndexOptions
3
+ attr_reader :options
7
4
 
8
- below62 = Searchkick.server_below?("6.2.0")
9
- below70 = Searchkick.server_below?("7.0.0")
10
- below73 = Searchkick.server_below?("7.3.0")
11
-
12
- if below70
13
- index_type = options[:_type]
14
- index_type = index_type.call if index_type.respond_to?(:call)
15
- end
5
+ def initialize(index)
6
+ @options = index.options
7
+ end
16
8
 
17
- custom_mapping = options[:mappings] || {}
18
- if below70 && custom_mapping.keys.map(&:to_sym).include?(:properties)
19
- # add type
20
- custom_mapping = {index_type => custom_mapping}
21
- end
9
+ def index_options
10
+ # mortal symbols are garbage collected in Ruby 2.2+
11
+ custom_settings = (options[:settings] || {}).deep_symbolize_keys
12
+ custom_mappings = (options[:mappings] || {}).deep_symbolize_keys
22
13
 
23
14
  if options[:mappings] && !options[:merge_mappings]
24
- settings = options[:settings] || {}
25
- mappings = custom_mapping
15
+ settings = custom_settings
16
+ mappings = custom_mappings
26
17
  else
27
- default_type = "text"
28
- default_analyzer = :searchkick_index
29
- keyword_mapping = {type: "keyword"}
30
-
31
- keyword_mapping[:ignore_above] = options[:ignore_above] || 30000
32
-
33
- settings = {
34
- analysis: {
35
- analyzer: {
36
- searchkick_keyword: {
37
- type: "custom",
38
- tokenizer: "keyword",
39
- filter: ["lowercase"] + (options[:stem_conversions] ? ["searchkick_stemmer"] : [])
40
- },
41
- default_analyzer => {
42
- type: "custom",
43
- # character filters -> tokenizer -> token filters
44
- # https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html
45
- char_filter: ["ampersand"],
46
- tokenizer: "standard",
47
- # synonym should come last, after stemming and shingle
48
- # shingle must come before searchkick_stemmer
49
- filter: ["lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
50
- },
51
- searchkick_search: {
52
- type: "custom",
53
- char_filter: ["ampersand"],
54
- tokenizer: "standard",
55
- filter: ["lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
56
- },
57
- searchkick_search2: {
58
- type: "custom",
59
- char_filter: ["ampersand"],
60
- tokenizer: "standard",
61
- filter: ["lowercase", "asciifolding", "searchkick_stemmer"]
62
- },
63
- # https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
64
- searchkick_autocomplete_search: {
65
- type: "custom",
66
- tokenizer: "keyword",
67
- filter: ["lowercase", "asciifolding"]
68
- },
69
- searchkick_word_search: {
70
- type: "custom",
71
- tokenizer: "standard",
72
- filter: ["lowercase", "asciifolding"]
73
- },
74
- searchkick_suggest_index: {
75
- type: "custom",
76
- tokenizer: "standard",
77
- filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
78
- },
79
- searchkick_text_start_index: {
80
- type: "custom",
81
- tokenizer: "keyword",
82
- filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
83
- },
84
- searchkick_text_middle_index: {
85
- type: "custom",
86
- tokenizer: "keyword",
87
- filter: ["lowercase", "asciifolding", "searchkick_ngram"]
88
- },
89
- searchkick_text_end_index: {
90
- type: "custom",
91
- tokenizer: "keyword",
92
- filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
93
- },
94
- searchkick_word_start_index: {
95
- type: "custom",
96
- tokenizer: "standard",
97
- filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
98
- },
99
- searchkick_word_middle_index: {
100
- type: "custom",
101
- tokenizer: "standard",
102
- filter: ["lowercase", "asciifolding", "searchkick_ngram"]
103
- },
104
- searchkick_word_end_index: {
105
- type: "custom",
106
- tokenizer: "standard",
107
- filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
108
- }
109
- },
110
- filter: {
111
- searchkick_index_shingle: {
112
- type: "shingle",
113
- token_separator: ""
114
- },
115
- # lucky find https://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
116
- searchkick_search_shingle: {
117
- type: "shingle",
118
- token_separator: "",
119
- output_unigrams: false,
120
- output_unigrams_if_no_shingles: true
121
- },
122
- searchkick_suggest_shingle: {
123
- type: "shingle",
124
- max_shingle_size: 5
125
- },
126
- searchkick_edge_ngram: {
127
- type: "edge_ngram",
128
- min_gram: 1,
129
- max_gram: 50
130
- },
131
- searchkick_ngram: {
132
- type: "ngram",
133
- min_gram: 1,
134
- max_gram: 50
135
- },
136
- searchkick_stemmer: {
137
- # use stemmer if language is lowercase, snowball otherwise
138
- type: language == language.to_s.downcase ? "stemmer" : "snowball",
139
- language: language || "English"
140
- }
141
- },
142
- char_filter: {
143
- # https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html
144
- # &_to_and
145
- ampersand: {
146
- type: "mapping",
147
- mappings: ["&=> and "]
148
- }
149
- }
150
- }
151
- }
18
+ settings = generate_settings.deep_symbolize_keys.deep_merge(custom_settings)
19
+ mappings = generate_mappings.deep_symbolize_keys.deep_merge(custom_mappings)
20
+ end
21
+
22
+ set_deep_paging(settings) if options[:deep_paging] || options[:max_result_window]
23
+
24
+ {
25
+ settings: settings,
26
+ mappings: mappings
27
+ }
28
+ end
152
29
 
153
- stem = options[:stem]
30
+ def generate_settings
31
+ language = options[:language]
32
+ language = language.call if language.respond_to?(:call)
154
33
 
155
- case language
156
- when "chinese"
157
- settings[:analysis][:analyzer].merge!(
34
+ settings = {
35
+ analysis: {
36
+ analyzer: {
37
+ searchkick_keyword: {
38
+ type: "custom",
39
+ tokenizer: "keyword",
40
+ filter: ["lowercase"] + (options[:stem_conversions] ? ["searchkick_stemmer"] : [])
41
+ },
158
42
  default_analyzer => {
159
- type: "ik_smart"
43
+ type: "custom",
44
+ # character filters -> tokenizer -> token filters
45
+ # https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html
46
+ char_filter: ["ampersand"],
47
+ tokenizer: "standard",
48
+ # synonym should come last, after stemming and shingle
49
+ # shingle must come before searchkick_stemmer
50
+ filter: ["lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
160
51
  },
161
52
  searchkick_search: {
162
- type: "ik_smart"
53
+ type: "custom",
54
+ char_filter: ["ampersand"],
55
+ tokenizer: "standard",
56
+ filter: ["lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
163
57
  },
164
58
  searchkick_search2: {
165
- type: "ik_max_word"
166
- }
167
- )
168
-
169
- stem = false
170
- when "chinese2", "smartcn"
171
- settings[:analysis][:analyzer].merge!(
172
- default_analyzer => {
173
- type: "smartcn"
59
+ type: "custom",
60
+ char_filter: ["ampersand"],
61
+ tokenizer: "standard",
62
+ filter: ["lowercase", "asciifolding", "searchkick_stemmer"]
174
63
  },
175
- searchkick_search: {
176
- type: "smartcn"
64
+ # https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
65
+ searchkick_autocomplete_search: {
66
+ type: "custom",
67
+ tokenizer: "keyword",
68
+ filter: ["lowercase", "asciifolding"]
177
69
  },
178
- searchkick_search2: {
179
- type: "smartcn"
180
- }
181
- )
182
-
183
- stem = false
184
- when "japanese"
185
- settings[:analysis][:analyzer].merge!(
186
- default_analyzer => {
187
- type: "kuromoji"
70
+ searchkick_word_search: {
71
+ type: "custom",
72
+ tokenizer: "standard",
73
+ filter: ["lowercase", "asciifolding"]
188
74
  },
189
- searchkick_search: {
190
- type: "kuromoji"
75
+ searchkick_suggest_index: {
76
+ type: "custom",
77
+ tokenizer: "standard",
78
+ filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
191
79
  },
192
- searchkick_search2: {
193
- type: "kuromoji"
194
- }
195
- )
196
-
197
- stem = false
198
- when "korean"
199
- settings[:analysis][:analyzer].merge!(
200
- default_analyzer => {
201
- type: "openkoreantext-analyzer"
80
+ searchkick_text_start_index: {
81
+ type: "custom",
82
+ tokenizer: "keyword",
83
+ filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
202
84
  },
203
- searchkick_search: {
204
- type: "openkoreantext-analyzer"
85
+ searchkick_text_middle_index: {
86
+ type: "custom",
87
+ tokenizer: "keyword",
88
+ filter: ["lowercase", "asciifolding", "searchkick_ngram"]
205
89
  },
206
- searchkick_search2: {
207
- type: "openkoreantext-analyzer"
208
- }
209
- )
210
-
211
- stem = false
212
- when "korean2"
213
- settings[:analysis][:analyzer].merge!(
214
- default_analyzer => {
215
- type: "nori"
90
+ searchkick_text_end_index: {
91
+ type: "custom",
92
+ tokenizer: "keyword",
93
+ filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
216
94
  },
217
- searchkick_search: {
218
- type: "nori"
95
+ searchkick_word_start_index: {
96
+ type: "custom",
97
+ tokenizer: "standard",
98
+ filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
219
99
  },
220
- searchkick_search2: {
221
- type: "nori"
100
+ searchkick_word_middle_index: {
101
+ type: "custom",
102
+ tokenizer: "standard",
103
+ filter: ["lowercase", "asciifolding", "searchkick_ngram"]
104
+ },
105
+ searchkick_word_end_index: {
106
+ type: "custom",
107
+ tokenizer: "standard",
108
+ filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
222
109
  }
223
- )
224
-
225
- stem = false
226
- when "vietnamese"
227
- settings[:analysis][:analyzer].merge!(
228
- default_analyzer => {
229
- type: "vi_analyzer"
110
+ },
111
+ filter: {
112
+ searchkick_index_shingle: {
113
+ type: "shingle",
114
+ token_separator: ""
230
115
  },
231
- searchkick_search: {
232
- type: "vi_analyzer"
116
+ # lucky find https://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
117
+ searchkick_search_shingle: {
118
+ type: "shingle",
119
+ token_separator: "",
120
+ output_unigrams: false,
121
+ output_unigrams_if_no_shingles: true
233
122
  },
234
- searchkick_search2: {
235
- type: "vi_analyzer"
236
- }
237
- )
238
-
239
- stem = false
240
- when "polish", "ukrainian"
241
- settings[:analysis][:analyzer].merge!(
242
- default_analyzer => {
243
- type: language
123
+ searchkick_suggest_shingle: {
124
+ type: "shingle",
125
+ max_shingle_size: 5
244
126
  },
245
- searchkick_search: {
246
- type: language
127
+ searchkick_edge_ngram: {
128
+ type: "edge_ngram",
129
+ min_gram: 1,
130
+ max_gram: 50
247
131
  },
248
- searchkick_search2: {
249
- type: language
132
+ searchkick_ngram: {
133
+ type: "ngram",
134
+ min_gram: 1,
135
+ max_gram: 50
136
+ },
137
+ searchkick_stemmer: {
138
+ # use stemmer if language is lowercase, snowball otherwise
139
+ type: language == language.to_s.downcase ? "stemmer" : "snowball",
140
+ language: language || "English"
141
+ }
142
+ },
143
+ char_filter: {
144
+ # https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html
145
+ # &_to_and
146
+ ampersand: {
147
+ type: "mapping",
148
+ mappings: ["&=> and "]
250
149
  }
251
- )
150
+ }
151
+ }
152
+ }
252
153
 
253
- stem = false
254
- end
154
+ raise ArgumentError, "Can't pass both language and stemmer" if options[:stemmer] && language
155
+ update_language(settings, language)
156
+ update_stemming(settings)
255
157
 
256
- if Searchkick.env == "test"
257
- settings[:number_of_shards] = 1
258
- settings[:number_of_replicas] = 0
259
- end
158
+ if Searchkick.env == "test"
159
+ settings[:number_of_shards] = 1
160
+ settings[:number_of_replicas] = 0
161
+ end
260
162
 
261
- if options[:similarity]
262
- settings[:similarity] = {default: {type: options[:similarity]}}
263
- end
163
+ if options[:similarity]
164
+ settings[:similarity] = {default: {type: options[:similarity]}}
165
+ end
264
166
 
265
- unless below62
266
- settings[:index] = {
267
- max_ngram_diff: 49,
268
- max_shingle_diff: 4
269
- }
270
- end
167
+ settings[:index] = {
168
+ max_ngram_diff: 49,
169
+ max_shingle_diff: 4
170
+ }
271
171
 
272
- if options[:case_sensitive]
273
- settings[:analysis][:analyzer].each do |_, analyzer|
274
- analyzer[:filter].delete("lowercase")
275
- end
172
+ if options[:case_sensitive]
173
+ settings[:analysis][:analyzer].each do |_, analyzer|
174
+ analyzer[:filter].delete("lowercase")
276
175
  end
176
+ end
277
177
 
278
- if stem == false
279
- settings[:analysis][:filter].delete(:searchkick_stemmer)
280
- settings[:analysis][:analyzer].each do |_, analyzer|
281
- analyzer[:filter].delete("searchkick_stemmer") if analyzer[:filter]
282
- end
178
+ add_synonyms(settings)
179
+ add_search_synonyms(settings)
180
+
181
+ if options[:special_characters] == false
182
+ settings[:analysis][:analyzer].each_value do |analyzer_settings|
183
+ analyzer_settings[:filter].reject! { |f| f == "asciifolding" }
283
184
  end
185
+ end
284
186
 
285
- settings = settings.symbolize_keys.deep_merge((options[:settings] || {}).symbolize_keys)
187
+ settings
188
+ end
286
189
 
287
- # synonyms
288
- synonyms = options[:synonyms] || []
289
- synonyms = synonyms.call if synonyms.respond_to?(:call)
290
- if synonyms.any?
291
- settings[:analysis][:filter][:searchkick_synonym] = {
292
- type: "synonym",
293
- # only remove a single space from synonyms so three-word synonyms will fail noisily instead of silently
294
- synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.map { |s2| s2.sub(/\s+/, "") }.join(",") : s }.map(&:downcase)
190
+ def update_language(settings, language)
191
+ case language
192
+ when "chinese"
193
+ settings[:analysis][:analyzer].merge!(
194
+ default_analyzer => {
195
+ type: "ik_smart"
196
+ },
197
+ searchkick_search: {
198
+ type: "ik_smart"
199
+ },
200
+ searchkick_search2: {
201
+ type: "ik_max_word"
295
202
  }
296
- # choosing a place for the synonym filter when stemming is not easy
297
- # https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
298
- # TODO use a snowball stemmer on synonyms when creating the token filter
299
-
300
- # http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
301
- # I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
302
- # - Only apply the synonym expansion at index time
303
- # - Don't have the synonym filter applied search
304
- # - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
305
- settings[:analysis][:analyzer][default_analyzer][:filter].insert(2, "searchkick_synonym")
306
-
307
- %w(word_start word_middle word_end).each do |type|
308
- settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym")
309
- end
203
+ )
204
+ when "chinese2", "smartcn"
205
+ settings[:analysis][:analyzer].merge!(
206
+ default_analyzer => {
207
+ type: "smartcn"
208
+ },
209
+ searchkick_search: {
210
+ type: "smartcn"
211
+ },
212
+ searchkick_search2: {
213
+ type: "smartcn"
214
+ }
215
+ )
216
+ when "japanese", "japanese2"
217
+ analyzer = {
218
+ type: "custom",
219
+ tokenizer: "kuromoji_tokenizer",
220
+ filter: [
221
+ "kuromoji_baseform",
222
+ "kuromoji_part_of_speech",
223
+ "cjk_width",
224
+ "ja_stop",
225
+ "searchkick_stemmer",
226
+ "lowercase"
227
+ ]
228
+ }
229
+ settings[:analysis][:analyzer].merge!(
230
+ default_analyzer => analyzer.deep_dup,
231
+ searchkick_search: analyzer.deep_dup,
232
+ searchkick_search2: analyzer.deep_dup
233
+ )
234
+ settings[:analysis][:filter][:searchkick_stemmer] = {
235
+ type: "kuromoji_stemmer"
236
+ }
237
+ when "korean"
238
+ settings[:analysis][:analyzer].merge!(
239
+ default_analyzer => {
240
+ type: "openkoreantext-analyzer"
241
+ },
242
+ searchkick_search: {
243
+ type: "openkoreantext-analyzer"
244
+ },
245
+ searchkick_search2: {
246
+ type: "openkoreantext-analyzer"
247
+ }
248
+ )
249
+ when "korean2"
250
+ settings[:analysis][:analyzer].merge!(
251
+ default_analyzer => {
252
+ type: "nori"
253
+ },
254
+ searchkick_search: {
255
+ type: "nori"
256
+ },
257
+ searchkick_search2: {
258
+ type: "nori"
259
+ }
260
+ )
261
+ when "vietnamese"
262
+ settings[:analysis][:analyzer].merge!(
263
+ default_analyzer => {
264
+ type: "vi_analyzer"
265
+ },
266
+ searchkick_search: {
267
+ type: "vi_analyzer"
268
+ },
269
+ searchkick_search2: {
270
+ type: "vi_analyzer"
271
+ }
272
+ )
273
+ when "polish", "ukrainian"
274
+ settings[:analysis][:analyzer].merge!(
275
+ default_analyzer => {
276
+ type: language
277
+ },
278
+ searchkick_search: {
279
+ type: language
280
+ },
281
+ searchkick_search2: {
282
+ type: language
283
+ }
284
+ )
285
+ end
286
+ end
287
+
288
+ def update_stemming(settings)
289
+ if options[:stemmer]
290
+ stemmer = options[:stemmer]
291
+ # could also support snowball and stemmer
292
+ case stemmer[:type]
293
+ when "hunspell"
294
+ # supports all token filter options
295
+ settings[:analysis][:filter][:searchkick_stemmer] = stemmer
296
+ else
297
+ raise ArgumentError, "Unknown stemmer: #{stemmer[:type]}"
310
298
  end
299
+ end
311
300
 
312
- search_synonyms = options[:search_synonyms] || []
313
- search_synonyms = search_synonyms.call if search_synonyms.respond_to?(:call)
314
- if search_synonyms.is_a?(String) || search_synonyms.any?
315
- if search_synonyms.is_a?(String)
316
- synonym_graph = {
317
- type: "synonym_graph",
318
- synonyms_path: search_synonyms
319
- }
320
- synonym_graph[:updateable] = true unless below73
321
- else
322
- synonym_graph = {
323
- type: "synonym_graph",
324
- # TODO confirm this is correct
325
- synonyms: search_synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.join(",") : s }.map(&:downcase)
326
- }
327
- end
328
- settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph
301
+ stem = options[:stem]
329
302
 
330
- [:searchkick_search2, :searchkick_word_search].each do |analyzer|
331
- settings[:analysis][:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph")
332
- end
333
- end
303
+ # language analyzer used
304
+ stem = false if settings[:analysis][:analyzer][default_analyzer][:type] != "custom"
334
305
 
335
- if options[:wordnet]
336
- settings[:analysis][:filter][:searchkick_wordnet] = {
337
- type: "synonym",
338
- format: "wordnet",
339
- synonyms_path: Searchkick.wordnet_path
340
- }
306
+ if stem == false
307
+ settings[:analysis][:filter].delete(:searchkick_stemmer)
308
+ settings[:analysis][:analyzer].each do |_, analyzer|
309
+ analyzer[:filter].delete("searchkick_stemmer") if analyzer[:filter]
310
+ end
311
+ end
341
312
 
342
- settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_wordnet")
343
- settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_wordnet"
313
+ if options[:stemmer_override]
314
+ stemmer_override = {
315
+ type: "stemmer_override"
316
+ }
317
+ if options[:stemmer_override].is_a?(String)
318
+ stemmer_override[:rules_path] = options[:stemmer_override]
319
+ else
320
+ stemmer_override[:rules] = options[:stemmer_override]
321
+ end
322
+ settings[:analysis][:filter][:searchkick_stemmer_override] = stemmer_override
344
323
 
345
- %w(word_start word_middle word_end).each do |type|
346
- settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_wordnet")
347
- end
324
+ settings[:analysis][:analyzer].each do |_, analyzer|
325
+ stemmer_index = analyzer[:filter].index("searchkick_stemmer") if analyzer[:filter]
326
+ analyzer[:filter].insert(stemmer_index, "searchkick_stemmer_override") if stemmer_index
348
327
  end
328
+ end
349
329
 
350
- if options[:special_characters] == false
351
- settings[:analysis][:analyzer].each_value do |analyzer_settings|
352
- analyzer_settings[:filter].reject! { |f| f == "asciifolding" }
353
- end
330
+ if options[:stem_exclusion]
331
+ settings[:analysis][:filter][:searchkick_stem_exclusion] = {
332
+ type: "keyword_marker",
333
+ keywords: options[:stem_exclusion]
334
+ }
335
+
336
+ settings[:analysis][:analyzer].each do |_, analyzer|
337
+ stemmer_index = analyzer[:filter].index("searchkick_stemmer") if analyzer[:filter]
338
+ analyzer[:filter].insert(stemmer_index, "searchkick_stem_exclusion") if stemmer_index
354
339
  end
340
+ end
341
+ end
355
342
 
356
- mapping = {}
343
+ def generate_mappings
344
+ mapping = {}
357
345
 
358
- # conversions
359
- Array(options[:conversions]).each do |conversions_field|
360
- mapping[conversions_field] = {
361
- type: "nested",
362
- properties: {
363
- query: {type: default_type, analyzer: "searchkick_keyword"},
364
- count: {type: "integer"}
365
- }
346
+ keyword_mapping = {type: "keyword"}
347
+ keyword_mapping[:ignore_above] = options[:ignore_above] || 30000
348
+
349
+ # conversions
350
+ Array(options[:conversions]).each do |conversions_field|
351
+ mapping[conversions_field] = {
352
+ type: "nested",
353
+ properties: {
354
+ query: {type: default_type, analyzer: "searchkick_keyword"},
355
+ count: {type: "integer"}
366
356
  }
367
- end
357
+ }
358
+ end
368
359
 
369
- mapping_options = Hash[
370
- [:suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable]
371
- .map { |type| [type, (options[type] || []).map(&:to_s)] }
372
- ]
360
+ mapping_options =
361
+ [:suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable]
362
+ .to_h { |type| [type, (options[type] || []).map(&:to_s)] }
373
363
 
374
- word = options[:word] != false && (!options[:match] || options[:match] == :word)
364
+ word = options[:word] != false && (!options[:match] || options[:match] == :word)
375
365
 
376
- mapping_options[:searchable].delete("_all")
366
+ mapping_options[:searchable].delete("_all")
377
367
 
378
- analyzed_field_options = {type: default_type, index: true, analyzer: default_analyzer}
368
+ analyzed_field_options = {type: default_type, index: true, analyzer: default_analyzer.to_s}
379
369
 
380
- mapping_options.values.flatten.uniq.each do |field|
381
- fields = {}
370
+ mapping_options.values.flatten.uniq.each do |field|
371
+ fields = {}
382
372
 
383
- if options.key?(:filterable) && !mapping_options[:filterable].include?(field)
384
- fields[field] = {type: default_type, index: false}
385
- else
386
- fields[field] = keyword_mapping
387
- end
373
+ if options.key?(:filterable) && !mapping_options[:filterable].include?(field)
374
+ fields[field] = {type: default_type, index: false}
375
+ else
376
+ fields[field] = keyword_mapping
377
+ end
388
378
 
389
- if !options[:searchable] || mapping_options[:searchable].include?(field)
390
- if word
391
- fields[:analyzed] = analyzed_field_options
379
+ if !options[:searchable] || mapping_options[:searchable].include?(field)
380
+ if word
381
+ fields[:analyzed] = analyzed_field_options
392
382
 
393
- if mapping_options[:highlight].include?(field)
394
- fields[:analyzed][:term_vector] = "with_positions_offsets"
395
- end
383
+ if mapping_options[:highlight].include?(field)
384
+ fields[:analyzed][:term_vector] = "with_positions_offsets"
396
385
  end
386
+ end
397
387
 
398
- mapping_options.except(:highlight, :searchable, :filterable, :word).each do |type, f|
399
- if options[:match] == type || f.include?(field)
400
- fields[type] = {type: default_type, index: true, analyzer: "searchkick_#{type}_index"}
401
- end
388
+ mapping_options.except(:highlight, :searchable, :filterable, :word).each do |type, f|
389
+ if options[:match] == type || f.include?(field)
390
+ fields[type] = {type: default_type, index: true, analyzer: "searchkick_#{type}_index"}
402
391
  end
403
392
  end
404
-
405
- mapping[field] = fields[field].merge(fields: fields.except(field))
406
393
  end
407
394
 
408
- (options[:locations] || []).map(&:to_s).each do |field|
409
- mapping[field] = {
410
- type: "geo_point"
411
- }
412
- end
395
+ mapping[field] = fields[field].merge(fields: fields.except(field))
396
+ end
397
+
398
+ (options[:locations] || []).map(&:to_s).each do |field|
399
+ mapping[field] = {
400
+ type: "geo_point"
401
+ }
402
+ end
403
+
404
+ options[:geo_shape] = options[:geo_shape].product([{}]).to_h if options[:geo_shape].is_a?(Array)
405
+ (options[:geo_shape] || {}).each do |field, shape_options|
406
+ mapping[field] = shape_options.merge(type: "geo_shape")
407
+ end
413
408
 
414
- options[:geo_shape] = options[:geo_shape].product([{}]).to_h if options[:geo_shape].is_a?(Array)
415
- (options[:geo_shape] || {}).each do |field, shape_options|
416
- mapping[field] = shape_options.merge(type: "geo_shape")
409
+ if options[:inheritance]
410
+ mapping[:type] = keyword_mapping
411
+ end
412
+
413
+ routing = {}
414
+ if options[:routing]
415
+ routing = {required: true}
416
+ unless options[:routing] == true
417
+ routing[:path] = options[:routing].to_s
417
418
  end
419
+ end
418
420
 
419
- if options[:inheritance]
420
- mapping[:type] = keyword_mapping
421
+ dynamic_fields = {
422
+ # analyzed field must be the default field for include_in_all
423
+ # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
424
+ # however, we can include the not_analyzed field in _all
425
+ # and the _all index analyzer will take care of it
426
+ "{name}" => keyword_mapping
427
+ }
428
+
429
+ if options.key?(:filterable)
430
+ dynamic_fields["{name}"] = {type: default_type, index: false}
431
+ end
432
+
433
+ unless options[:searchable]
434
+ if options[:match] && options[:match] != :word
435
+ dynamic_fields[options[:match]] = {type: default_type, index: true, analyzer: "searchkick_#{options[:match]}_index"}
421
436
  end
422
437
 
423
- routing = {}
424
- if options[:routing]
425
- routing = {required: true}
426
- unless options[:routing] == true
427
- routing[:path] = options[:routing].to_s
428
- end
438
+ if word
439
+ dynamic_fields[:analyzed] = analyzed_field_options
429
440
  end
441
+ end
442
+
443
+ # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
444
+ multi_field = dynamic_fields["{name}"].merge(fields: dynamic_fields.except("{name}"))
445
+
446
+ mappings = {
447
+ properties: mapping,
448
+ _routing: routing,
449
+ # https://gist.github.com/kimchy/2898285
450
+ dynamic_templates: [
451
+ {
452
+ string_template: {
453
+ match: "*",
454
+ match_mapping_type: "string",
455
+ mapping: multi_field
456
+ }
457
+ }
458
+ ]
459
+ }
460
+
461
+ mappings
462
+ end
430
463
 
431
- dynamic_fields = {
432
- # analyzed field must be the default field for include_in_all
433
- # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
434
- # however, we can include the not_analyzed field in _all
435
- # and the _all index analyzer will take care of it
436
- "{name}" => keyword_mapping
464
+ def add_synonyms(settings)
465
+ synonyms = options[:synonyms] || []
466
+ synonyms = synonyms.call if synonyms.respond_to?(:call)
467
+ if synonyms.any?
468
+ settings[:analysis][:filter][:searchkick_synonym] = {
469
+ type: "synonym",
470
+ # only remove a single space from synonyms so three-word synonyms will fail noisily instead of silently
471
+ synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.map { |s2| s2.sub(/\s+/, "") }.join(",") : s }.map(&:downcase)
437
472
  }
473
+ # choosing a place for the synonym filter when stemming is not easy
474
+ # https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
475
+ # TODO use a snowball stemmer on synonyms when creating the token filter
476
+
477
+ # http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
478
+ # I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
479
+ # - Only apply the synonym expansion at index time
480
+ # - Don't have the synonym filter applied search
481
+ # - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
482
+ settings[:analysis][:analyzer][default_analyzer][:filter].insert(2, "searchkick_synonym")
483
+
484
+ %w(word_start word_middle word_end).each do |type|
485
+ settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym")
486
+ end
487
+ end
488
+ end
438
489
 
439
- if options.key?(:filterable)
440
- dynamic_fields["{name}"] = {type: default_type, index: false}
490
+ def add_search_synonyms(settings)
491
+ search_synonyms = options[:search_synonyms] || []
492
+ search_synonyms = search_synonyms.call if search_synonyms.respond_to?(:call)
493
+ if search_synonyms.is_a?(String) || search_synonyms.any?
494
+ if search_synonyms.is_a?(String)
495
+ synonym_graph = {
496
+ type: "synonym_graph",
497
+ synonyms_path: search_synonyms
498
+ }
499
+ synonym_graph[:updateable] = true unless below73?
500
+ else
501
+ synonym_graph = {
502
+ type: "synonym_graph",
503
+ # TODO confirm this is correct
504
+ synonyms: search_synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.join(",") : s }.map(&:downcase)
505
+ }
441
506
  end
507
+ settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph
442
508
 
443
- unless options[:searchable]
444
- if options[:match] && options[:match] != :word
445
- dynamic_fields[options[:match]] = {type: default_type, index: true, analyzer: "searchkick_#{options[:match]}_index"}
509
+ if ["japanese", "japanese2"].include?(options[:language])
510
+ [:searchkick_search, :searchkick_search2].each do |analyzer|
511
+ settings[:analysis][:analyzer][analyzer][:filter].insert(4, "searchkick_synonym_graph")
446
512
  end
513
+ else
514
+ [:searchkick_search2, :searchkick_word_search].each do |analyzer|
515
+ unless settings[:analysis][:analyzer][analyzer].key?(:filter)
516
+ raise Error, "Search synonyms are not supported yet for language"
517
+ end
447
518
 
448
- if word
449
- dynamic_fields[:analyzed] = analyzed_field_options
519
+ settings[:analysis][:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph")
450
520
  end
451
521
  end
522
+ end
523
+ end
452
524
 
453
- # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
454
- multi_field = dynamic_fields["{name}"].merge(fields: dynamic_fields.except("{name}"))
455
-
456
- mappings = {
457
- properties: mapping,
458
- _routing: routing,
459
- # https://gist.github.com/kimchy/2898285
460
- dynamic_templates: [
461
- {
462
- string_template: {
463
- match: "*",
464
- match_mapping_type: "string",
465
- mapping: multi_field
466
- }
467
- }
468
- ]
469
- }
470
-
471
- if below70
472
- mappings = {index_type => mappings}
473
- end
474
-
475
- mappings = mappings.symbolize_keys.deep_merge(custom_mapping.symbolize_keys)
525
+ def set_deep_paging(settings)
526
+ if !settings.dig(:index, :max_result_window) && !settings[:"index.max_result_window"]
527
+ settings[:index] ||= {}
528
+ settings[:index][:max_result_window] = options[:max_result_window] || 1_000_000_000
476
529
  end
530
+ end
477
531
 
478
- if options[:deep_paging]
479
- if !settings.dig(:index, :max_result_window) && !settings[:"index.max_result_window"]
480
- settings[:index] ||= {}
481
- settings[:index][:max_result_window] = 1_000_000_000
482
- end
532
+ def index_type
533
+ @index_type ||= begin
534
+ index_type = options[:_type]
535
+ index_type = index_type.call if index_type.respond_to?(:call)
536
+ index_type
483
537
  end
538
+ end
484
539
 
485
- {
486
- settings: settings,
487
- mappings: mappings
488
- }
540
+ def default_type
541
+ "text"
542
+ end
543
+
544
+ def default_analyzer
545
+ :searchkick_index
546
+ end
547
+
548
+ def below73?
549
+ Searchkick.server_below?("7.3.0")
489
550
  end
490
551
  end
491
552
  end