searchkick 4.0.0 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,429 +1,552 @@
1
1
  module Searchkick
2
- module IndexOptions
2
+ class IndexOptions
3
+ attr_reader :options
4
+
5
+ def initialize(index)
6
+ @options = index.options
7
+ end
8
+
3
9
  def index_options
4
- options = @options
5
- language = options[:language]
6
- language = language.call if language.respond_to?(:call)
10
+ # mortal symbols are garbage collected in Ruby 2.2+
11
+ custom_settings = (options[:settings] || {}).deep_symbolize_keys
12
+ custom_mappings = (options[:mappings] || {}).deep_symbolize_keys
7
13
 
8
14
  if options[:mappings] && !options[:merge_mappings]
9
- settings = options[:settings] || {}
10
- mappings = options[:mappings]
15
+ settings = custom_settings
16
+ mappings = custom_mappings
11
17
  else
12
- below62 = Searchkick.server_below?("6.2.0")
13
- below70 = Searchkick.server_below?("7.0.0")
14
-
15
- default_type = "text"
16
- default_analyzer = :searchkick_index
17
- keyword_mapping = {type: "keyword"}
18
-
19
- all = options.key?(:_all) ? options[:_all] : false
20
- index_true_value = true
21
- index_false_value = false
22
-
23
- keyword_mapping[:ignore_above] = options[:ignore_above] || 30000
24
-
25
- settings = {
26
- analysis: {
27
- analyzer: {
28
- searchkick_keyword: {
29
- type: "custom",
30
- tokenizer: "keyword",
31
- filter: ["lowercase"] + (options[:stem_conversions] ? ["searchkick_stemmer"] : [])
32
- },
33
- default_analyzer => {
34
- type: "custom",
35
- # character filters -> tokenizer -> token filters
36
- # https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html
37
- char_filter: ["ampersand"],
38
- tokenizer: "standard",
39
- # synonym should come last, after stemming and shingle
40
- # shingle must come before searchkick_stemmer
41
- filter: ["lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
42
- },
43
- searchkick_search: {
44
- type: "custom",
45
- char_filter: ["ampersand"],
46
- tokenizer: "standard",
47
- filter: ["lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
48
- },
49
- searchkick_search2: {
50
- type: "custom",
51
- char_filter: ["ampersand"],
52
- tokenizer: "standard",
53
- filter: ["lowercase", "asciifolding", "searchkick_stemmer"]
54
- },
55
- # https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
56
- searchkick_autocomplete_search: {
57
- type: "custom",
58
- tokenizer: "keyword",
59
- filter: ["lowercase", "asciifolding"]
60
- },
61
- searchkick_word_search: {
62
- type: "custom",
63
- tokenizer: "standard",
64
- filter: ["lowercase", "asciifolding"]
65
- },
66
- searchkick_suggest_index: {
67
- type: "custom",
68
- tokenizer: "standard",
69
- filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
70
- },
71
- searchkick_text_start_index: {
72
- type: "custom",
73
- tokenizer: "keyword",
74
- filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
75
- },
76
- searchkick_text_middle_index: {
77
- type: "custom",
78
- tokenizer: "keyword",
79
- filter: ["lowercase", "asciifolding", "searchkick_ngram"]
80
- },
81
- searchkick_text_end_index: {
82
- type: "custom",
83
- tokenizer: "keyword",
84
- filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
85
- },
86
- searchkick_word_start_index: {
87
- type: "custom",
88
- tokenizer: "standard",
89
- filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
90
- },
91
- searchkick_word_middle_index: {
92
- type: "custom",
93
- tokenizer: "standard",
94
- filter: ["lowercase", "asciifolding", "searchkick_ngram"]
95
- },
96
- searchkick_word_end_index: {
97
- type: "custom",
98
- tokenizer: "standard",
99
- filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
100
- }
101
- },
102
- filter: {
103
- searchkick_index_shingle: {
104
- type: "shingle",
105
- token_separator: ""
106
- },
107
- # lucky find http://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
108
- searchkick_search_shingle: {
109
- type: "shingle",
110
- token_separator: "",
111
- output_unigrams: false,
112
- output_unigrams_if_no_shingles: true
113
- },
114
- searchkick_suggest_shingle: {
115
- type: "shingle",
116
- max_shingle_size: 5
117
- },
118
- searchkick_edge_ngram: {
119
- type: "edgeNGram",
120
- min_gram: 1,
121
- max_gram: 50
122
- },
123
- searchkick_ngram: {
124
- type: "nGram",
125
- min_gram: 1,
126
- max_gram: 50
127
- },
128
- searchkick_stemmer: {
129
- # use stemmer if language is lowercase, snowball otherwise
130
- type: language == language.to_s.downcase ? "stemmer" : "snowball",
131
- language: language || "English"
132
- }
133
- },
134
- char_filter: {
135
- # https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html
136
- # &_to_and
137
- ampersand: {
138
- type: "mapping",
139
- mappings: ["&=> and "]
140
- }
141
- }
142
- }
143
- }
18
+ settings = generate_settings.deep_symbolize_keys.deep_merge(custom_settings)
19
+ mappings = generate_mappings.deep_symbolize_keys.deep_merge(custom_mappings)
20
+ end
21
+
22
+ set_deep_paging(settings) if options[:deep_paging]
23
+
24
+ {
25
+ settings: settings,
26
+ mappings: mappings
27
+ }
28
+ end
144
29
 
145
- stem = options[:stem]
30
+ def generate_settings
31
+ language = options[:language]
32
+ language = language.call if language.respond_to?(:call)
146
33
 
147
- case language
148
- when "chinese"
149
- settings[:analysis][:analyzer].merge!(
34
+ settings = {
35
+ analysis: {
36
+ analyzer: {
37
+ searchkick_keyword: {
38
+ type: "custom",
39
+ tokenizer: "keyword",
40
+ filter: ["lowercase"] + (options[:stem_conversions] ? ["searchkick_stemmer"] : [])
41
+ },
150
42
  default_analyzer => {
151
- type: "ik_smart"
43
+ type: "custom",
44
+ # character filters -> tokenizer -> token filters
45
+ # https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html
46
+ char_filter: ["ampersand"],
47
+ tokenizer: "standard",
48
+ # synonym should come last, after stemming and shingle
49
+ # shingle must come before searchkick_stemmer
50
+ filter: ["lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
152
51
  },
153
52
  searchkick_search: {
154
- type: "ik_smart"
53
+ type: "custom",
54
+ char_filter: ["ampersand"],
55
+ tokenizer: "standard",
56
+ filter: ["lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
155
57
  },
156
58
  searchkick_search2: {
157
- type: "ik_max_word"
158
- }
159
- )
160
-
161
- stem = false
162
- when "japanese"
163
- settings[:analysis][:analyzer].merge!(
164
- default_analyzer => {
165
- type: "kuromoji"
59
+ type: "custom",
60
+ char_filter: ["ampersand"],
61
+ tokenizer: "standard",
62
+ filter: ["lowercase", "asciifolding", "searchkick_stemmer"]
166
63
  },
167
- searchkick_search: {
168
- type: "kuromoji"
64
+ # https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
65
+ searchkick_autocomplete_search: {
66
+ type: "custom",
67
+ tokenizer: "keyword",
68
+ filter: ["lowercase", "asciifolding"]
169
69
  },
170
- searchkick_search2: {
171
- type: "kuromoji"
172
- }
173
- )
174
-
175
- stem = false
176
- when "korean"
177
- settings[:analysis][:analyzer].merge!(
178
- default_analyzer => {
179
- type: "openkoreantext-analyzer"
70
+ searchkick_word_search: {
71
+ type: "custom",
72
+ tokenizer: "standard",
73
+ filter: ["lowercase", "asciifolding"]
180
74
  },
181
- searchkick_search: {
182
- type: "openkoreantext-analyzer"
75
+ searchkick_suggest_index: {
76
+ type: "custom",
77
+ tokenizer: "standard",
78
+ filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
183
79
  },
184
- searchkick_search2: {
185
- type: "openkoreantext-analyzer"
186
- }
187
- )
188
-
189
- stem = false
190
- when "vietnamese"
191
- settings[:analysis][:analyzer].merge!(
192
- default_analyzer => {
193
- type: "vi_analyzer"
80
+ searchkick_text_start_index: {
81
+ type: "custom",
82
+ tokenizer: "keyword",
83
+ filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
194
84
  },
195
- searchkick_search: {
196
- type: "vi_analyzer"
85
+ searchkick_text_middle_index: {
86
+ type: "custom",
87
+ tokenizer: "keyword",
88
+ filter: ["lowercase", "asciifolding", "searchkick_ngram"]
197
89
  },
198
- searchkick_search2: {
199
- type: "vi_analyzer"
90
+ searchkick_text_end_index: {
91
+ type: "custom",
92
+ tokenizer: "keyword",
93
+ filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
94
+ },
95
+ searchkick_word_start_index: {
96
+ type: "custom",
97
+ tokenizer: "standard",
98
+ filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
99
+ },
100
+ searchkick_word_middle_index: {
101
+ type: "custom",
102
+ tokenizer: "standard",
103
+ filter: ["lowercase", "asciifolding", "searchkick_ngram"]
104
+ },
105
+ searchkick_word_end_index: {
106
+ type: "custom",
107
+ tokenizer: "standard",
108
+ filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
200
109
  }
201
- )
202
-
203
- stem = false
204
- when "polish", "ukrainian", "smartcn"
205
- settings[:analysis][:analyzer].merge!(
206
- default_analyzer => {
207
- type: language
110
+ },
111
+ filter: {
112
+ searchkick_index_shingle: {
113
+ type: "shingle",
114
+ token_separator: ""
208
115
  },
209
- searchkick_search: {
210
- type: language
116
+ # lucky find https://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
117
+ searchkick_search_shingle: {
118
+ type: "shingle",
119
+ token_separator: "",
120
+ output_unigrams: false,
121
+ output_unigrams_if_no_shingles: true
211
122
  },
212
- searchkick_search2: {
213
- type: language
123
+ searchkick_suggest_shingle: {
124
+ type: "shingle",
125
+ max_shingle_size: 5
126
+ },
127
+ searchkick_edge_ngram: {
128
+ type: "edge_ngram",
129
+ min_gram: 1,
130
+ max_gram: 50
131
+ },
132
+ searchkick_ngram: {
133
+ type: "ngram",
134
+ min_gram: 1,
135
+ max_gram: 50
136
+ },
137
+ searchkick_stemmer: {
138
+ # use stemmer if language is lowercase, snowball otherwise
139
+ type: language == language.to_s.downcase ? "stemmer" : "snowball",
140
+ language: language || "English"
141
+ }
142
+ },
143
+ char_filter: {
144
+ # https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html
145
+ # &_to_and
146
+ ampersand: {
147
+ type: "mapping",
148
+ mappings: ["&=> and "]
214
149
  }
215
- )
150
+ }
151
+ }
152
+ }
216
153
 
217
- stem = false
218
- end
154
+ raise ArgumentError, "Can't pass both language and stemmer" if options[:stemmer] && language
155
+ update_language(settings, language)
156
+ update_stemming(settings)
219
157
 
220
- if Searchkick.env == "test"
221
- settings[:number_of_shards] = 1
222
- settings[:number_of_replicas] = 0
223
- end
158
+ if Searchkick.env == "test"
159
+ settings[:number_of_shards] = 1
160
+ settings[:number_of_replicas] = 0
161
+ end
224
162
 
225
- if options[:similarity]
226
- settings[:similarity] = {default: {type: options[:similarity]}}
227
- end
163
+ if options[:similarity]
164
+ settings[:similarity] = {default: {type: options[:similarity]}}
165
+ end
228
166
 
229
- unless below62
230
- settings[:index] = {
231
- max_ngram_diff: 49,
232
- max_shingle_diff: 4
233
- }
167
+ settings[:index] = {
168
+ max_ngram_diff: 49,
169
+ max_shingle_diff: 4
170
+ }
171
+
172
+ if options[:case_sensitive]
173
+ settings[:analysis][:analyzer].each do |_, analyzer|
174
+ analyzer[:filter].delete("lowercase")
234
175
  end
176
+ end
235
177
 
236
- if options[:case_sensitive]
237
- settings[:analysis][:analyzer].each do |_, analyzer|
238
- analyzer[:filter].delete("lowercase")
239
- end
178
+ add_synonyms(settings)
179
+ add_search_synonyms(settings)
180
+
181
+ if options[:special_characters] == false
182
+ settings[:analysis][:analyzer].each_value do |analyzer_settings|
183
+ analyzer_settings[:filter].reject! { |f| f == "asciifolding" }
240
184
  end
185
+ end
241
186
 
242
- if stem == false
243
- settings[:analysis][:filter].delete(:searchkick_stemmer)
244
- settings[:analysis][:analyzer].each do |_, analyzer|
245
- analyzer[:filter].delete("searchkick_stemmer") if analyzer[:filter]
246
- end
187
+ settings
188
+ end
189
+
190
+ def update_language(settings, language)
191
+ case language
192
+ when "chinese"
193
+ settings[:analysis][:analyzer].merge!(
194
+ default_analyzer => {
195
+ type: "ik_smart"
196
+ },
197
+ searchkick_search: {
198
+ type: "ik_smart"
199
+ },
200
+ searchkick_search2: {
201
+ type: "ik_max_word"
202
+ }
203
+ )
204
+ when "chinese2", "smartcn"
205
+ settings[:analysis][:analyzer].merge!(
206
+ default_analyzer => {
207
+ type: "smartcn"
208
+ },
209
+ searchkick_search: {
210
+ type: "smartcn"
211
+ },
212
+ searchkick_search2: {
213
+ type: "smartcn"
214
+ }
215
+ )
216
+ when "japanese", "japanese2"
217
+ analyzer = {
218
+ type: "custom",
219
+ tokenizer: "kuromoji_tokenizer",
220
+ filter: [
221
+ "kuromoji_baseform",
222
+ "kuromoji_part_of_speech",
223
+ "cjk_width",
224
+ "ja_stop",
225
+ "searchkick_stemmer",
226
+ "lowercase"
227
+ ]
228
+ }
229
+ settings[:analysis][:analyzer].merge!(
230
+ default_analyzer => analyzer.deep_dup,
231
+ searchkick_search: analyzer.deep_dup,
232
+ searchkick_search2: analyzer.deep_dup
233
+ )
234
+ settings[:analysis][:filter][:searchkick_stemmer] = {
235
+ type: "kuromoji_stemmer"
236
+ }
237
+ when "korean"
238
+ settings[:analysis][:analyzer].merge!(
239
+ default_analyzer => {
240
+ type: "openkoreantext-analyzer"
241
+ },
242
+ searchkick_search: {
243
+ type: "openkoreantext-analyzer"
244
+ },
245
+ searchkick_search2: {
246
+ type: "openkoreantext-analyzer"
247
+ }
248
+ )
249
+ when "korean2"
250
+ settings[:analysis][:analyzer].merge!(
251
+ default_analyzer => {
252
+ type: "nori"
253
+ },
254
+ searchkick_search: {
255
+ type: "nori"
256
+ },
257
+ searchkick_search2: {
258
+ type: "nori"
259
+ }
260
+ )
261
+ when "vietnamese"
262
+ settings[:analysis][:analyzer].merge!(
263
+ default_analyzer => {
264
+ type: "vi_analyzer"
265
+ },
266
+ searchkick_search: {
267
+ type: "vi_analyzer"
268
+ },
269
+ searchkick_search2: {
270
+ type: "vi_analyzer"
271
+ }
272
+ )
273
+ when "polish", "ukrainian"
274
+ settings[:analysis][:analyzer].merge!(
275
+ default_analyzer => {
276
+ type: language
277
+ },
278
+ searchkick_search: {
279
+ type: language
280
+ },
281
+ searchkick_search2: {
282
+ type: language
283
+ }
284
+ )
285
+ end
286
+ end
287
+
288
+ def update_stemming(settings)
289
+ if options[:stemmer]
290
+ stemmer = options[:stemmer]
291
+ # could also support snowball and stemmer
292
+ case stemmer[:type]
293
+ when "hunspell"
294
+ # supports all token filter options
295
+ settings[:analysis][:filter][:searchkick_stemmer] = stemmer
296
+ else
297
+ raise ArgumentError, "Unknown stemmer: #{stemmer[:type]}"
247
298
  end
299
+ end
248
300
 
249
- settings = settings.symbolize_keys.deep_merge((options[:settings] || {}).symbolize_keys)
301
+ stem = options[:stem]
250
302
 
251
- # synonyms
252
- synonyms = options[:synonyms] || []
303
+ # language analyzer used
304
+ stem = false if settings[:analysis][:analyzer][default_analyzer][:type] != "custom"
253
305
 
254
- synonyms = synonyms.call if synonyms.respond_to?(:call)
306
+ if stem == false
307
+ settings[:analysis][:filter].delete(:searchkick_stemmer)
308
+ settings[:analysis][:analyzer].each do |_, analyzer|
309
+ analyzer[:filter].delete("searchkick_stemmer") if analyzer[:filter]
310
+ end
311
+ end
255
312
 
256
- if synonyms.any?
257
- settings[:analysis][:filter][:searchkick_synonym] = {
258
- type: "synonym",
259
- # only remove a single space from synonyms so three-word synonyms will fail noisily instead of silently
260
- synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.map { |s2| s2.sub(/\s+/, "") }.join(",") : s }.map(&:downcase)
261
- }
262
- # choosing a place for the synonym filter when stemming is not easy
263
- # https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
264
- # TODO use a snowball stemmer on synonyms when creating the token filter
265
-
266
- # http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
267
- # I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
268
- # - Only apply the synonym expansion at index time
269
- # - Don't have the synonym filter applied search
270
- # - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
271
- settings[:analysis][:analyzer][default_analyzer][:filter].insert(2, "searchkick_synonym")
272
-
273
- %w(word_start word_middle word_end).each do |type|
274
- settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym")
275
- end
313
+ if options[:stemmer_override]
314
+ stemmer_override = {
315
+ type: "stemmer_override"
316
+ }
317
+ if options[:stemmer_override].is_a?(String)
318
+ stemmer_override[:rules_path] = options[:stemmer_override]
319
+ else
320
+ stemmer_override[:rules] = options[:stemmer_override]
276
321
  end
322
+ settings[:analysis][:filter][:searchkick_stemmer_override] = stemmer_override
277
323
 
278
- if options[:wordnet]
279
- settings[:analysis][:filter][:searchkick_wordnet] = {
280
- type: "synonym",
281
- format: "wordnet",
282
- synonyms_path: Searchkick.wordnet_path
283
- }
324
+ settings[:analysis][:analyzer].each do |_, analyzer|
325
+ stemmer_index = analyzer[:filter].index("searchkick_stemmer") if analyzer[:filter]
326
+ analyzer[:filter].insert(stemmer_index, "searchkick_stemmer_override") if stemmer_index
327
+ end
328
+ end
284
329
 
285
- settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_wordnet")
286
- settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_wordnet"
330
+ if options[:stem_exclusion]
331
+ settings[:analysis][:filter][:searchkick_stem_exclusion] = {
332
+ type: "keyword_marker",
333
+ keywords: options[:stem_exclusion]
334
+ }
287
335
 
288
- %w(word_start word_middle word_end).each do |type|
289
- settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_wordnet")
290
- end
336
+ settings[:analysis][:analyzer].each do |_, analyzer|
337
+ stemmer_index = analyzer[:filter].index("searchkick_stemmer") if analyzer[:filter]
338
+ analyzer[:filter].insert(stemmer_index, "searchkick_stem_exclusion") if stemmer_index
291
339
  end
340
+ end
341
+ end
292
342
 
293
- if options[:special_characters] == false
294
- settings[:analysis][:analyzer].each_value do |analyzer_settings|
295
- analyzer_settings[:filter].reject! { |f| f == "asciifolding" }
296
- end
297
- end
343
+ def generate_mappings
344
+ mapping = {}
298
345
 
299
- mapping = {}
346
+ keyword_mapping = {type: "keyword"}
347
+ keyword_mapping[:ignore_above] = options[:ignore_above] || 30000
300
348
 
301
- # conversions
302
- Array(options[:conversions]).each do |conversions_field|
303
- mapping[conversions_field] = {
304
- type: "nested",
305
- properties: {
306
- query: {type: default_type, analyzer: "searchkick_keyword"},
307
- count: {type: "integer"}
308
- }
349
+ # conversions
350
+ Array(options[:conversions]).each do |conversions_field|
351
+ mapping[conversions_field] = {
352
+ type: "nested",
353
+ properties: {
354
+ query: {type: default_type, analyzer: "searchkick_keyword"},
355
+ count: {type: "integer"}
309
356
  }
310
- end
357
+ }
358
+ end
311
359
 
312
- mapping_options = Hash[
313
- [:suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable]
314
- .map { |type| [type, (options[type] || []).map(&:to_s)] }
315
- ]
360
+ mapping_options =
361
+ [:suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable]
362
+ .to_h { |type| [type, (options[type] || []).map(&:to_s)] }
316
363
 
317
- word = options[:word] != false && (!options[:match] || options[:match] == :word)
364
+ word = options[:word] != false && (!options[:match] || options[:match] == :word)
318
365
 
319
- mapping_options[:searchable].delete("_all")
366
+ mapping_options[:searchable].delete("_all")
320
367
 
321
- analyzed_field_options = {type: default_type, index: index_true_value, analyzer: default_analyzer}
368
+ analyzed_field_options = {type: default_type, index: true, analyzer: default_analyzer.to_s}
322
369
 
323
- mapping_options.values.flatten.uniq.each do |field|
324
- fields = {}
370
+ mapping_options.values.flatten.uniq.each do |field|
371
+ fields = {}
325
372
 
326
- if options.key?(:filterable) && !mapping_options[:filterable].include?(field)
327
- fields[field] = {type: default_type, index: index_false_value}
328
- else
329
- fields[field] = keyword_mapping
330
- end
373
+ if options.key?(:filterable) && !mapping_options[:filterable].include?(field)
374
+ fields[field] = {type: default_type, index: false}
375
+ else
376
+ fields[field] = keyword_mapping
377
+ end
331
378
 
332
- if !options[:searchable] || mapping_options[:searchable].include?(field)
333
- if word
334
- fields[:analyzed] = analyzed_field_options
379
+ if !options[:searchable] || mapping_options[:searchable].include?(field)
380
+ if word
381
+ fields[:analyzed] = analyzed_field_options
335
382
 
336
- if mapping_options[:highlight].include?(field)
337
- fields[:analyzed][:term_vector] = "with_positions_offsets"
338
- end
383
+ if mapping_options[:highlight].include?(field)
384
+ fields[:analyzed][:term_vector] = "with_positions_offsets"
339
385
  end
386
+ end
340
387
 
341
- mapping_options.except(:highlight, :searchable, :filterable, :word).each do |type, f|
342
- if options[:match] == type || f.include?(field)
343
- fields[type] = {type: default_type, index: index_true_value, analyzer: "searchkick_#{type}_index"}
344
- end
388
+ mapping_options.except(:highlight, :searchable, :filterable, :word).each do |type, f|
389
+ if options[:match] == type || f.include?(field)
390
+ fields[type] = {type: default_type, index: true, analyzer: "searchkick_#{type}_index"}
345
391
  end
346
392
  end
347
-
348
- mapping[field] = fields[field].merge(fields: fields.except(field))
349
393
  end
350
394
 
351
- (options[:locations] || []).map(&:to_s).each do |field|
352
- mapping[field] = {
353
- type: "geo_point"
354
- }
355
- end
395
+ mapping[field] = fields[field].merge(fields: fields.except(field))
396
+ end
356
397
 
357
- options[:geo_shape] = options[:geo_shape].product([{}]).to_h if options[:geo_shape].is_a?(Array)
358
- (options[:geo_shape] || {}).each do |field, shape_options|
359
- mapping[field] = shape_options.merge(type: "geo_shape")
398
+ (options[:locations] || []).map(&:to_s).each do |field|
399
+ mapping[field] = {
400
+ type: "geo_point"
401
+ }
402
+ end
403
+
404
+ options[:geo_shape] = options[:geo_shape].product([{}]).to_h if options[:geo_shape].is_a?(Array)
405
+ (options[:geo_shape] || {}).each do |field, shape_options|
406
+ mapping[field] = shape_options.merge(type: "geo_shape")
407
+ end
408
+
409
+ if options[:inheritance]
410
+ mapping[:type] = keyword_mapping
411
+ end
412
+
413
+ routing = {}
414
+ if options[:routing]
415
+ routing = {required: true}
416
+ unless options[:routing] == true
417
+ routing[:path] = options[:routing].to_s
360
418
  end
419
+ end
420
+
421
+ dynamic_fields = {
422
+ # analyzed field must be the default field for include_in_all
423
+ # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
424
+ # however, we can include the not_analyzed field in _all
425
+ # and the _all index analyzer will take care of it
426
+ "{name}" => keyword_mapping
427
+ }
428
+
429
+ if options.key?(:filterable)
430
+ dynamic_fields["{name}"] = {type: default_type, index: false}
431
+ end
361
432
 
362
- if options[:inheritance]
363
- mapping[:type] = keyword_mapping
433
+ unless options[:searchable]
434
+ if options[:match] && options[:match] != :word
435
+ dynamic_fields[options[:match]] = {type: default_type, index: true, analyzer: "searchkick_#{options[:match]}_index"}
364
436
  end
365
437
 
366
- routing = {}
367
- if options[:routing]
368
- routing = {required: true}
369
- unless options[:routing] == true
370
- routing[:path] = options[:routing].to_s
371
- end
438
+ if word
439
+ dynamic_fields[:analyzed] = analyzed_field_options
372
440
  end
441
+ end
442
+
443
+ # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
444
+ multi_field = dynamic_fields["{name}"].merge(fields: dynamic_fields.except("{name}"))
445
+
446
+ mappings = {
447
+ properties: mapping,
448
+ _routing: routing,
449
+ # https://gist.github.com/kimchy/2898285
450
+ dynamic_templates: [
451
+ {
452
+ string_template: {
453
+ match: "*",
454
+ match_mapping_type: "string",
455
+ mapping: multi_field
456
+ }
457
+ }
458
+ ]
459
+ }
373
460
 
374
- dynamic_fields = {
375
- # analyzed field must be the default field for include_in_all
376
- # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
377
- # however, we can include the not_analyzed field in _all
378
- # and the _all index analyzer will take care of it
379
- "{name}" => keyword_mapping
461
+ mappings
462
+ end
463
+
464
+ def add_synonyms(settings)
465
+ synonyms = options[:synonyms] || []
466
+ synonyms = synonyms.call if synonyms.respond_to?(:call)
467
+ if synonyms.any?
468
+ settings[:analysis][:filter][:searchkick_synonym] = {
469
+ type: "synonym",
470
+ # only remove a single space from synonyms so three-word synonyms will fail noisily instead of silently
471
+ synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.map { |s2| s2.sub(/\s+/, "") }.join(",") : s }.map(&:downcase)
380
472
  }
473
+ # choosing a place for the synonym filter when stemming is not easy
474
+ # https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
475
+ # TODO use a snowball stemmer on synonyms when creating the token filter
476
+
477
+ # http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
478
+ # I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
479
+ # - Only apply the synonym expansion at index time
480
+ # - Don't have the synonym filter applied search
481
+ # - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
482
+ settings[:analysis][:analyzer][default_analyzer][:filter].insert(2, "searchkick_synonym")
483
+
484
+ %w(word_start word_middle word_end).each do |type|
485
+ settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym")
486
+ end
487
+ end
488
+ end
381
489
 
382
- if options.key?(:filterable)
383
- dynamic_fields["{name}"] = {type: default_type, index: index_false_value}
490
+ def add_search_synonyms(settings)
491
+ search_synonyms = options[:search_synonyms] || []
492
+ search_synonyms = search_synonyms.call if search_synonyms.respond_to?(:call)
493
+ if search_synonyms.is_a?(String) || search_synonyms.any?
494
+ if search_synonyms.is_a?(String)
495
+ synonym_graph = {
496
+ type: "synonym_graph",
497
+ synonyms_path: search_synonyms
498
+ }
499
+ synonym_graph[:updateable] = true unless below73?
500
+ else
501
+ synonym_graph = {
502
+ type: "synonym_graph",
503
+ # TODO confirm this is correct
504
+ synonyms: search_synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.join(",") : s }.map(&:downcase)
505
+ }
384
506
  end
507
+ settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph
385
508
 
386
- unless options[:searchable]
387
- if options[:match] && options[:match] != :word
388
- dynamic_fields[options[:match]] = {type: default_type, index: index_true_value, analyzer: "searchkick_#{options[:match]}_index"}
509
+ if ["japanese", "japanese2"].include?(options[:language])
510
+ [:searchkick_search, :searchkick_search2].each do |analyzer|
511
+ settings[:analysis][:analyzer][analyzer][:filter].insert(4, "searchkick_synonym_graph")
389
512
  end
513
+ else
514
+ [:searchkick_search2, :searchkick_word_search].each do |analyzer|
515
+ unless settings[:analysis][:analyzer][analyzer].key?(:filter)
516
+ raise Searchkick::Error, "Search synonyms are not supported yet for language"
517
+ end
390
518
 
391
- if word
392
- dynamic_fields[:analyzed] = analyzed_field_options
519
+ settings[:analysis][:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph")
393
520
  end
394
521
  end
522
+ end
523
+ end
395
524
 
396
- # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
397
- multi_field = dynamic_fields["{name}"].merge(fields: dynamic_fields.except("{name}"))
398
-
399
- mappings = {
400
- properties: mapping,
401
- _routing: routing,
402
- # https://gist.github.com/kimchy/2898285
403
- dynamic_templates: [
404
- {
405
- string_template: {
406
- match: "*",
407
- match_mapping_type: "string",
408
- mapping: multi_field
409
- }
410
- }
411
- ]
412
- }
413
-
414
- if below70
415
- index_type = options[:_type]
416
- index_type = index_type.call if index_type.respond_to?(:call)
417
- mappings = {index_type => mappings}
418
- end
525
+ def set_deep_paging(settings)
526
+ if !settings.dig(:index, :max_result_window) && !settings[:"index.max_result_window"]
527
+ settings[:index] ||= {}
528
+ settings[:index][:max_result_window] = 1_000_000_000
529
+ end
530
+ end
419
531
 
420
- mappings = mappings.symbolize_keys.deep_merge((options[:mappings] || {}).symbolize_keys)
532
+ def index_type
533
+ @index_type ||= begin
534
+ index_type = options[:_type]
535
+ index_type = index_type.call if index_type.respond_to?(:call)
536
+ index_type
421
537
  end
538
+ end
422
539
 
423
- {
424
- settings: settings,
425
- mappings: mappings
426
- }
540
+ def default_type
541
+ "text"
542
+ end
543
+
544
+ def default_analyzer
545
+ :searchkick_index
546
+ end
547
+
548
+ def below73?
549
+ Searchkick.server_below?("7.3.0")
427
550
  end
428
551
  end
429
552
  end