searchkick 4.4.0 → 5.3.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,491 +1,552 @@
1
1
  module Searchkick
2
- module IndexOptions
3
- def index_options
4
- options = @options
5
- language = options[:language]
6
- language = language.call if language.respond_to?(:call)
2
+ class IndexOptions
3
+ attr_reader :options
7
4
 
8
- below62 = Searchkick.server_below?("6.2.0")
9
- below70 = Searchkick.server_below?("7.0.0")
10
- below73 = Searchkick.server_below?("7.3.0")
11
-
12
- if below70
13
- index_type = options[:_type]
14
- index_type = index_type.call if index_type.respond_to?(:call)
15
- end
5
+ def initialize(index)
6
+ @options = index.options
7
+ end
16
8
 
17
- custom_mapping = options[:mappings] || {}
18
- if below70 && custom_mapping.keys.map(&:to_sym).include?(:properties)
19
- # add type
20
- custom_mapping = {index_type => custom_mapping}
21
- end
9
+ def index_options
10
+ # mortal symbols are garbage collected in Ruby 2.2+
11
+ custom_settings = (options[:settings] || {}).deep_symbolize_keys
12
+ custom_mappings = (options[:mappings] || {}).deep_symbolize_keys
22
13
 
23
14
  if options[:mappings] && !options[:merge_mappings]
24
- settings = options[:settings] || {}
25
- mappings = custom_mapping
15
+ settings = custom_settings
16
+ mappings = custom_mappings
26
17
  else
27
- default_type = "text"
28
- default_analyzer = :searchkick_index
29
- keyword_mapping = {type: "keyword"}
30
-
31
- keyword_mapping[:ignore_above] = options[:ignore_above] || 30000
32
-
33
- settings = {
34
- analysis: {
35
- analyzer: {
36
- searchkick_keyword: {
37
- type: "custom",
38
- tokenizer: "keyword",
39
- filter: ["lowercase"] + (options[:stem_conversions] ? ["searchkick_stemmer"] : [])
40
- },
41
- default_analyzer => {
42
- type: "custom",
43
- # character filters -> tokenizer -> token filters
44
- # https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html
45
- char_filter: ["ampersand"],
46
- tokenizer: "standard",
47
- # synonym should come last, after stemming and shingle
48
- # shingle must come before searchkick_stemmer
49
- filter: ["lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
50
- },
51
- searchkick_search: {
52
- type: "custom",
53
- char_filter: ["ampersand"],
54
- tokenizer: "standard",
55
- filter: ["lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
56
- },
57
- searchkick_search2: {
58
- type: "custom",
59
- char_filter: ["ampersand"],
60
- tokenizer: "standard",
61
- filter: ["lowercase", "asciifolding", "searchkick_stemmer"]
62
- },
63
- # https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
64
- searchkick_autocomplete_search: {
65
- type: "custom",
66
- tokenizer: "keyword",
67
- filter: ["lowercase", "asciifolding"]
68
- },
69
- searchkick_word_search: {
70
- type: "custom",
71
- tokenizer: "standard",
72
- filter: ["lowercase", "asciifolding"]
73
- },
74
- searchkick_suggest_index: {
75
- type: "custom",
76
- tokenizer: "standard",
77
- filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
78
- },
79
- searchkick_text_start_index: {
80
- type: "custom",
81
- tokenizer: "keyword",
82
- filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
83
- },
84
- searchkick_text_middle_index: {
85
- type: "custom",
86
- tokenizer: "keyword",
87
- filter: ["lowercase", "asciifolding", "searchkick_ngram"]
88
- },
89
- searchkick_text_end_index: {
90
- type: "custom",
91
- tokenizer: "keyword",
92
- filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
93
- },
94
- searchkick_word_start_index: {
95
- type: "custom",
96
- tokenizer: "standard",
97
- filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
98
- },
99
- searchkick_word_middle_index: {
100
- type: "custom",
101
- tokenizer: "standard",
102
- filter: ["lowercase", "asciifolding", "searchkick_ngram"]
103
- },
104
- searchkick_word_end_index: {
105
- type: "custom",
106
- tokenizer: "standard",
107
- filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
108
- }
109
- },
110
- filter: {
111
- searchkick_index_shingle: {
112
- type: "shingle",
113
- token_separator: ""
114
- },
115
- # lucky find https://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
116
- searchkick_search_shingle: {
117
- type: "shingle",
118
- token_separator: "",
119
- output_unigrams: false,
120
- output_unigrams_if_no_shingles: true
121
- },
122
- searchkick_suggest_shingle: {
123
- type: "shingle",
124
- max_shingle_size: 5
125
- },
126
- searchkick_edge_ngram: {
127
- type: "edge_ngram",
128
- min_gram: 1,
129
- max_gram: 50
130
- },
131
- searchkick_ngram: {
132
- type: "ngram",
133
- min_gram: 1,
134
- max_gram: 50
135
- },
136
- searchkick_stemmer: {
137
- # use stemmer if language is lowercase, snowball otherwise
138
- type: language == language.to_s.downcase ? "stemmer" : "snowball",
139
- language: language || "English"
140
- }
141
- },
142
- char_filter: {
143
- # https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html
144
- # &_to_and
145
- ampersand: {
146
- type: "mapping",
147
- mappings: ["&=> and "]
148
- }
149
- }
150
- }
151
- }
18
+ settings = generate_settings.deep_symbolize_keys.deep_merge(custom_settings)
19
+ mappings = generate_mappings.deep_symbolize_keys.deep_merge(custom_mappings)
20
+ end
21
+
22
+ set_deep_paging(settings) if options[:deep_paging] || options[:max_result_window]
23
+
24
+ {
25
+ settings: settings,
26
+ mappings: mappings
27
+ }
28
+ end
152
29
 
153
- stem = options[:stem]
30
+ def generate_settings
31
+ language = options[:language]
32
+ language = language.call if language.respond_to?(:call)
154
33
 
155
- case language
156
- when "chinese"
157
- settings[:analysis][:analyzer].merge!(
34
+ settings = {
35
+ analysis: {
36
+ analyzer: {
37
+ searchkick_keyword: {
38
+ type: "custom",
39
+ tokenizer: "keyword",
40
+ filter: ["lowercase"] + (options[:stem_conversions] ? ["searchkick_stemmer"] : [])
41
+ },
158
42
  default_analyzer => {
159
- type: "ik_smart"
43
+ type: "custom",
44
+ # character filters -> tokenizer -> token filters
45
+ # https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html
46
+ char_filter: ["ampersand"],
47
+ tokenizer: "standard",
48
+ # synonym should come last, after stemming and shingle
49
+ # shingle must come before searchkick_stemmer
50
+ filter: ["lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
160
51
  },
161
52
  searchkick_search: {
162
- type: "ik_smart"
53
+ type: "custom",
54
+ char_filter: ["ampersand"],
55
+ tokenizer: "standard",
56
+ filter: ["lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
163
57
  },
164
58
  searchkick_search2: {
165
- type: "ik_max_word"
166
- }
167
- )
168
-
169
- stem = false
170
- when "chinese2", "smartcn"
171
- settings[:analysis][:analyzer].merge!(
172
- default_analyzer => {
173
- type: "smartcn"
59
+ type: "custom",
60
+ char_filter: ["ampersand"],
61
+ tokenizer: "standard",
62
+ filter: ["lowercase", "asciifolding", "searchkick_stemmer"]
174
63
  },
175
- searchkick_search: {
176
- type: "smartcn"
64
+ # https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
65
+ searchkick_autocomplete_search: {
66
+ type: "custom",
67
+ tokenizer: "keyword",
68
+ filter: ["lowercase", "asciifolding"]
177
69
  },
178
- searchkick_search2: {
179
- type: "smartcn"
180
- }
181
- )
182
-
183
- stem = false
184
- when "japanese"
185
- settings[:analysis][:analyzer].merge!(
186
- default_analyzer => {
187
- type: "kuromoji"
70
+ searchkick_word_search: {
71
+ type: "custom",
72
+ tokenizer: "standard",
73
+ filter: ["lowercase", "asciifolding"]
188
74
  },
189
- searchkick_search: {
190
- type: "kuromoji"
75
+ searchkick_suggest_index: {
76
+ type: "custom",
77
+ tokenizer: "standard",
78
+ filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
191
79
  },
192
- searchkick_search2: {
193
- type: "kuromoji"
194
- }
195
- )
196
-
197
- stem = false
198
- when "korean"
199
- settings[:analysis][:analyzer].merge!(
200
- default_analyzer => {
201
- type: "openkoreantext-analyzer"
80
+ searchkick_text_start_index: {
81
+ type: "custom",
82
+ tokenizer: "keyword",
83
+ filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
202
84
  },
203
- searchkick_search: {
204
- type: "openkoreantext-analyzer"
85
+ searchkick_text_middle_index: {
86
+ type: "custom",
87
+ tokenizer: "keyword",
88
+ filter: ["lowercase", "asciifolding", "searchkick_ngram"]
205
89
  },
206
- searchkick_search2: {
207
- type: "openkoreantext-analyzer"
208
- }
209
- )
210
-
211
- stem = false
212
- when "korean2"
213
- settings[:analysis][:analyzer].merge!(
214
- default_analyzer => {
215
- type: "nori"
90
+ searchkick_text_end_index: {
91
+ type: "custom",
92
+ tokenizer: "keyword",
93
+ filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
216
94
  },
217
- searchkick_search: {
218
- type: "nori"
95
+ searchkick_word_start_index: {
96
+ type: "custom",
97
+ tokenizer: "standard",
98
+ filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
219
99
  },
220
- searchkick_search2: {
221
- type: "nori"
100
+ searchkick_word_middle_index: {
101
+ type: "custom",
102
+ tokenizer: "standard",
103
+ filter: ["lowercase", "asciifolding", "searchkick_ngram"]
104
+ },
105
+ searchkick_word_end_index: {
106
+ type: "custom",
107
+ tokenizer: "standard",
108
+ filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
222
109
  }
223
- )
224
-
225
- stem = false
226
- when "vietnamese"
227
- settings[:analysis][:analyzer].merge!(
228
- default_analyzer => {
229
- type: "vi_analyzer"
110
+ },
111
+ filter: {
112
+ searchkick_index_shingle: {
113
+ type: "shingle",
114
+ token_separator: ""
230
115
  },
231
- searchkick_search: {
232
- type: "vi_analyzer"
116
+ # lucky find https://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
117
+ searchkick_search_shingle: {
118
+ type: "shingle",
119
+ token_separator: "",
120
+ output_unigrams: false,
121
+ output_unigrams_if_no_shingles: true
233
122
  },
234
- searchkick_search2: {
235
- type: "vi_analyzer"
236
- }
237
- )
238
-
239
- stem = false
240
- when "polish", "ukrainian"
241
- settings[:analysis][:analyzer].merge!(
242
- default_analyzer => {
243
- type: language
123
+ searchkick_suggest_shingle: {
124
+ type: "shingle",
125
+ max_shingle_size: 5
244
126
  },
245
- searchkick_search: {
246
- type: language
127
+ searchkick_edge_ngram: {
128
+ type: "edge_ngram",
129
+ min_gram: 1,
130
+ max_gram: 50
247
131
  },
248
- searchkick_search2: {
249
- type: language
132
+ searchkick_ngram: {
133
+ type: "ngram",
134
+ min_gram: 1,
135
+ max_gram: 50
136
+ },
137
+ searchkick_stemmer: {
138
+ # use stemmer if language is lowercase, snowball otherwise
139
+ type: language == language.to_s.downcase ? "stemmer" : "snowball",
140
+ language: language || "English"
141
+ }
142
+ },
143
+ char_filter: {
144
+ # https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html
145
+ # &_to_and
146
+ ampersand: {
147
+ type: "mapping",
148
+ mappings: ["&=> and "]
250
149
  }
251
- )
150
+ }
151
+ }
152
+ }
252
153
 
253
- stem = false
254
- end
154
+ raise ArgumentError, "Can't pass both language and stemmer" if options[:stemmer] && language
155
+ update_language(settings, language)
156
+ update_stemming(settings)
255
157
 
256
- if Searchkick.env == "test"
257
- settings[:number_of_shards] = 1
258
- settings[:number_of_replicas] = 0
259
- end
158
+ if Searchkick.env == "test"
159
+ settings[:number_of_shards] = 1
160
+ settings[:number_of_replicas] = 0
161
+ end
260
162
 
261
- if options[:similarity]
262
- settings[:similarity] = {default: {type: options[:similarity]}}
263
- end
163
+ if options[:similarity]
164
+ settings[:similarity] = {default: {type: options[:similarity]}}
165
+ end
264
166
 
265
- unless below62
266
- settings[:index] = {
267
- max_ngram_diff: 49,
268
- max_shingle_diff: 4
269
- }
270
- end
167
+ settings[:index] = {
168
+ max_ngram_diff: 49,
169
+ max_shingle_diff: 4
170
+ }
271
171
 
272
- if options[:case_sensitive]
273
- settings[:analysis][:analyzer].each do |_, analyzer|
274
- analyzer[:filter].delete("lowercase")
275
- end
172
+ if options[:case_sensitive]
173
+ settings[:analysis][:analyzer].each do |_, analyzer|
174
+ analyzer[:filter].delete("lowercase")
276
175
  end
176
+ end
277
177
 
278
- if stem == false
279
- settings[:analysis][:filter].delete(:searchkick_stemmer)
280
- settings[:analysis][:analyzer].each do |_, analyzer|
281
- analyzer[:filter].delete("searchkick_stemmer") if analyzer[:filter]
282
- end
178
+ add_synonyms(settings)
179
+ add_search_synonyms(settings)
180
+
181
+ if options[:special_characters] == false
182
+ settings[:analysis][:analyzer].each_value do |analyzer_settings|
183
+ analyzer_settings[:filter].reject! { |f| f == "asciifolding" }
283
184
  end
185
+ end
284
186
 
285
- settings = settings.symbolize_keys.deep_merge((options[:settings] || {}).symbolize_keys)
187
+ settings
188
+ end
286
189
 
287
- # synonyms
288
- synonyms = options[:synonyms] || []
289
- synonyms = synonyms.call if synonyms.respond_to?(:call)
290
- if synonyms.any?
291
- settings[:analysis][:filter][:searchkick_synonym] = {
292
- type: "synonym",
293
- # only remove a single space from synonyms so three-word synonyms will fail noisily instead of silently
294
- synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.map { |s2| s2.sub(/\s+/, "") }.join(",") : s }.map(&:downcase)
190
+ def update_language(settings, language)
191
+ case language
192
+ when "chinese"
193
+ settings[:analysis][:analyzer].merge!(
194
+ default_analyzer => {
195
+ type: "ik_smart"
196
+ },
197
+ searchkick_search: {
198
+ type: "ik_smart"
199
+ },
200
+ searchkick_search2: {
201
+ type: "ik_max_word"
295
202
  }
296
- # choosing a place for the synonym filter when stemming is not easy
297
- # https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
298
- # TODO use a snowball stemmer on synonyms when creating the token filter
299
-
300
- # http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
301
- # I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
302
- # - Only apply the synonym expansion at index time
303
- # - Don't have the synonym filter applied search
304
- # - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
305
- settings[:analysis][:analyzer][default_analyzer][:filter].insert(2, "searchkick_synonym")
306
-
307
- %w(word_start word_middle word_end).each do |type|
308
- settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym")
309
- end
203
+ )
204
+ when "chinese2", "smartcn"
205
+ settings[:analysis][:analyzer].merge!(
206
+ default_analyzer => {
207
+ type: "smartcn"
208
+ },
209
+ searchkick_search: {
210
+ type: "smartcn"
211
+ },
212
+ searchkick_search2: {
213
+ type: "smartcn"
214
+ }
215
+ )
216
+ when "japanese", "japanese2"
217
+ analyzer = {
218
+ type: "custom",
219
+ tokenizer: "kuromoji_tokenizer",
220
+ filter: [
221
+ "kuromoji_baseform",
222
+ "kuromoji_part_of_speech",
223
+ "cjk_width",
224
+ "ja_stop",
225
+ "searchkick_stemmer",
226
+ "lowercase"
227
+ ]
228
+ }
229
+ settings[:analysis][:analyzer].merge!(
230
+ default_analyzer => analyzer.deep_dup,
231
+ searchkick_search: analyzer.deep_dup,
232
+ searchkick_search2: analyzer.deep_dup
233
+ )
234
+ settings[:analysis][:filter][:searchkick_stemmer] = {
235
+ type: "kuromoji_stemmer"
236
+ }
237
+ when "korean"
238
+ settings[:analysis][:analyzer].merge!(
239
+ default_analyzer => {
240
+ type: "openkoreantext-analyzer"
241
+ },
242
+ searchkick_search: {
243
+ type: "openkoreantext-analyzer"
244
+ },
245
+ searchkick_search2: {
246
+ type: "openkoreantext-analyzer"
247
+ }
248
+ )
249
+ when "korean2"
250
+ settings[:analysis][:analyzer].merge!(
251
+ default_analyzer => {
252
+ type: "nori"
253
+ },
254
+ searchkick_search: {
255
+ type: "nori"
256
+ },
257
+ searchkick_search2: {
258
+ type: "nori"
259
+ }
260
+ )
261
+ when "vietnamese"
262
+ settings[:analysis][:analyzer].merge!(
263
+ default_analyzer => {
264
+ type: "vi_analyzer"
265
+ },
266
+ searchkick_search: {
267
+ type: "vi_analyzer"
268
+ },
269
+ searchkick_search2: {
270
+ type: "vi_analyzer"
271
+ }
272
+ )
273
+ when "polish", "ukrainian"
274
+ settings[:analysis][:analyzer].merge!(
275
+ default_analyzer => {
276
+ type: language
277
+ },
278
+ searchkick_search: {
279
+ type: language
280
+ },
281
+ searchkick_search2: {
282
+ type: language
283
+ }
284
+ )
285
+ end
286
+ end
287
+
288
+ def update_stemming(settings)
289
+ if options[:stemmer]
290
+ stemmer = options[:stemmer]
291
+ # could also support snowball and stemmer
292
+ case stemmer[:type]
293
+ when "hunspell"
294
+ # supports all token filter options
295
+ settings[:analysis][:filter][:searchkick_stemmer] = stemmer
296
+ else
297
+ raise ArgumentError, "Unknown stemmer: #{stemmer[:type]}"
310
298
  end
299
+ end
311
300
 
312
- search_synonyms = options[:search_synonyms] || []
313
- search_synonyms = search_synonyms.call if search_synonyms.respond_to?(:call)
314
- if search_synonyms.is_a?(String) || search_synonyms.any?
315
- if search_synonyms.is_a?(String)
316
- synonym_graph = {
317
- type: "synonym_graph",
318
- synonyms_path: search_synonyms
319
- }
320
- synonym_graph[:updateable] = true unless below73
321
- else
322
- synonym_graph = {
323
- type: "synonym_graph",
324
- # TODO confirm this is correct
325
- synonyms: search_synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.join(",") : s }.map(&:downcase)
326
- }
327
- end
328
- settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph
301
+ stem = options[:stem]
329
302
 
330
- [:searchkick_search2, :searchkick_word_search].each do |analyzer|
331
- settings[:analysis][:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph")
332
- end
333
- end
303
+ # language analyzer used
304
+ stem = false if settings[:analysis][:analyzer][default_analyzer][:type] != "custom"
334
305
 
335
- if options[:wordnet]
336
- settings[:analysis][:filter][:searchkick_wordnet] = {
337
- type: "synonym",
338
- format: "wordnet",
339
- synonyms_path: Searchkick.wordnet_path
340
- }
306
+ if stem == false
307
+ settings[:analysis][:filter].delete(:searchkick_stemmer)
308
+ settings[:analysis][:analyzer].each do |_, analyzer|
309
+ analyzer[:filter].delete("searchkick_stemmer") if analyzer[:filter]
310
+ end
311
+ end
341
312
 
342
- settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_wordnet")
343
- settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_wordnet"
313
+ if options[:stemmer_override]
314
+ stemmer_override = {
315
+ type: "stemmer_override"
316
+ }
317
+ if options[:stemmer_override].is_a?(String)
318
+ stemmer_override[:rules_path] = options[:stemmer_override]
319
+ else
320
+ stemmer_override[:rules] = options[:stemmer_override]
321
+ end
322
+ settings[:analysis][:filter][:searchkick_stemmer_override] = stemmer_override
344
323
 
345
- %w(word_start word_middle word_end).each do |type|
346
- settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_wordnet")
347
- end
324
+ settings[:analysis][:analyzer].each do |_, analyzer|
325
+ stemmer_index = analyzer[:filter].index("searchkick_stemmer") if analyzer[:filter]
326
+ analyzer[:filter].insert(stemmer_index, "searchkick_stemmer_override") if stemmer_index
348
327
  end
328
+ end
349
329
 
350
- if options[:special_characters] == false
351
- settings[:analysis][:analyzer].each_value do |analyzer_settings|
352
- analyzer_settings[:filter].reject! { |f| f == "asciifolding" }
353
- end
330
+ if options[:stem_exclusion]
331
+ settings[:analysis][:filter][:searchkick_stem_exclusion] = {
332
+ type: "keyword_marker",
333
+ keywords: options[:stem_exclusion]
334
+ }
335
+
336
+ settings[:analysis][:analyzer].each do |_, analyzer|
337
+ stemmer_index = analyzer[:filter].index("searchkick_stemmer") if analyzer[:filter]
338
+ analyzer[:filter].insert(stemmer_index, "searchkick_stem_exclusion") if stemmer_index
354
339
  end
340
+ end
341
+ end
355
342
 
356
- mapping = {}
343
+ def generate_mappings
344
+ mapping = {}
357
345
 
358
- # conversions
359
- Array(options[:conversions]).each do |conversions_field|
360
- mapping[conversions_field] = {
361
- type: "nested",
362
- properties: {
363
- query: {type: default_type, analyzer: "searchkick_keyword"},
364
- count: {type: "integer"}
365
- }
346
+ keyword_mapping = {type: "keyword"}
347
+ keyword_mapping[:ignore_above] = options[:ignore_above] || 30000
348
+
349
+ # conversions
350
+ Array(options[:conversions]).each do |conversions_field|
351
+ mapping[conversions_field] = {
352
+ type: "nested",
353
+ properties: {
354
+ query: {type: default_type, analyzer: "searchkick_keyword"},
355
+ count: {type: "integer"}
366
356
  }
367
- end
357
+ }
358
+ end
368
359
 
369
- mapping_options = Hash[
370
- [:suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable]
371
- .map { |type| [type, (options[type] || []).map(&:to_s)] }
372
- ]
360
+ mapping_options =
361
+ [:suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable]
362
+ .to_h { |type| [type, (options[type] || []).map(&:to_s)] }
373
363
 
374
- word = options[:word] != false && (!options[:match] || options[:match] == :word)
364
+ word = options[:word] != false && (!options[:match] || options[:match] == :word)
375
365
 
376
- mapping_options[:searchable].delete("_all")
366
+ mapping_options[:searchable].delete("_all")
377
367
 
378
- analyzed_field_options = {type: default_type, index: true, analyzer: default_analyzer}
368
+ analyzed_field_options = {type: default_type, index: true, analyzer: default_analyzer.to_s}
379
369
 
380
- mapping_options.values.flatten.uniq.each do |field|
381
- fields = {}
370
+ mapping_options.values.flatten.uniq.each do |field|
371
+ fields = {}
382
372
 
383
- if options.key?(:filterable) && !mapping_options[:filterable].include?(field)
384
- fields[field] = {type: default_type, index: false}
385
- else
386
- fields[field] = keyword_mapping
387
- end
373
+ if options.key?(:filterable) && !mapping_options[:filterable].include?(field)
374
+ fields[field] = {type: default_type, index: false}
375
+ else
376
+ fields[field] = keyword_mapping
377
+ end
388
378
 
389
- if !options[:searchable] || mapping_options[:searchable].include?(field)
390
- if word
391
- fields[:analyzed] = analyzed_field_options
379
+ if !options[:searchable] || mapping_options[:searchable].include?(field)
380
+ if word
381
+ fields[:analyzed] = analyzed_field_options
392
382
 
393
- if mapping_options[:highlight].include?(field)
394
- fields[:analyzed][:term_vector] = "with_positions_offsets"
395
- end
383
+ if mapping_options[:highlight].include?(field)
384
+ fields[:analyzed][:term_vector] = "with_positions_offsets"
396
385
  end
386
+ end
397
387
 
398
- mapping_options.except(:highlight, :searchable, :filterable, :word).each do |type, f|
399
- if options[:match] == type || f.include?(field)
400
- fields[type] = {type: default_type, index: true, analyzer: "searchkick_#{type}_index"}
401
- end
388
+ mapping_options.except(:highlight, :searchable, :filterable, :word).each do |type, f|
389
+ if options[:match] == type || f.include?(field)
390
+ fields[type] = {type: default_type, index: true, analyzer: "searchkick_#{type}_index"}
402
391
  end
403
392
  end
404
-
405
- mapping[field] = fields[field].merge(fields: fields.except(field))
406
393
  end
407
394
 
408
- (options[:locations] || []).map(&:to_s).each do |field|
409
- mapping[field] = {
410
- type: "geo_point"
411
- }
412
- end
395
+ mapping[field] = fields[field].merge(fields: fields.except(field))
396
+ end
397
+
398
+ (options[:locations] || []).map(&:to_s).each do |field|
399
+ mapping[field] = {
400
+ type: "geo_point"
401
+ }
402
+ end
403
+
404
+ options[:geo_shape] = options[:geo_shape].product([{}]).to_h if options[:geo_shape].is_a?(Array)
405
+ (options[:geo_shape] || {}).each do |field, shape_options|
406
+ mapping[field] = shape_options.merge(type: "geo_shape")
407
+ end
413
408
 
414
- options[:geo_shape] = options[:geo_shape].product([{}]).to_h if options[:geo_shape].is_a?(Array)
415
- (options[:geo_shape] || {}).each do |field, shape_options|
416
- mapping[field] = shape_options.merge(type: "geo_shape")
409
+ if options[:inheritance]
410
+ mapping[:type] = keyword_mapping
411
+ end
412
+
413
+ routing = {}
414
+ if options[:routing]
415
+ routing = {required: true}
416
+ unless options[:routing] == true
417
+ routing[:path] = options[:routing].to_s
417
418
  end
419
+ end
418
420
 
419
- if options[:inheritance]
420
- mapping[:type] = keyword_mapping
421
+ dynamic_fields = {
422
+ # analyzed field must be the default field for include_in_all
423
+ # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
424
+ # however, we can include the not_analyzed field in _all
425
+ # and the _all index analyzer will take care of it
426
+ "{name}" => keyword_mapping
427
+ }
428
+
429
+ if options.key?(:filterable)
430
+ dynamic_fields["{name}"] = {type: default_type, index: false}
431
+ end
432
+
433
+ unless options[:searchable]
434
+ if options[:match] && options[:match] != :word
435
+ dynamic_fields[options[:match]] = {type: default_type, index: true, analyzer: "searchkick_#{options[:match]}_index"}
421
436
  end
422
437
 
423
- routing = {}
424
- if options[:routing]
425
- routing = {required: true}
426
- unless options[:routing] == true
427
- routing[:path] = options[:routing].to_s
428
- end
438
+ if word
439
+ dynamic_fields[:analyzed] = analyzed_field_options
429
440
  end
441
+ end
442
+
443
+ # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
444
+ multi_field = dynamic_fields["{name}"].merge(fields: dynamic_fields.except("{name}"))
445
+
446
+ mappings = {
447
+ properties: mapping,
448
+ _routing: routing,
449
+ # https://gist.github.com/kimchy/2898285
450
+ dynamic_templates: [
451
+ {
452
+ string_template: {
453
+ match: "*",
454
+ match_mapping_type: "string",
455
+ mapping: multi_field
456
+ }
457
+ }
458
+ ]
459
+ }
460
+
461
+ mappings
462
+ end
430
463
 
431
- dynamic_fields = {
432
- # analyzed field must be the default field for include_in_all
433
- # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
434
- # however, we can include the not_analyzed field in _all
435
- # and the _all index analyzer will take care of it
436
- "{name}" => keyword_mapping
464
+ def add_synonyms(settings)
465
+ synonyms = options[:synonyms] || []
466
+ synonyms = synonyms.call if synonyms.respond_to?(:call)
467
+ if synonyms.any?
468
+ settings[:analysis][:filter][:searchkick_synonym] = {
469
+ type: "synonym",
470
+ # only remove a single space from synonyms so three-word synonyms will fail noisily instead of silently
471
+ synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.map { |s2| s2.sub(/\s+/, "") }.join(",") : s }.map(&:downcase)
437
472
  }
473
+ # choosing a place for the synonym filter when stemming is not easy
474
+ # https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
475
+ # TODO use a snowball stemmer on synonyms when creating the token filter
476
+
477
+ # http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
478
+ # I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
479
+ # - Only apply the synonym expansion at index time
480
+ # - Don't have the synonym filter applied search
481
+ # - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
482
+ settings[:analysis][:analyzer][default_analyzer][:filter].insert(2, "searchkick_synonym")
483
+
484
+ %w(word_start word_middle word_end).each do |type|
485
+ settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym")
486
+ end
487
+ end
488
+ end
438
489
 
439
- if options.key?(:filterable)
440
- dynamic_fields["{name}"] = {type: default_type, index: false}
490
+ def add_search_synonyms(settings)
491
+ search_synonyms = options[:search_synonyms] || []
492
+ search_synonyms = search_synonyms.call if search_synonyms.respond_to?(:call)
493
+ if search_synonyms.is_a?(String) || search_synonyms.any?
494
+ if search_synonyms.is_a?(String)
495
+ synonym_graph = {
496
+ type: "synonym_graph",
497
+ synonyms_path: search_synonyms
498
+ }
499
+ synonym_graph[:updateable] = true unless below73?
500
+ else
501
+ synonym_graph = {
502
+ type: "synonym_graph",
503
+ # TODO confirm this is correct
504
+ synonyms: search_synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.join(",") : s }.map(&:downcase)
505
+ }
441
506
  end
507
+ settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph
442
508
 
443
- unless options[:searchable]
444
- if options[:match] && options[:match] != :word
445
- dynamic_fields[options[:match]] = {type: default_type, index: true, analyzer: "searchkick_#{options[:match]}_index"}
509
+ if ["japanese", "japanese2"].include?(options[:language])
510
+ [:searchkick_search, :searchkick_search2].each do |analyzer|
511
+ settings[:analysis][:analyzer][analyzer][:filter].insert(4, "searchkick_synonym_graph")
446
512
  end
513
+ else
514
+ [:searchkick_search2, :searchkick_word_search].each do |analyzer|
515
+ unless settings[:analysis][:analyzer][analyzer].key?(:filter)
516
+ raise Error, "Search synonyms are not supported yet for language"
517
+ end
447
518
 
448
- if word
449
- dynamic_fields[:analyzed] = analyzed_field_options
519
+ settings[:analysis][:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph")
450
520
  end
451
521
  end
522
+ end
523
+ end
452
524
 
453
- # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
454
- multi_field = dynamic_fields["{name}"].merge(fields: dynamic_fields.except("{name}"))
455
-
456
- mappings = {
457
- properties: mapping,
458
- _routing: routing,
459
- # https://gist.github.com/kimchy/2898285
460
- dynamic_templates: [
461
- {
462
- string_template: {
463
- match: "*",
464
- match_mapping_type: "string",
465
- mapping: multi_field
466
- }
467
- }
468
- ]
469
- }
470
-
471
- if below70
472
- mappings = {index_type => mappings}
473
- end
474
-
475
- mappings = mappings.symbolize_keys.deep_merge(custom_mapping.symbolize_keys)
525
+ def set_deep_paging(settings)
526
+ if !settings.dig(:index, :max_result_window) && !settings[:"index.max_result_window"]
527
+ settings[:index] ||= {}
528
+ settings[:index][:max_result_window] = options[:max_result_window] || 1_000_000_000
476
529
  end
530
+ end
477
531
 
478
- if options[:deep_paging]
479
- if !settings.dig(:index, :max_result_window) && !settings[:"index.max_result_window"]
480
- settings[:index] ||= {}
481
- settings[:index][:max_result_window] = 1_000_000_000
482
- end
532
+ def index_type
533
+ @index_type ||= begin
534
+ index_type = options[:_type]
535
+ index_type = index_type.call if index_type.respond_to?(:call)
536
+ index_type
483
537
  end
538
+ end
484
539
 
485
- {
486
- settings: settings,
487
- mappings: mappings
488
- }
540
+ def default_type
541
+ "text"
542
+ end
543
+
544
+ def default_analyzer
545
+ :searchkick_index
546
+ end
547
+
548
+ def below73?
549
+ Searchkick.server_below?("7.3.0")
489
550
  end
490
551
  end
491
552
  end