searchkick 4.4.0 → 4.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -164,6 +164,7 @@ module Searchkick
164
164
 
165
165
  def self.aws_credentials=(creds)
166
166
  begin
167
+ # TODO remove in Searchkick 5 (just use aws_sigv4)
167
168
  require "faraday_middleware/aws_signers_v4"
168
169
  rescue LoadError
169
170
  require "faraday_middleware/aws_sigv4"
@@ -173,17 +174,16 @@ module Searchkick
173
174
  end
174
175
 
175
176
  def self.reindex_status(index_name)
176
- if redis
177
- batches_left = Searchkick::Index.new(index_name).batches_left
178
- {
179
- completed: batches_left == 0,
180
- batches_left: batches_left
181
- }
182
- else
183
- raise Searchkick::Error, "Redis not configured"
184
- end
177
+ raise Searchkick::Error, "Redis not configured" unless redis
178
+
179
+ batches_left = Searchkick::Index.new(index_name).batches_left
180
+ {
181
+ completed: batches_left == 0,
182
+ batches_left: batches_left
183
+ }
185
184
  end
186
185
 
186
+ # TODO use ConnectionPool::Wrapper when redis is set so this is no longer needed
187
187
  def self.with_redis
188
188
  if redis
189
189
  if redis.respond_to?(:with)
@@ -267,9 +267,12 @@ module Searchkick
267
267
  end
268
268
  end
269
269
 
270
- # TODO find better ActiveModel hook
271
270
  require "active_model/callbacks"
272
271
  ActiveModel::Callbacks.include(Searchkick::Model)
272
+ # TODO use
273
+ # ActiveSupport.on_load(:mongoid) do
274
+ # Mongoid::Document::ClassMethods.include Searchkick::Model
275
+ # end
273
276
 
274
277
  ActiveSupport.on_load(:active_record) do
275
278
  extend Searchkick::Model
@@ -2,8 +2,6 @@ require "searchkick/index_options"
2
2
 
3
3
  module Searchkick
4
4
  class Index
5
- include IndexOptions
6
-
7
5
  attr_reader :name, :options
8
6
 
9
7
  def initialize(name, options = {})
@@ -12,6 +10,10 @@ module Searchkick
12
10
  @klass_document_type = {} # cache
13
11
  end
14
12
 
13
+ def index_options
14
+ IndexOptions.new(self).index_options
15
+ end
16
+
15
17
  def create(body = {})
16
18
  client.indices.create index: name, body: body
17
19
  end
@@ -178,7 +180,11 @@ module Searchkick
178
180
  require "elasticsearch/xpack"
179
181
  raise Error, "Requires Elasticsearch 7.3+" if Searchkick.server_below?("7.3.0")
180
182
  raise Error, "Requires elasticsearch-xpack 7.8+" unless client.xpack.respond_to?(:indices)
181
- client.xpack.indices.reload_search_analyzers(index: name)
183
+ begin
184
+ client.xpack.indices.reload_search_analyzers(index: name)
185
+ rescue Elasticsearch::Transport::Transport::Errors::MethodNotAllowed
186
+ raise Error, "Requires non-OSS version of Elasticsearch"
187
+ end
182
188
  end
183
189
 
184
190
  # queue
@@ -1,21 +1,14 @@
1
1
  module Searchkick
2
- module IndexOptions
3
- def index_options
4
- options = @options
5
- language = options[:language]
6
- language = language.call if language.respond_to?(:call)
2
+ class IndexOptions
3
+ attr_reader :options
7
4
 
8
- below62 = Searchkick.server_below?("6.2.0")
9
- below70 = Searchkick.server_below?("7.0.0")
10
- below73 = Searchkick.server_below?("7.3.0")
11
-
12
- if below70
13
- index_type = options[:_type]
14
- index_type = index_type.call if index_type.respond_to?(:call)
15
- end
5
+ def initialize(index)
6
+ @options = index.options
7
+ end
16
8
 
9
+ def index_options
17
10
  custom_mapping = options[:mappings] || {}
18
- if below70 && custom_mapping.keys.map(&:to_sym).include?(:properties)
11
+ if below70? && custom_mapping.keys.map(&:to_sym).include?(:properties)
19
12
  # add type
20
13
  custom_mapping = {index_type => custom_mapping}
21
14
  end
@@ -24,468 +17,542 @@ module Searchkick
24
17
  settings = options[:settings] || {}
25
18
  mappings = custom_mapping
26
19
  else
27
- default_type = "text"
28
- default_analyzer = :searchkick_index
29
- keyword_mapping = {type: "keyword"}
30
-
31
- keyword_mapping[:ignore_above] = options[:ignore_above] || 30000
32
-
33
- settings = {
34
- analysis: {
35
- analyzer: {
36
- searchkick_keyword: {
37
- type: "custom",
38
- tokenizer: "keyword",
39
- filter: ["lowercase"] + (options[:stem_conversions] ? ["searchkick_stemmer"] : [])
40
- },
41
- default_analyzer => {
42
- type: "custom",
43
- # character filters -> tokenizer -> token filters
44
- # https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html
45
- char_filter: ["ampersand"],
46
- tokenizer: "standard",
47
- # synonym should come last, after stemming and shingle
48
- # shingle must come before searchkick_stemmer
49
- filter: ["lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
50
- },
51
- searchkick_search: {
52
- type: "custom",
53
- char_filter: ["ampersand"],
54
- tokenizer: "standard",
55
- filter: ["lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
56
- },
57
- searchkick_search2: {
58
- type: "custom",
59
- char_filter: ["ampersand"],
60
- tokenizer: "standard",
61
- filter: ["lowercase", "asciifolding", "searchkick_stemmer"]
62
- },
63
- # https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
64
- searchkick_autocomplete_search: {
65
- type: "custom",
66
- tokenizer: "keyword",
67
- filter: ["lowercase", "asciifolding"]
68
- },
69
- searchkick_word_search: {
70
- type: "custom",
71
- tokenizer: "standard",
72
- filter: ["lowercase", "asciifolding"]
73
- },
74
- searchkick_suggest_index: {
75
- type: "custom",
76
- tokenizer: "standard",
77
- filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
78
- },
79
- searchkick_text_start_index: {
80
- type: "custom",
81
- tokenizer: "keyword",
82
- filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
83
- },
84
- searchkick_text_middle_index: {
85
- type: "custom",
86
- tokenizer: "keyword",
87
- filter: ["lowercase", "asciifolding", "searchkick_ngram"]
88
- },
89
- searchkick_text_end_index: {
90
- type: "custom",
91
- tokenizer: "keyword",
92
- filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
93
- },
94
- searchkick_word_start_index: {
95
- type: "custom",
96
- tokenizer: "standard",
97
- filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
98
- },
99
- searchkick_word_middle_index: {
100
- type: "custom",
101
- tokenizer: "standard",
102
- filter: ["lowercase", "asciifolding", "searchkick_ngram"]
103
- },
104
- searchkick_word_end_index: {
105
- type: "custom",
106
- tokenizer: "standard",
107
- filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
108
- }
109
- },
110
- filter: {
111
- searchkick_index_shingle: {
112
- type: "shingle",
113
- token_separator: ""
114
- },
115
- # lucky find https://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
116
- searchkick_search_shingle: {
117
- type: "shingle",
118
- token_separator: "",
119
- output_unigrams: false,
120
- output_unigrams_if_no_shingles: true
121
- },
122
- searchkick_suggest_shingle: {
123
- type: "shingle",
124
- max_shingle_size: 5
125
- },
126
- searchkick_edge_ngram: {
127
- type: "edge_ngram",
128
- min_gram: 1,
129
- max_gram: 50
130
- },
131
- searchkick_ngram: {
132
- type: "ngram",
133
- min_gram: 1,
134
- max_gram: 50
135
- },
136
- searchkick_stemmer: {
137
- # use stemmer if language is lowercase, snowball otherwise
138
- type: language == language.to_s.downcase ? "stemmer" : "snowball",
139
- language: language || "English"
140
- }
141
- },
142
- char_filter: {
143
- # https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html
144
- # &_to_and
145
- ampersand: {
146
- type: "mapping",
147
- mappings: ["&=> and "]
148
- }
149
- }
150
- }
151
- }
20
+ settings = generate_settings
21
+ mappings = generate_mappings.symbolize_keys.deep_merge(custom_mapping.symbolize_keys)
22
+ end
152
23
 
153
- stem = options[:stem]
24
+ set_deep_paging(settings) if options[:deep_paging]
154
25
 
155
- case language
156
- when "chinese"
157
- settings[:analysis][:analyzer].merge!(
26
+ {
27
+ settings: settings,
28
+ mappings: mappings
29
+ }
30
+ end
31
+
32
+ def generate_settings
33
+ language = options[:language]
34
+ language = language.call if language.respond_to?(:call)
35
+
36
+ settings = {
37
+ analysis: {
38
+ analyzer: {
39
+ searchkick_keyword: {
40
+ type: "custom",
41
+ tokenizer: "keyword",
42
+ filter: ["lowercase"] + (options[:stem_conversions] ? ["searchkick_stemmer"] : [])
43
+ },
158
44
  default_analyzer => {
159
- type: "ik_smart"
45
+ type: "custom",
46
+ # character filters -> tokenizer -> token filters
47
+ # https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html
48
+ char_filter: ["ampersand"],
49
+ tokenizer: "standard",
50
+ # synonym should come last, after stemming and shingle
51
+ # shingle must come before searchkick_stemmer
52
+ filter: ["lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
160
53
  },
161
54
  searchkick_search: {
162
- type: "ik_smart"
55
+ type: "custom",
56
+ char_filter: ["ampersand"],
57
+ tokenizer: "standard",
58
+ filter: ["lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
163
59
  },
164
60
  searchkick_search2: {
165
- type: "ik_max_word"
166
- }
167
- )
168
-
169
- stem = false
170
- when "chinese2", "smartcn"
171
- settings[:analysis][:analyzer].merge!(
172
- default_analyzer => {
173
- type: "smartcn"
61
+ type: "custom",
62
+ char_filter: ["ampersand"],
63
+ tokenizer: "standard",
64
+ filter: ["lowercase", "asciifolding", "searchkick_stemmer"]
174
65
  },
175
- searchkick_search: {
176
- type: "smartcn"
66
+ # https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
67
+ searchkick_autocomplete_search: {
68
+ type: "custom",
69
+ tokenizer: "keyword",
70
+ filter: ["lowercase", "asciifolding"]
177
71
  },
178
- searchkick_search2: {
179
- type: "smartcn"
180
- }
181
- )
182
-
183
- stem = false
184
- when "japanese"
185
- settings[:analysis][:analyzer].merge!(
186
- default_analyzer => {
187
- type: "kuromoji"
72
+ searchkick_word_search: {
73
+ type: "custom",
74
+ tokenizer: "standard",
75
+ filter: ["lowercase", "asciifolding"]
188
76
  },
189
- searchkick_search: {
190
- type: "kuromoji"
77
+ searchkick_suggest_index: {
78
+ type: "custom",
79
+ tokenizer: "standard",
80
+ filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
191
81
  },
192
- searchkick_search2: {
193
- type: "kuromoji"
194
- }
195
- )
196
-
197
- stem = false
198
- when "korean"
199
- settings[:analysis][:analyzer].merge!(
200
- default_analyzer => {
201
- type: "openkoreantext-analyzer"
82
+ searchkick_text_start_index: {
83
+ type: "custom",
84
+ tokenizer: "keyword",
85
+ filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
202
86
  },
203
- searchkick_search: {
204
- type: "openkoreantext-analyzer"
87
+ searchkick_text_middle_index: {
88
+ type: "custom",
89
+ tokenizer: "keyword",
90
+ filter: ["lowercase", "asciifolding", "searchkick_ngram"]
205
91
  },
206
- searchkick_search2: {
207
- type: "openkoreantext-analyzer"
208
- }
209
- )
210
-
211
- stem = false
212
- when "korean2"
213
- settings[:analysis][:analyzer].merge!(
214
- default_analyzer => {
215
- type: "nori"
92
+ searchkick_text_end_index: {
93
+ type: "custom",
94
+ tokenizer: "keyword",
95
+ filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
216
96
  },
217
- searchkick_search: {
218
- type: "nori"
97
+ searchkick_word_start_index: {
98
+ type: "custom",
99
+ tokenizer: "standard",
100
+ filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
219
101
  },
220
- searchkick_search2: {
221
- type: "nori"
102
+ searchkick_word_middle_index: {
103
+ type: "custom",
104
+ tokenizer: "standard",
105
+ filter: ["lowercase", "asciifolding", "searchkick_ngram"]
106
+ },
107
+ searchkick_word_end_index: {
108
+ type: "custom",
109
+ tokenizer: "standard",
110
+ filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
222
111
  }
223
- )
224
-
225
- stem = false
226
- when "vietnamese"
227
- settings[:analysis][:analyzer].merge!(
228
- default_analyzer => {
229
- type: "vi_analyzer"
112
+ },
113
+ filter: {
114
+ searchkick_index_shingle: {
115
+ type: "shingle",
116
+ token_separator: ""
230
117
  },
231
- searchkick_search: {
232
- type: "vi_analyzer"
118
+ # lucky find https://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
119
+ searchkick_search_shingle: {
120
+ type: "shingle",
121
+ token_separator: "",
122
+ output_unigrams: false,
123
+ output_unigrams_if_no_shingles: true
233
124
  },
234
- searchkick_search2: {
235
- type: "vi_analyzer"
236
- }
237
- )
238
-
239
- stem = false
240
- when "polish", "ukrainian"
241
- settings[:analysis][:analyzer].merge!(
242
- default_analyzer => {
243
- type: language
125
+ searchkick_suggest_shingle: {
126
+ type: "shingle",
127
+ max_shingle_size: 5
244
128
  },
245
- searchkick_search: {
246
- type: language
129
+ searchkick_edge_ngram: {
130
+ type: "edge_ngram",
131
+ min_gram: 1,
132
+ max_gram: 50
247
133
  },
248
- searchkick_search2: {
249
- type: language
134
+ searchkick_ngram: {
135
+ type: "ngram",
136
+ min_gram: 1,
137
+ max_gram: 50
138
+ },
139
+ searchkick_stemmer: {
140
+ # use stemmer if language is lowercase, snowball otherwise
141
+ type: language == language.to_s.downcase ? "stemmer" : "snowball",
142
+ language: language || "English"
250
143
  }
251
- )
144
+ },
145
+ char_filter: {
146
+ # https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html
147
+ # &_to_and
148
+ ampersand: {
149
+ type: "mapping",
150
+ mappings: ["&=> and "]
151
+ }
152
+ }
153
+ }
154
+ }
252
155
 
253
- stem = false
254
- end
156
+ update_language(settings, language)
157
+ update_stemming(settings)
255
158
 
256
- if Searchkick.env == "test"
257
- settings[:number_of_shards] = 1
258
- settings[:number_of_replicas] = 0
259
- end
159
+ if Searchkick.env == "test"
160
+ settings[:number_of_shards] = 1
161
+ settings[:number_of_replicas] = 0
162
+ end
260
163
 
261
- if options[:similarity]
262
- settings[:similarity] = {default: {type: options[:similarity]}}
263
- end
164
+ # TODO remove in Searchkick 5 (classic no longer supported)
165
+ if options[:similarity]
166
+ settings[:similarity] = {default: {type: options[:similarity]}}
167
+ end
264
168
 
265
- unless below62
266
- settings[:index] = {
267
- max_ngram_diff: 49,
268
- max_shingle_diff: 4
269
- }
270
- end
169
+ unless below62?
170
+ settings[:index] = {
171
+ max_ngram_diff: 49,
172
+ max_shingle_diff: 4
173
+ }
174
+ end
271
175
 
272
- if options[:case_sensitive]
273
- settings[:analysis][:analyzer].each do |_, analyzer|
274
- analyzer[:filter].delete("lowercase")
275
- end
176
+ if options[:case_sensitive]
177
+ settings[:analysis][:analyzer].each do |_, analyzer|
178
+ analyzer[:filter].delete("lowercase")
276
179
  end
180
+ end
277
181
 
278
- if stem == false
279
- settings[:analysis][:filter].delete(:searchkick_stemmer)
280
- settings[:analysis][:analyzer].each do |_, analyzer|
281
- analyzer[:filter].delete("searchkick_stemmer") if analyzer[:filter]
282
- end
182
+ # TODO do this last in Searchkick 5
183
+ settings = settings.symbolize_keys.deep_merge((options[:settings] || {}).symbolize_keys)
184
+
185
+ add_synonyms(settings)
186
+ add_search_synonyms(settings)
187
+ # TODO remove in Searchkick 5
188
+ add_wordnet(settings) if options[:wordnet]
189
+
190
+ if options[:special_characters] == false
191
+ settings[:analysis][:analyzer].each_value do |analyzer_settings|
192
+ analyzer_settings[:filter].reject! { |f| f == "asciifolding" }
283
193
  end
194
+ end
284
195
 
285
- settings = settings.symbolize_keys.deep_merge((options[:settings] || {}).symbolize_keys)
196
+ settings
197
+ end
286
198
 
287
- # synonyms
288
- synonyms = options[:synonyms] || []
289
- synonyms = synonyms.call if synonyms.respond_to?(:call)
290
- if synonyms.any?
291
- settings[:analysis][:filter][:searchkick_synonym] = {
292
- type: "synonym",
293
- # only remove a single space from synonyms so three-word synonyms will fail noisily instead of silently
294
- synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.map { |s2| s2.sub(/\s+/, "") }.join(",") : s }.map(&:downcase)
199
+ def update_language(settings, language)
200
+ case language
201
+ when "chinese"
202
+ settings[:analysis][:analyzer].merge!(
203
+ default_analyzer => {
204
+ type: "ik_smart"
205
+ },
206
+ searchkick_search: {
207
+ type: "ik_smart"
208
+ },
209
+ searchkick_search2: {
210
+ type: "ik_max_word"
295
211
  }
296
- # choosing a place for the synonym filter when stemming is not easy
297
- # https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
298
- # TODO use a snowball stemmer on synonyms when creating the token filter
299
-
300
- # http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
301
- # I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
302
- # - Only apply the synonym expansion at index time
303
- # - Don't have the synonym filter applied search
304
- # - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
305
- settings[:analysis][:analyzer][default_analyzer][:filter].insert(2, "searchkick_synonym")
306
-
307
- %w(word_start word_middle word_end).each do |type|
308
- settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym")
309
- end
310
- end
212
+ )
213
+ when "chinese2", "smartcn"
214
+ settings[:analysis][:analyzer].merge!(
215
+ default_analyzer => {
216
+ type: "smartcn"
217
+ },
218
+ searchkick_search: {
219
+ type: "smartcn"
220
+ },
221
+ searchkick_search2: {
222
+ type: "smartcn"
223
+ }
224
+ )
225
+ when "japanese"
226
+ settings[:analysis][:analyzer].merge!(
227
+ default_analyzer => {
228
+ type: "kuromoji"
229
+ },
230
+ searchkick_search: {
231
+ type: "kuromoji"
232
+ },
233
+ searchkick_search2: {
234
+ type: "kuromoji"
235
+ }
236
+ )
237
+ when "korean"
238
+ settings[:analysis][:analyzer].merge!(
239
+ default_analyzer => {
240
+ type: "openkoreantext-analyzer"
241
+ },
242
+ searchkick_search: {
243
+ type: "openkoreantext-analyzer"
244
+ },
245
+ searchkick_search2: {
246
+ type: "openkoreantext-analyzer"
247
+ }
248
+ )
249
+ when "korean2"
250
+ settings[:analysis][:analyzer].merge!(
251
+ default_analyzer => {
252
+ type: "nori"
253
+ },
254
+ searchkick_search: {
255
+ type: "nori"
256
+ },
257
+ searchkick_search2: {
258
+ type: "nori"
259
+ }
260
+ )
261
+ when "vietnamese"
262
+ settings[:analysis][:analyzer].merge!(
263
+ default_analyzer => {
264
+ type: "vi_analyzer"
265
+ },
266
+ searchkick_search: {
267
+ type: "vi_analyzer"
268
+ },
269
+ searchkick_search2: {
270
+ type: "vi_analyzer"
271
+ }
272
+ )
273
+ when "polish", "ukrainian"
274
+ settings[:analysis][:analyzer].merge!(
275
+ default_analyzer => {
276
+ type: language
277
+ },
278
+ searchkick_search: {
279
+ type: language
280
+ },
281
+ searchkick_search2: {
282
+ type: language
283
+ }
284
+ )
285
+ end
286
+ end
311
287
 
312
- search_synonyms = options[:search_synonyms] || []
313
- search_synonyms = search_synonyms.call if search_synonyms.respond_to?(:call)
314
- if search_synonyms.is_a?(String) || search_synonyms.any?
315
- if search_synonyms.is_a?(String)
316
- synonym_graph = {
317
- type: "synonym_graph",
318
- synonyms_path: search_synonyms
319
- }
320
- synonym_graph[:updateable] = true unless below73
321
- else
322
- synonym_graph = {
323
- type: "synonym_graph",
324
- # TODO confirm this is correct
325
- synonyms: search_synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.join(",") : s }.map(&:downcase)
326
- }
327
- end
328
- settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph
288
+ def update_stemming(settings)
289
+ stem = options[:stem]
329
290
 
330
- [:searchkick_search2, :searchkick_word_search].each do |analyzer|
331
- settings[:analysis][:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph")
332
- end
333
- end
291
+ # language analyzer used
292
+ stem = false if settings[:analysis][:analyzer][default_analyzer][:type] != "custom"
334
293
 
335
- if options[:wordnet]
336
- settings[:analysis][:filter][:searchkick_wordnet] = {
337
- type: "synonym",
338
- format: "wordnet",
339
- synonyms_path: Searchkick.wordnet_path
340
- }
294
+ if stem == false
295
+ settings[:analysis][:filter].delete(:searchkick_stemmer)
296
+ settings[:analysis][:analyzer].each do |_, analyzer|
297
+ analyzer[:filter].delete("searchkick_stemmer") if analyzer[:filter]
298
+ end
299
+ end
341
300
 
342
- settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_wordnet")
343
- settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_wordnet"
301
+ if options[:stemmer_override]
302
+ stemmer_override = {
303
+ type: "stemmer_override"
304
+ }
305
+ if options[:stemmer_override].is_a?(String)
306
+ stemmer_override[:rules_path] = options[:stemmer_override]
307
+ else
308
+ stemmer_override[:rules] = options[:stemmer_override]
309
+ end
310
+ settings[:analysis][:filter][:searchkick_stemmer_override] = stemmer_override
344
311
 
345
- %w(word_start word_middle word_end).each do |type|
346
- settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_wordnet")
347
- end
312
+ settings[:analysis][:analyzer].each do |_, analyzer|
313
+ stemmer_index = analyzer[:filter].index("searchkick_stemmer") if analyzer[:filter]
314
+ analyzer[:filter].insert(stemmer_index, "searchkick_stemmer_override") if stemmer_index
348
315
  end
316
+ end
349
317
 
350
- if options[:special_characters] == false
351
- settings[:analysis][:analyzer].each_value do |analyzer_settings|
352
- analyzer_settings[:filter].reject! { |f| f == "asciifolding" }
353
- end
318
+ if options[:stem_exclusion]
319
+ settings[:analysis][:filter][:searchkick_stem_exclusion] = {
320
+ type: "keyword_marker",
321
+ keywords: options[:stem_exclusion]
322
+ }
323
+
324
+ settings[:analysis][:analyzer].each do |_, analyzer|
325
+ stemmer_index = analyzer[:filter].index("searchkick_stemmer") if analyzer[:filter]
326
+ analyzer[:filter].insert(stemmer_index, "searchkick_stem_exclusion") if stemmer_index
354
327
  end
328
+ end
329
+ end
355
330
 
356
- mapping = {}
331
+ def generate_mappings
332
+ mapping = {}
357
333
 
358
- # conversions
359
- Array(options[:conversions]).each do |conversions_field|
360
- mapping[conversions_field] = {
361
- type: "nested",
362
- properties: {
363
- query: {type: default_type, analyzer: "searchkick_keyword"},
364
- count: {type: "integer"}
365
- }
334
+ keyword_mapping = {type: "keyword"}
335
+ keyword_mapping[:ignore_above] = options[:ignore_above] || 30000
336
+
337
+ # conversions
338
+ Array(options[:conversions]).each do |conversions_field|
339
+ mapping[conversions_field] = {
340
+ type: "nested",
341
+ properties: {
342
+ query: {type: default_type, analyzer: "searchkick_keyword"},
343
+ count: {type: "integer"}
366
344
  }
367
- end
345
+ }
346
+ end
368
347
 
369
- mapping_options = Hash[
370
- [:suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable]
371
- .map { |type| [type, (options[type] || []).map(&:to_s)] }
372
- ]
348
+ mapping_options = Hash[
349
+ [:suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable]
350
+ .map { |type| [type, (options[type] || []).map(&:to_s)] }
351
+ ]
373
352
 
374
- word = options[:word] != false && (!options[:match] || options[:match] == :word)
353
+ word = options[:word] != false && (!options[:match] || options[:match] == :word)
375
354
 
376
- mapping_options[:searchable].delete("_all")
355
+ mapping_options[:searchable].delete("_all")
377
356
 
378
- analyzed_field_options = {type: default_type, index: true, analyzer: default_analyzer}
357
+ analyzed_field_options = {type: default_type, index: true, analyzer: default_analyzer}
379
358
 
380
- mapping_options.values.flatten.uniq.each do |field|
381
- fields = {}
359
+ mapping_options.values.flatten.uniq.each do |field|
360
+ fields = {}
382
361
 
383
- if options.key?(:filterable) && !mapping_options[:filterable].include?(field)
384
- fields[field] = {type: default_type, index: false}
385
- else
386
- fields[field] = keyword_mapping
387
- end
362
+ if options.key?(:filterable) && !mapping_options[:filterable].include?(field)
363
+ fields[field] = {type: default_type, index: false}
364
+ else
365
+ fields[field] = keyword_mapping
366
+ end
388
367
 
389
- if !options[:searchable] || mapping_options[:searchable].include?(field)
390
- if word
391
- fields[:analyzed] = analyzed_field_options
368
+ if !options[:searchable] || mapping_options[:searchable].include?(field)
369
+ if word
370
+ fields[:analyzed] = analyzed_field_options
392
371
 
393
- if mapping_options[:highlight].include?(field)
394
- fields[:analyzed][:term_vector] = "with_positions_offsets"
395
- end
372
+ if mapping_options[:highlight].include?(field)
373
+ fields[:analyzed][:term_vector] = "with_positions_offsets"
396
374
  end
375
+ end
397
376
 
398
- mapping_options.except(:highlight, :searchable, :filterable, :word).each do |type, f|
399
- if options[:match] == type || f.include?(field)
400
- fields[type] = {type: default_type, index: true, analyzer: "searchkick_#{type}_index"}
401
- end
377
+ mapping_options.except(:highlight, :searchable, :filterable, :word).each do |type, f|
378
+ if options[:match] == type || f.include?(field)
379
+ fields[type] = {type: default_type, index: true, analyzer: "searchkick_#{type}_index"}
402
380
  end
403
381
  end
404
-
405
- mapping[field] = fields[field].merge(fields: fields.except(field))
406
382
  end
407
383
 
408
- (options[:locations] || []).map(&:to_s).each do |field|
409
- mapping[field] = {
410
- type: "geo_point"
411
- }
412
- end
384
+ mapping[field] = fields[field].merge(fields: fields.except(field))
385
+ end
413
386
 
414
- options[:geo_shape] = options[:geo_shape].product([{}]).to_h if options[:geo_shape].is_a?(Array)
415
- (options[:geo_shape] || {}).each do |field, shape_options|
416
- mapping[field] = shape_options.merge(type: "geo_shape")
417
- end
387
+ (options[:locations] || []).map(&:to_s).each do |field|
388
+ mapping[field] = {
389
+ type: "geo_point"
390
+ }
391
+ end
418
392
 
419
- if options[:inheritance]
420
- mapping[:type] = keyword_mapping
421
- end
393
+ options[:geo_shape] = options[:geo_shape].product([{}]).to_h if options[:geo_shape].is_a?(Array)
394
+ (options[:geo_shape] || {}).each do |field, shape_options|
395
+ mapping[field] = shape_options.merge(type: "geo_shape")
396
+ end
422
397
 
423
- routing = {}
424
- if options[:routing]
425
- routing = {required: true}
426
- unless options[:routing] == true
427
- routing[:path] = options[:routing].to_s
428
- end
398
+ if options[:inheritance]
399
+ mapping[:type] = keyword_mapping
400
+ end
401
+
402
+ routing = {}
403
+ if options[:routing]
404
+ routing = {required: true}
405
+ unless options[:routing] == true
406
+ routing[:path] = options[:routing].to_s
429
407
  end
408
+ end
430
409
 
431
- dynamic_fields = {
432
- # analyzed field must be the default field for include_in_all
433
- # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
434
- # however, we can include the not_analyzed field in _all
435
- # and the _all index analyzer will take care of it
436
- "{name}" => keyword_mapping
437
- }
410
+ dynamic_fields = {
411
+ # analyzed field must be the default field for include_in_all
412
+ # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
413
+ # however, we can include the not_analyzed field in _all
414
+ # and the _all index analyzer will take care of it
415
+ "{name}" => keyword_mapping
416
+ }
438
417
 
439
- if options.key?(:filterable)
440
- dynamic_fields["{name}"] = {type: default_type, index: false}
441
- end
418
+ if options.key?(:filterable)
419
+ dynamic_fields["{name}"] = {type: default_type, index: false}
420
+ end
442
421
 
443
- unless options[:searchable]
444
- if options[:match] && options[:match] != :word
445
- dynamic_fields[options[:match]] = {type: default_type, index: true, analyzer: "searchkick_#{options[:match]}_index"}
446
- end
422
+ unless options[:searchable]
423
+ if options[:match] && options[:match] != :word
424
+ dynamic_fields[options[:match]] = {type: default_type, index: true, analyzer: "searchkick_#{options[:match]}_index"}
425
+ end
447
426
 
448
- if word
449
- dynamic_fields[:analyzed] = analyzed_field_options
450
- end
427
+ if word
428
+ dynamic_fields[:analyzed] = analyzed_field_options
451
429
  end
430
+ end
452
431
 
453
- # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
454
- multi_field = dynamic_fields["{name}"].merge(fields: dynamic_fields.except("{name}"))
455
-
456
- mappings = {
457
- properties: mapping,
458
- _routing: routing,
459
- # https://gist.github.com/kimchy/2898285
460
- dynamic_templates: [
461
- {
462
- string_template: {
463
- match: "*",
464
- match_mapping_type: "string",
465
- mapping: multi_field
466
- }
432
+ # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
433
+ multi_field = dynamic_fields["{name}"].merge(fields: dynamic_fields.except("{name}"))
434
+
435
+ mappings = {
436
+ properties: mapping,
437
+ _routing: routing,
438
+ # https://gist.github.com/kimchy/2898285
439
+ dynamic_templates: [
440
+ {
441
+ string_template: {
442
+ match: "*",
443
+ match_mapping_type: "string",
444
+ mapping: multi_field
467
445
  }
468
- ]
469
- }
446
+ }
447
+ ]
448
+ }
470
449
 
471
- if below70
472
- mappings = {index_type => mappings}
473
- end
450
+ if below70?
451
+ mappings = {index_type => mappings}
452
+ end
453
+
454
+ mappings
455
+ end
474
456
 
475
- mappings = mappings.symbolize_keys.deep_merge(custom_mapping.symbolize_keys)
457
+ def add_synonyms(settings)
458
+ synonyms = options[:synonyms] || []
459
+ synonyms = synonyms.call if synonyms.respond_to?(:call)
460
+ if synonyms.any?
461
+ settings[:analysis][:filter][:searchkick_synonym] = {
462
+ type: "synonym",
463
+ # only remove a single space from synonyms so three-word synonyms will fail noisily instead of silently
464
+ synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.map { |s2| s2.sub(/\s+/, "") }.join(",") : s }.map(&:downcase)
465
+ }
466
+ # choosing a place for the synonym filter when stemming is not easy
467
+ # https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
468
+ # TODO use a snowball stemmer on synonyms when creating the token filter
469
+
470
+ # http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
471
+ # I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
472
+ # - Only apply the synonym expansion at index time
473
+ # - Don't have the synonym filter applied search
474
+ # - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
475
+ settings[:analysis][:analyzer][default_analyzer][:filter].insert(2, "searchkick_synonym")
476
+
477
+ %w(word_start word_middle word_end).each do |type|
478
+ settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym")
479
+ end
476
480
  end
481
+ end
477
482
 
478
- if options[:deep_paging]
479
- if !settings.dig(:index, :max_result_window) && !settings[:"index.max_result_window"]
480
- settings[:index] ||= {}
481
- settings[:index][:max_result_window] = 1_000_000_000
483
+ def add_search_synonyms(settings)
484
+ search_synonyms = options[:search_synonyms] || []
485
+ search_synonyms = search_synonyms.call if search_synonyms.respond_to?(:call)
486
+ if search_synonyms.is_a?(String) || search_synonyms.any?
487
+ if search_synonyms.is_a?(String)
488
+ synonym_graph = {
489
+ type: "synonym_graph",
490
+ synonyms_path: search_synonyms
491
+ }
492
+ synonym_graph[:updateable] = true unless below73?
493
+ else
494
+ synonym_graph = {
495
+ type: "synonym_graph",
496
+ # TODO confirm this is correct
497
+ synonyms: search_synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.join(",") : s }.map(&:downcase)
498
+ }
499
+ end
500
+ settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph
501
+
502
+ [:searchkick_search2, :searchkick_word_search].each do |analyzer|
503
+ settings[:analysis][:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph")
482
504
  end
483
505
  end
506
+ end
484
507
 
485
- {
486
- settings: settings,
487
- mappings: mappings
508
+ def add_wordnet(settings)
509
+ settings[:analysis][:filter][:searchkick_wordnet] = {
510
+ type: "synonym",
511
+ format: "wordnet",
512
+ synonyms_path: Searchkick.wordnet_path
488
513
  }
514
+
515
+ settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_wordnet")
516
+ settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_wordnet"
517
+
518
+ %w(word_start word_middle word_end).each do |type|
519
+ settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_wordnet")
520
+ end
521
+ end
522
+
523
+ def set_deep_paging(settings)
524
+ if !settings.dig(:index, :max_result_window) && !settings[:"index.max_result_window"]
525
+ settings[:index] ||= {}
526
+ settings[:index][:max_result_window] = 1_000_000_000
527
+ end
528
+ end
529
+
530
+ def index_type
531
+ @index_type ||= begin
532
+ index_type = options[:_type]
533
+ index_type = index_type.call if index_type.respond_to?(:call)
534
+ index_type
535
+ end
536
+ end
537
+
538
+ def default_type
539
+ "text"
540
+ end
541
+
542
+ def default_analyzer
543
+ :searchkick_index
544
+ end
545
+
546
+ def below62?
547
+ Searchkick.server_below?("6.2.0")
548
+ end
549
+
550
+ def below70?
551
+ Searchkick.server_below?("7.0.0")
552
+ end
553
+
554
+ def below73?
555
+ Searchkick.server_below?("7.3.0")
489
556
  end
490
557
  end
491
558
  end