searchkick 4.2.0 → 4.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,10 @@
1
+ # dependencies
1
2
  require "active_support"
2
3
  require "active_support/core_ext/hash/deep_merge"
3
4
  require "elasticsearch"
4
5
  require "hashie"
5
6
 
7
+ # modules
6
8
  require "searchkick/bulk_indexer"
7
9
  require "searchkick/index"
8
10
  require "searchkick/indexer"
@@ -17,6 +19,7 @@ require "searchkick/record_indexer"
17
19
  require "searchkick/results"
18
20
  require "searchkick/version"
19
21
 
22
+ # integrations
20
23
  require "searchkick/railtie" if defined?(Rails)
21
24
  require "searchkick/logging" if defined?(ActiveSupport::Notifications)
22
25
 
@@ -27,6 +30,7 @@ module Searchkick
27
30
  autoload :ProcessQueueJob, "searchkick/process_queue_job"
28
31
  autoload :ReindexV2Job, "searchkick/reindex_v2_job"
29
32
 
33
+ # errors
30
34
  class Error < StandardError; end
31
35
  class MissingIndexError < Error; end
32
36
  class UnsupportedVersionError < Error; end
@@ -112,7 +116,7 @@ module Searchkick
112
116
  end
113
117
 
114
118
  options = options.merge(block: block) if block
115
- query = Searchkick::Query.new(klass, term, options)
119
+ query = Searchkick::Query.new(klass, term, **options)
116
120
  if options[:execute] == false
117
121
  query
118
122
  else
@@ -142,7 +146,7 @@ module Searchkick
142
146
  end
143
147
  end
144
148
 
145
- def self.callbacks(value)
149
+ def self.callbacks(value = nil)
146
150
  if block_given?
147
151
  previous_value = callbacks_value
148
152
  begin
@@ -160,6 +164,7 @@ module Searchkick
160
164
 
161
165
  def self.aws_credentials=(creds)
162
166
  begin
167
+ # TODO remove in Searchkick 5 (just use aws_sigv4)
163
168
  require "faraday_middleware/aws_signers_v4"
164
169
  rescue LoadError
165
170
  require "faraday_middleware/aws_sigv4"
@@ -169,17 +174,16 @@ module Searchkick
169
174
  end
170
175
 
171
176
  def self.reindex_status(index_name)
172
- if redis
173
- batches_left = Searchkick::Index.new(index_name).batches_left
174
- {
175
- completed: batches_left == 0,
176
- batches_left: batches_left
177
- }
178
- else
179
- raise Searchkick::Error, "Redis not configured"
180
- end
177
+ raise Searchkick::Error, "Redis not configured" unless redis
178
+
179
+ batches_left = Searchkick::Index.new(index_name).batches_left
180
+ {
181
+ completed: batches_left == 0,
182
+ batches_left: batches_left
183
+ }
181
184
  end
182
185
 
186
+ # TODO use ConnectionPool::Wrapper when redis is set so this is no longer needed
183
187
  def self.with_redis
184
188
  if redis
185
189
  if redis.respond_to?(:with)
@@ -249,11 +253,26 @@ module Searchkick
249
253
  }
250
254
  end
251
255
  end
256
+
257
+ # private
258
+ # methods are forwarded to base class
259
+ # this check to see if scope exists on that class
260
+ # it's a bit tricky, but this seems to work
261
+ def self.relation?(klass)
262
+ if klass.respond_to?(:current_scope)
263
+ !klass.current_scope.nil?
264
+ elsif defined?(Mongoid::Threaded)
265
+ !Mongoid::Threaded.current_scope(klass).nil?
266
+ end
267
+ end
252
268
  end
253
269
 
254
- # TODO find better ActiveModel hook
255
270
  require "active_model/callbacks"
256
271
  ActiveModel::Callbacks.include(Searchkick::Model)
272
+ # TODO use
273
+ # ActiveSupport.on_load(:mongoid) do
274
+ # Mongoid::Document::ClassMethods.include Searchkick::Model
275
+ # end
257
276
 
258
277
  ActiveSupport.on_load(:active_record) do
259
278
  extend Searchkick::Model
@@ -141,7 +141,7 @@ module Searchkick
141
141
 
142
142
  def bulk_reindex_job(scope, batch_id, options)
143
143
  Searchkick.with_redis { |r| r.sadd(batches_key, batch_id) }
144
- Searchkick::BulkReindexJob.perform_later({
144
+ Searchkick::BulkReindexJob.perform_later(**{
145
145
  class_name: scope.searchkick_options[:class_name],
146
146
  index_name: index.name,
147
147
  batch_id: batch_id
@@ -2,8 +2,6 @@ require "searchkick/index_options"
2
2
 
3
3
  module Searchkick
4
4
  class Index
5
- include IndexOptions
6
-
7
5
  attr_reader :name, :options
8
6
 
9
7
  def initialize(name, options = {})
@@ -12,6 +10,10 @@ module Searchkick
12
10
  @klass_document_type = {} # cache
13
11
  end
14
12
 
13
+ def index_options
14
+ IndexOptions.new(self).index_options
15
+ end
16
+
15
17
  def create(body = {})
16
18
  client.indices.create index: name, body: body
17
19
  end
@@ -174,6 +176,17 @@ module Searchkick
174
176
  Searchkick.search(like_text, model: record.class, **options)
175
177
  end
176
178
 
179
+ def reload_synonyms
180
+ require "elasticsearch/xpack"
181
+ raise Error, "Requires Elasticsearch 7.3+" if Searchkick.server_below?("7.3.0")
182
+ raise Error, "Requires elasticsearch-xpack 7.8+" unless client.xpack.respond_to?(:indices)
183
+ begin
184
+ client.xpack.indices.reload_search_analyzers(index: name)
185
+ rescue Elasticsearch::Transport::Transport::Errors::MethodNotAllowed
186
+ raise Error, "Requires non-OSS version of Elasticsearch"
187
+ end
188
+ end
189
+
177
190
  # queue
178
191
 
179
192
  def reindex_queue
@@ -184,13 +197,20 @@ module Searchkick
184
197
 
185
198
  def reindex(relation, method_name, scoped:, full: false, scope: nil, **options)
186
199
  refresh = options.fetch(:refresh, !scoped)
200
+ options.delete(:refresh)
187
201
 
188
202
  if method_name
203
+ # TODO throw ArgumentError
204
+ Searchkick.warn("unsupported keywords: #{options.keys.map(&:inspect).join(", ")}") if options.any?
205
+
189
206
  # update
190
207
  import_scope(relation, method_name: method_name, scope: scope)
191
208
  self.refresh if refresh
192
209
  true
193
210
  elsif scoped && !full
211
+ # TODO throw ArgumentError
212
+ Searchkick.warn("unsupported keywords: #{options.keys.map(&:inspect).join(", ")}") if options.any?
213
+
194
214
  # reindex association
195
215
  import_scope(relation, scope: scope)
196
216
  self.refresh if refresh
@@ -1,20 +1,14 @@
1
1
  module Searchkick
2
- module IndexOptions
3
- def index_options
4
- options = @options
5
- language = options[:language]
6
- language = language.call if language.respond_to?(:call)
2
+ class IndexOptions
3
+ attr_reader :options
7
4
 
8
- below62 = Searchkick.server_below?("6.2.0")
9
- below70 = Searchkick.server_below?("7.0.0")
10
-
11
- if below70
12
- index_type = options[:_type]
13
- index_type = index_type.call if index_type.respond_to?(:call)
14
- end
5
+ def initialize(index)
6
+ @options = index.options
7
+ end
15
8
 
9
+ def index_options
16
10
  custom_mapping = options[:mappings] || {}
17
- if below70 && custom_mapping.keys.map(&:to_sym).include?(:properties)
11
+ if below70? && custom_mapping.keys.map(&:to_sym).include?(:properties)
18
12
  # add type
19
13
  custom_mapping = {index_type => custom_mapping}
20
14
  end
@@ -23,450 +17,542 @@ module Searchkick
23
17
  settings = options[:settings] || {}
24
18
  mappings = custom_mapping
25
19
  else
26
- default_type = "text"
27
- default_analyzer = :searchkick_index
28
- keyword_mapping = {type: "keyword"}
29
-
30
- index_true_value = true
31
- index_false_value = false
32
-
33
- keyword_mapping[:ignore_above] = options[:ignore_above] || 30000
34
-
35
- settings = {
36
- analysis: {
37
- analyzer: {
38
- searchkick_keyword: {
39
- type: "custom",
40
- tokenizer: "keyword",
41
- filter: ["lowercase"] + (options[:stem_conversions] ? ["searchkick_stemmer"] : [])
42
- },
43
- default_analyzer => {
44
- type: "custom",
45
- # character filters -> tokenizer -> token filters
46
- # https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html
47
- char_filter: ["ampersand"],
48
- tokenizer: "standard",
49
- # synonym should come last, after stemming and shingle
50
- # shingle must come before searchkick_stemmer
51
- filter: ["lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
52
- },
53
- searchkick_search: {
54
- type: "custom",
55
- char_filter: ["ampersand"],
56
- tokenizer: "standard",
57
- filter: ["lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
58
- },
59
- searchkick_search2: {
60
- type: "custom",
61
- char_filter: ["ampersand"],
62
- tokenizer: "standard",
63
- filter: ["lowercase", "asciifolding", "searchkick_stemmer"]
64
- },
65
- # https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
66
- searchkick_autocomplete_search: {
67
- type: "custom",
68
- tokenizer: "keyword",
69
- filter: ["lowercase", "asciifolding"]
70
- },
71
- searchkick_word_search: {
72
- type: "custom",
73
- tokenizer: "standard",
74
- filter: ["lowercase", "asciifolding"]
75
- },
76
- searchkick_suggest_index: {
77
- type: "custom",
78
- tokenizer: "standard",
79
- filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
80
- },
81
- searchkick_text_start_index: {
82
- type: "custom",
83
- tokenizer: "keyword",
84
- filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
85
- },
86
- searchkick_text_middle_index: {
87
- type: "custom",
88
- tokenizer: "keyword",
89
- filter: ["lowercase", "asciifolding", "searchkick_ngram"]
90
- },
91
- searchkick_text_end_index: {
92
- type: "custom",
93
- tokenizer: "keyword",
94
- filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
95
- },
96
- searchkick_word_start_index: {
97
- type: "custom",
98
- tokenizer: "standard",
99
- filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
100
- },
101
- searchkick_word_middle_index: {
102
- type: "custom",
103
- tokenizer: "standard",
104
- filter: ["lowercase", "asciifolding", "searchkick_ngram"]
105
- },
106
- searchkick_word_end_index: {
107
- type: "custom",
108
- tokenizer: "standard",
109
- filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
110
- }
111
- },
112
- filter: {
113
- searchkick_index_shingle: {
114
- type: "shingle",
115
- token_separator: ""
116
- },
117
- # lucky find https://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
118
- searchkick_search_shingle: {
119
- type: "shingle",
120
- token_separator: "",
121
- output_unigrams: false,
122
- output_unigrams_if_no_shingles: true
123
- },
124
- searchkick_suggest_shingle: {
125
- type: "shingle",
126
- max_shingle_size: 5
127
- },
128
- searchkick_edge_ngram: {
129
- type: "edgeNGram",
130
- min_gram: 1,
131
- max_gram: 50
132
- },
133
- searchkick_ngram: {
134
- type: "nGram",
135
- min_gram: 1,
136
- max_gram: 50
137
- },
138
- searchkick_stemmer: {
139
- # use stemmer if language is lowercase, snowball otherwise
140
- type: language == language.to_s.downcase ? "stemmer" : "snowball",
141
- language: language || "English"
142
- }
143
- },
144
- char_filter: {
145
- # https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html
146
- # &_to_and
147
- ampersand: {
148
- type: "mapping",
149
- mappings: ["&=> and "]
150
- }
151
- }
152
- }
153
- }
20
+ settings = generate_settings
21
+ mappings = generate_mappings.symbolize_keys.deep_merge(custom_mapping.symbolize_keys)
22
+ end
23
+
24
+ set_deep_paging(settings) if options[:deep_paging]
25
+
26
+ {
27
+ settings: settings,
28
+ mappings: mappings
29
+ }
30
+ end
154
31
 
155
- stem = options[:stem]
32
+ def generate_settings
33
+ language = options[:language]
34
+ language = language.call if language.respond_to?(:call)
156
35
 
157
- case language
158
- when "chinese"
159
- settings[:analysis][:analyzer].merge!(
36
+ settings = {
37
+ analysis: {
38
+ analyzer: {
39
+ searchkick_keyword: {
40
+ type: "custom",
41
+ tokenizer: "keyword",
42
+ filter: ["lowercase"] + (options[:stem_conversions] ? ["searchkick_stemmer"] : [])
43
+ },
160
44
  default_analyzer => {
161
- type: "ik_smart"
45
+ type: "custom",
46
+ # character filters -> tokenizer -> token filters
47
+ # https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html
48
+ char_filter: ["ampersand"],
49
+ tokenizer: "standard",
50
+ # synonym should come last, after stemming and shingle
51
+ # shingle must come before searchkick_stemmer
52
+ filter: ["lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
162
53
  },
163
54
  searchkick_search: {
164
- type: "ik_smart"
55
+ type: "custom",
56
+ char_filter: ["ampersand"],
57
+ tokenizer: "standard",
58
+ filter: ["lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
165
59
  },
166
60
  searchkick_search2: {
167
- type: "ik_max_word"
168
- }
169
- )
170
-
171
- stem = false
172
- when "chinese2", "smartcn"
173
- settings[:analysis][:analyzer].merge!(
174
- default_analyzer => {
175
- type: "smartcn"
61
+ type: "custom",
62
+ char_filter: ["ampersand"],
63
+ tokenizer: "standard",
64
+ filter: ["lowercase", "asciifolding", "searchkick_stemmer"]
176
65
  },
177
- searchkick_search: {
178
- type: "smartcn"
66
+ # https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
67
+ searchkick_autocomplete_search: {
68
+ type: "custom",
69
+ tokenizer: "keyword",
70
+ filter: ["lowercase", "asciifolding"]
179
71
  },
180
- searchkick_search2: {
181
- type: "smartcn"
182
- }
183
- )
184
-
185
- stem = false
186
- when "japanese"
187
- settings[:analysis][:analyzer].merge!(
188
- default_analyzer => {
189
- type: "kuromoji"
72
+ searchkick_word_search: {
73
+ type: "custom",
74
+ tokenizer: "standard",
75
+ filter: ["lowercase", "asciifolding"]
190
76
  },
191
- searchkick_search: {
192
- type: "kuromoji"
77
+ searchkick_suggest_index: {
78
+ type: "custom",
79
+ tokenizer: "standard",
80
+ filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
193
81
  },
194
- searchkick_search2: {
195
- type: "kuromoji"
196
- }
197
- )
198
-
199
- stem = false
200
- when "korean"
201
- settings[:analysis][:analyzer].merge!(
202
- default_analyzer => {
203
- type: "openkoreantext-analyzer"
82
+ searchkick_text_start_index: {
83
+ type: "custom",
84
+ tokenizer: "keyword",
85
+ filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
204
86
  },
205
- searchkick_search: {
206
- type: "openkoreantext-analyzer"
87
+ searchkick_text_middle_index: {
88
+ type: "custom",
89
+ tokenizer: "keyword",
90
+ filter: ["lowercase", "asciifolding", "searchkick_ngram"]
207
91
  },
208
- searchkick_search2: {
209
- type: "openkoreantext-analyzer"
210
- }
211
- )
212
-
213
- stem = false
214
- when "korean2"
215
- settings[:analysis][:analyzer].merge!(
216
- default_analyzer => {
217
- type: "nori"
92
+ searchkick_text_end_index: {
93
+ type: "custom",
94
+ tokenizer: "keyword",
95
+ filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
218
96
  },
219
- searchkick_search: {
220
- type: "nori"
97
+ searchkick_word_start_index: {
98
+ type: "custom",
99
+ tokenizer: "standard",
100
+ filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
221
101
  },
222
- searchkick_search2: {
223
- type: "nori"
102
+ searchkick_word_middle_index: {
103
+ type: "custom",
104
+ tokenizer: "standard",
105
+ filter: ["lowercase", "asciifolding", "searchkick_ngram"]
106
+ },
107
+ searchkick_word_end_index: {
108
+ type: "custom",
109
+ tokenizer: "standard",
110
+ filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
224
111
  }
225
- )
226
-
227
- stem = false
228
- when "vietnamese"
229
- settings[:analysis][:analyzer].merge!(
230
- default_analyzer => {
231
- type: "vi_analyzer"
112
+ },
113
+ filter: {
114
+ searchkick_index_shingle: {
115
+ type: "shingle",
116
+ token_separator: ""
232
117
  },
233
- searchkick_search: {
234
- type: "vi_analyzer"
118
+ # lucky find https://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
119
+ searchkick_search_shingle: {
120
+ type: "shingle",
121
+ token_separator: "",
122
+ output_unigrams: false,
123
+ output_unigrams_if_no_shingles: true
235
124
  },
236
- searchkick_search2: {
237
- type: "vi_analyzer"
238
- }
239
- )
240
-
241
- stem = false
242
- when "polish", "ukrainian"
243
- settings[:analysis][:analyzer].merge!(
244
- default_analyzer => {
245
- type: language
125
+ searchkick_suggest_shingle: {
126
+ type: "shingle",
127
+ max_shingle_size: 5
246
128
  },
247
- searchkick_search: {
248
- type: language
129
+ searchkick_edge_ngram: {
130
+ type: "edge_ngram",
131
+ min_gram: 1,
132
+ max_gram: 50
249
133
  },
250
- searchkick_search2: {
251
- type: language
134
+ searchkick_ngram: {
135
+ type: "ngram",
136
+ min_gram: 1,
137
+ max_gram: 50
138
+ },
139
+ searchkick_stemmer: {
140
+ # use stemmer if language is lowercase, snowball otherwise
141
+ type: language == language.to_s.downcase ? "stemmer" : "snowball",
142
+ language: language || "English"
143
+ }
144
+ },
145
+ char_filter: {
146
+ # https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html
147
+ # &_to_and
148
+ ampersand: {
149
+ type: "mapping",
150
+ mappings: ["&=> and "]
252
151
  }
253
- )
152
+ }
153
+ }
154
+ }
254
155
 
255
- stem = false
256
- end
156
+ update_language(settings, language)
157
+ update_stemming(settings)
257
158
 
258
- if Searchkick.env == "test"
259
- settings[:number_of_shards] = 1
260
- settings[:number_of_replicas] = 0
261
- end
159
+ if Searchkick.env == "test"
160
+ settings[:number_of_shards] = 1
161
+ settings[:number_of_replicas] = 0
162
+ end
262
163
 
263
- if options[:similarity]
264
- settings[:similarity] = {default: {type: options[:similarity]}}
265
- end
164
+ # TODO remove in Searchkick 5 (classic no longer supported)
165
+ if options[:similarity]
166
+ settings[:similarity] = {default: {type: options[:similarity]}}
167
+ end
266
168
 
267
- unless below62
268
- settings[:index] = {
269
- max_ngram_diff: 49,
270
- max_shingle_diff: 4
271
- }
272
- end
169
+ unless below62?
170
+ settings[:index] = {
171
+ max_ngram_diff: 49,
172
+ max_shingle_diff: 4
173
+ }
174
+ end
273
175
 
274
- if options[:case_sensitive]
275
- settings[:analysis][:analyzer].each do |_, analyzer|
276
- analyzer[:filter].delete("lowercase")
277
- end
176
+ if options[:case_sensitive]
177
+ settings[:analysis][:analyzer].each do |_, analyzer|
178
+ analyzer[:filter].delete("lowercase")
278
179
  end
180
+ end
279
181
 
280
- if stem == false
281
- settings[:analysis][:filter].delete(:searchkick_stemmer)
282
- settings[:analysis][:analyzer].each do |_, analyzer|
283
- analyzer[:filter].delete("searchkick_stemmer") if analyzer[:filter]
284
- end
285
- end
182
+ # TODO do this last in Searchkick 5
183
+ settings = settings.symbolize_keys.deep_merge((options[:settings] || {}).symbolize_keys)
286
184
 
287
- settings = settings.symbolize_keys.deep_merge((options[:settings] || {}).symbolize_keys)
185
+ add_synonyms(settings)
186
+ add_search_synonyms(settings)
187
+ # TODO remove in Searchkick 5
188
+ add_wordnet(settings) if options[:wordnet]
288
189
 
289
- # synonyms
290
- synonyms = options[:synonyms] || []
190
+ if options[:special_characters] == false
191
+ settings[:analysis][:analyzer].each_value do |analyzer_settings|
192
+ analyzer_settings[:filter].reject! { |f| f == "asciifolding" }
193
+ end
194
+ end
291
195
 
292
- synonyms = synonyms.call if synonyms.respond_to?(:call)
196
+ settings
197
+ end
293
198
 
294
- if synonyms.any?
295
- settings[:analysis][:filter][:searchkick_synonym] = {
296
- type: "synonym",
297
- # only remove a single space from synonyms so three-word synonyms will fail noisily instead of silently
298
- synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.map { |s2| s2.sub(/\s+/, "") }.join(",") : s }.map(&:downcase)
199
+ def update_language(settings, language)
200
+ case language
201
+ when "chinese"
202
+ settings[:analysis][:analyzer].merge!(
203
+ default_analyzer => {
204
+ type: "ik_smart"
205
+ },
206
+ searchkick_search: {
207
+ type: "ik_smart"
208
+ },
209
+ searchkick_search2: {
210
+ type: "ik_max_word"
299
211
  }
300
- # choosing a place for the synonym filter when stemming is not easy
301
- # https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
302
- # TODO use a snowball stemmer on synonyms when creating the token filter
303
-
304
- # http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
305
- # I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
306
- # - Only apply the synonym expansion at index time
307
- # - Don't have the synonym filter applied search
308
- # - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
309
- settings[:analysis][:analyzer][default_analyzer][:filter].insert(2, "searchkick_synonym")
310
-
311
- %w(word_start word_middle word_end).each do |type|
312
- settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym")
313
- end
314
- end
315
-
316
- if options[:wordnet]
317
- settings[:analysis][:filter][:searchkick_wordnet] = {
318
- type: "synonym",
319
- format: "wordnet",
320
- synonyms_path: Searchkick.wordnet_path
212
+ )
213
+ when "chinese2", "smartcn"
214
+ settings[:analysis][:analyzer].merge!(
215
+ default_analyzer => {
216
+ type: "smartcn"
217
+ },
218
+ searchkick_search: {
219
+ type: "smartcn"
220
+ },
221
+ searchkick_search2: {
222
+ type: "smartcn"
223
+ }
224
+ )
225
+ when "japanese"
226
+ settings[:analysis][:analyzer].merge!(
227
+ default_analyzer => {
228
+ type: "kuromoji"
229
+ },
230
+ searchkick_search: {
231
+ type: "kuromoji"
232
+ },
233
+ searchkick_search2: {
234
+ type: "kuromoji"
235
+ }
236
+ )
237
+ when "korean"
238
+ settings[:analysis][:analyzer].merge!(
239
+ default_analyzer => {
240
+ type: "openkoreantext-analyzer"
241
+ },
242
+ searchkick_search: {
243
+ type: "openkoreantext-analyzer"
244
+ },
245
+ searchkick_search2: {
246
+ type: "openkoreantext-analyzer"
247
+ }
248
+ )
249
+ when "korean2"
250
+ settings[:analysis][:analyzer].merge!(
251
+ default_analyzer => {
252
+ type: "nori"
253
+ },
254
+ searchkick_search: {
255
+ type: "nori"
256
+ },
257
+ searchkick_search2: {
258
+ type: "nori"
321
259
  }
260
+ )
261
+ when "vietnamese"
262
+ settings[:analysis][:analyzer].merge!(
263
+ default_analyzer => {
264
+ type: "vi_analyzer"
265
+ },
266
+ searchkick_search: {
267
+ type: "vi_analyzer"
268
+ },
269
+ searchkick_search2: {
270
+ type: "vi_analyzer"
271
+ }
272
+ )
273
+ when "polish", "ukrainian"
274
+ settings[:analysis][:analyzer].merge!(
275
+ default_analyzer => {
276
+ type: language
277
+ },
278
+ searchkick_search: {
279
+ type: language
280
+ },
281
+ searchkick_search2: {
282
+ type: language
283
+ }
284
+ )
285
+ end
286
+ end
322
287
 
323
- settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_wordnet")
324
- settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_wordnet"
288
+ def update_stemming(settings)
289
+ stem = options[:stem]
325
290
 
326
- %w(word_start word_middle word_end).each do |type|
327
- settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_wordnet")
328
- end
329
- end
291
+ # language analyzer used
292
+ stem = false if settings[:analysis][:analyzer][default_analyzer][:type] != "custom"
330
293
 
331
- if options[:special_characters] == false
332
- settings[:analysis][:analyzer].each_value do |analyzer_settings|
333
- analyzer_settings[:filter].reject! { |f| f == "asciifolding" }
334
- end
294
+ if stem == false
295
+ settings[:analysis][:filter].delete(:searchkick_stemmer)
296
+ settings[:analysis][:analyzer].each do |_, analyzer|
297
+ analyzer[:filter].delete("searchkick_stemmer") if analyzer[:filter]
335
298
  end
299
+ end
336
300
 
337
- mapping = {}
301
+ if options[:stemmer_override]
302
+ stemmer_override = {
303
+ type: "stemmer_override"
304
+ }
305
+ if options[:stemmer_override].is_a?(String)
306
+ stemmer_override[:rules_path] = options[:stemmer_override]
307
+ else
308
+ stemmer_override[:rules] = options[:stemmer_override]
309
+ end
310
+ settings[:analysis][:filter][:searchkick_stemmer_override] = stemmer_override
338
311
 
339
- # conversions
340
- Array(options[:conversions]).each do |conversions_field|
341
- mapping[conversions_field] = {
342
- type: "nested",
343
- properties: {
344
- query: {type: default_type, analyzer: "searchkick_keyword"},
345
- count: {type: "integer"}
346
- }
347
- }
312
+ settings[:analysis][:analyzer].each do |_, analyzer|
313
+ stemmer_index = analyzer[:filter].index("searchkick_stemmer") if analyzer[:filter]
314
+ analyzer[:filter].insert(stemmer_index, "searchkick_stemmer_override") if stemmer_index
348
315
  end
316
+ end
349
317
 
350
- mapping_options = Hash[
351
- [:suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable]
352
- .map { |type| [type, (options[type] || []).map(&:to_s)] }
353
- ]
318
+ if options[:stem_exclusion]
319
+ settings[:analysis][:filter][:searchkick_stem_exclusion] = {
320
+ type: "keyword_marker",
321
+ keywords: options[:stem_exclusion]
322
+ }
354
323
 
355
- word = options[:word] != false && (!options[:match] || options[:match] == :word)
324
+ settings[:analysis][:analyzer].each do |_, analyzer|
325
+ stemmer_index = analyzer[:filter].index("searchkick_stemmer") if analyzer[:filter]
326
+ analyzer[:filter].insert(stemmer_index, "searchkick_stem_exclusion") if stemmer_index
327
+ end
328
+ end
329
+ end
356
330
 
357
- mapping_options[:searchable].delete("_all")
331
+ def generate_mappings
332
+ mapping = {}
358
333
 
359
- analyzed_field_options = {type: default_type, index: index_true_value, analyzer: default_analyzer}
334
+ keyword_mapping = {type: "keyword"}
335
+ keyword_mapping[:ignore_above] = options[:ignore_above] || 30000
360
336
 
361
- mapping_options.values.flatten.uniq.each do |field|
362
- fields = {}
337
+ # conversions
338
+ Array(options[:conversions]).each do |conversions_field|
339
+ mapping[conversions_field] = {
340
+ type: "nested",
341
+ properties: {
342
+ query: {type: default_type, analyzer: "searchkick_keyword"},
343
+ count: {type: "integer"}
344
+ }
345
+ }
346
+ end
363
347
 
364
- if options.key?(:filterable) && !mapping_options[:filterable].include?(field)
365
- fields[field] = {type: default_type, index: index_false_value}
366
- else
367
- fields[field] = keyword_mapping
368
- end
348
+ mapping_options = Hash[
349
+ [:suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable]
350
+ .map { |type| [type, (options[type] || []).map(&:to_s)] }
351
+ ]
369
352
 
370
- if !options[:searchable] || mapping_options[:searchable].include?(field)
371
- if word
372
- fields[:analyzed] = analyzed_field_options
353
+ word = options[:word] != false && (!options[:match] || options[:match] == :word)
373
354
 
374
- if mapping_options[:highlight].include?(field)
375
- fields[:analyzed][:term_vector] = "with_positions_offsets"
376
- end
377
- end
355
+ mapping_options[:searchable].delete("_all")
378
356
 
379
- mapping_options.except(:highlight, :searchable, :filterable, :word).each do |type, f|
380
- if options[:match] == type || f.include?(field)
381
- fields[type] = {type: default_type, index: index_true_value, analyzer: "searchkick_#{type}_index"}
382
- end
383
- end
384
- end
357
+ analyzed_field_options = {type: default_type, index: true, analyzer: default_analyzer}
385
358
 
386
- mapping[field] = fields[field].merge(fields: fields.except(field))
387
- end
359
+ mapping_options.values.flatten.uniq.each do |field|
360
+ fields = {}
388
361
 
389
- (options[:locations] || []).map(&:to_s).each do |field|
390
- mapping[field] = {
391
- type: "geo_point"
392
- }
362
+ if options.key?(:filterable) && !mapping_options[:filterable].include?(field)
363
+ fields[field] = {type: default_type, index: false}
364
+ else
365
+ fields[field] = keyword_mapping
393
366
  end
394
367
 
395
- options[:geo_shape] = options[:geo_shape].product([{}]).to_h if options[:geo_shape].is_a?(Array)
396
- (options[:geo_shape] || {}).each do |field, shape_options|
397
- mapping[field] = shape_options.merge(type: "geo_shape")
398
- end
368
+ if !options[:searchable] || mapping_options[:searchable].include?(field)
369
+ if word
370
+ fields[:analyzed] = analyzed_field_options
399
371
 
400
- if options[:inheritance]
401
- mapping[:type] = keyword_mapping
402
- end
372
+ if mapping_options[:highlight].include?(field)
373
+ fields[:analyzed][:term_vector] = "with_positions_offsets"
374
+ end
375
+ end
403
376
 
404
- routing = {}
405
- if options[:routing]
406
- routing = {required: true}
407
- unless options[:routing] == true
408
- routing[:path] = options[:routing].to_s
377
+ mapping_options.except(:highlight, :searchable, :filterable, :word).each do |type, f|
378
+ if options[:match] == type || f.include?(field)
379
+ fields[type] = {type: default_type, index: true, analyzer: "searchkick_#{type}_index"}
380
+ end
409
381
  end
410
382
  end
411
383
 
412
- dynamic_fields = {
413
- # analyzed field must be the default field for include_in_all
414
- # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
415
- # however, we can include the not_analyzed field in _all
416
- # and the _all index analyzer will take care of it
417
- "{name}" => keyword_mapping
384
+ mapping[field] = fields[field].merge(fields: fields.except(field))
385
+ end
386
+
387
+ (options[:locations] || []).map(&:to_s).each do |field|
388
+ mapping[field] = {
389
+ type: "geo_point"
418
390
  }
391
+ end
419
392
 
420
- if options.key?(:filterable)
421
- dynamic_fields["{name}"] = {type: default_type, index: index_false_value}
422
- end
393
+ options[:geo_shape] = options[:geo_shape].product([{}]).to_h if options[:geo_shape].is_a?(Array)
394
+ (options[:geo_shape] || {}).each do |field, shape_options|
395
+ mapping[field] = shape_options.merge(type: "geo_shape")
396
+ end
423
397
 
424
- unless options[:searchable]
425
- if options[:match] && options[:match] != :word
426
- dynamic_fields[options[:match]] = {type: default_type, index: index_true_value, analyzer: "searchkick_#{options[:match]}_index"}
427
- end
398
+ if options[:inheritance]
399
+ mapping[:type] = keyword_mapping
400
+ end
428
401
 
429
- if word
430
- dynamic_fields[:analyzed] = analyzed_field_options
431
- end
402
+ routing = {}
403
+ if options[:routing]
404
+ routing = {required: true}
405
+ unless options[:routing] == true
406
+ routing[:path] = options[:routing].to_s
432
407
  end
408
+ end
433
409
 
410
+ dynamic_fields = {
411
+ # analyzed field must be the default field for include_in_all
434
412
  # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
435
- multi_field = dynamic_fields["{name}"].merge(fields: dynamic_fields.except("{name}"))
436
-
437
- mappings = {
438
- properties: mapping,
439
- _routing: routing,
440
- # https://gist.github.com/kimchy/2898285
441
- dynamic_templates: [
442
- {
443
- string_template: {
444
- match: "*",
445
- match_mapping_type: "string",
446
- mapping: multi_field
447
- }
448
- }
449
- ]
450
- }
413
+ # however, we can include the not_analyzed field in _all
414
+ # and the _all index analyzer will take care of it
415
+ "{name}" => keyword_mapping
416
+ }
451
417
 
452
- if below70
453
- mappings = {index_type => mappings}
418
+ if options.key?(:filterable)
419
+ dynamic_fields["{name}"] = {type: default_type, index: false}
420
+ end
421
+
422
+ unless options[:searchable]
423
+ if options[:match] && options[:match] != :word
424
+ dynamic_fields[options[:match]] = {type: default_type, index: true, analyzer: "searchkick_#{options[:match]}_index"}
454
425
  end
455
426
 
456
- mappings = mappings.symbolize_keys.deep_merge(custom_mapping.symbolize_keys)
427
+ if word
428
+ dynamic_fields[:analyzed] = analyzed_field_options
429
+ end
457
430
  end
458
431
 
459
- if options[:deep_paging]
460
- if !settings.dig(:index, :max_result_window) && !settings[:"index.max_result_window"]
461
- settings[:index] ||= {}
462
- settings[:index][:max_result_window] = 1_000_000_000
432
+ # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
433
+ multi_field = dynamic_fields["{name}"].merge(fields: dynamic_fields.except("{name}"))
434
+
435
+ mappings = {
436
+ properties: mapping,
437
+ _routing: routing,
438
+ # https://gist.github.com/kimchy/2898285
439
+ dynamic_templates: [
440
+ {
441
+ string_template: {
442
+ match: "*",
443
+ match_mapping_type: "string",
444
+ mapping: multi_field
445
+ }
446
+ }
447
+ ]
448
+ }
449
+
450
+ if below70?
451
+ mappings = {index_type => mappings}
452
+ end
453
+
454
+ mappings
455
+ end
456
+
457
# Registers the index-time synonym token filter in the analysis settings.
#
# Reads options[:synonyms], which may be a list or a callable returning a
# list. Each entry is either a string (passed through as-is) or an array of
# terms that is joined with commas. Entries whose size is 1 or less are
# ignored. When nothing is configured the settings are left untouched.
#
# settings - analysis settings hash, mutated in place. It must already
#            contain [:analysis][:filter] and the [:analysis][:analyzer]
#            entries for the default analyzer and the three
#            searchkick_word_*_index analyzers.
def add_synonyms(settings)
  synonyms = options[:synonyms]
  synonyms = synonyms.call if synonyms.respond_to?(:call)
  # apply the default after resolving the callable, so a callable that
  # returns nil behaves like a missing option instead of raising on nil.any?
  synonyms ||= []
  return unless synonyms.any?

  rules = synonyms.select { |s| s.size > 1 }.map do |s|
    # only remove a single space from synonyms so three-word synonyms will fail noisily instead of silently
    rule = s.is_a?(Array) ? s.map { |term| term.sub(/\s+/, "") }.join(",") : s
    rule.downcase
  end

  settings[:analysis][:filter][:searchkick_synonym] = {
    type: "synonym",
    synonyms: rules
  }

  # choosing a place for the synonym filter when stemming is not easy
  # https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
  # TODO use a snowball stemmer on synonyms when creating the token filter

  # http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
  # Advice quoted from that thread for multi-word synonyms (synonym phrases):
  # - Only apply the synonym expansion at index time
  # - Don't apply the synonym filter at search time
  # - Use directional synonyms where appropriate, to avoid injecting terms that are too general
  analyzers = settings[:analysis][:analyzer]
  analyzers[default_analyzer][:filter].insert(2, "searchkick_synonym")

  %w(word_start word_middle word_end).each do |type|
    analyzers[:"searchkick_#{type}_index"][:filter].insert(2, "searchkick_synonym")
  end
end
482
+
483
# Registers the search-time synonym_graph token filter in the analysis settings.
#
# Reads options[:search_synonyms], which may be:
# - a String, used as the filter's synonyms_path (and marked updateable
#   unless below73? is true),
# - a list of strings / arrays of terms (arrays are joined with commas,
#   entries whose size is 1 or less are ignored), or
# - a callable returning either of the above.
# When nothing is configured the settings are left untouched.
#
# settings - analysis settings hash, mutated in place. It must already
#            contain [:analysis][:filter] and the :searchkick_search2 and
#            :searchkick_word_search analyzers.
def add_search_synonyms(settings)
  search_synonyms = options[:search_synonyms]
  search_synonyms = search_synonyms.call if search_synonyms.respond_to?(:call)
  # apply the default after resolving the callable, so a callable that
  # returns nil behaves like a missing option instead of raising on nil.any?
  search_synonyms ||= []

  from_file = search_synonyms.is_a?(String)
  return unless from_file || search_synonyms.any?

  synonym_graph = {type: "synonym_graph"}
  if from_file
    synonym_graph[:synonyms_path] = search_synonyms
    synonym_graph[:updateable] = true unless below73?
  else
    # TODO confirm this is correct
    synonym_graph[:synonyms] = search_synonyms.select { |s| s.size > 1 }.map do |s|
      (s.is_a?(Array) ? s.join(",") : s).downcase
    end
  end
  settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph

  [:searchkick_search2, :searchkick_word_search].each do |analyzer|
    settings[:analysis][:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph")
  end
end
465
507
 
466
- {
467
- settings: settings,
468
- mappings: mappings
508
# Registers a WordNet-format synonym filter that reads its synonyms from
# Searchkick.wordnet_path, and wires it into the index analyzers.
#
# settings - analysis settings hash, mutated in place. It must already
#            contain [:analysis][:filter] and the [:analysis][:analyzer]
#            entries for the default analyzer and the three
#            searchkick_word_*_index analyzers.
def add_wordnet(settings)
  analysis = settings[:analysis]

  analysis[:filter][:searchkick_wordnet] = {
    type: "synonym",
    format: "wordnet",
    synonyms_path: Searchkick.wordnet_path
  }

  # NOTE(review): the default analyzer receives the filter twice, once at
  # index 4 and once appended at the end. Kept as-is; confirm this is intended.
  default_filters = analysis[:analyzer][default_analyzer][:filter]
  default_filters.insert(4, "searchkick_wordnet")
  default_filters << "searchkick_wordnet"

  %w(word_start word_middle word_end).each do |type|
    analysis[:analyzer][:"searchkick_#{type}_index"][:filter].insert(2, "searchkick_wordnet")
  end
end
522
+
523
# Raises index.max_result_window to 1_000_000_000 unless the settings
# already define it, either nested under :index or as the flat
# :"index.max_result_window" key.
#
# settings - index settings hash, mutated in place.
def set_deep_paging(settings)
  # an explicit value always wins
  return if settings.dig(:index, :max_result_window) || settings[:"index.max_result_window"]

  (settings[:index] ||= {})[:max_result_window] = 1_000_000_000
end
529
+
530
# Memoized value of options[:_type]. When the option is callable it is
# invoked and its result is used instead.
def index_type
  @index_type ||= begin
    configured = options[:_type]
    configured.respond_to?(:call) ? configured.call : configured
  end
end
537
+
538
# Field type used for the string mappings built in this file.
def default_type
  "text"
end
541
+
542
# Key of the analyzer that the synonym and wordnet filters are inserted
# into, looked up under settings[:analysis][:analyzer].
def default_analyzer
  :searchkick_index
end
545
+
546
# Version gate: returns whatever Searchkick.server_below? reports for "6.2.0".
def below62?
  Searchkick.server_below?("6.2.0")
end
549
+
550
# Version gate: returns whatever Searchkick.server_below? reports for "7.0.0".
def below70?
  Searchkick.server_below?("7.0.0")
end
553
+
554
# Version gate: returns whatever Searchkick.server_below? reports for "7.3.0".
def below73?
  Searchkick.server_below?("7.3.0")
end
471
557
  end
472
558
  end