searchkick 4.0.0 → 4.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,10 @@
1
+ # dependencies
1
2
  require "active_support"
2
3
  require "active_support/core_ext/hash/deep_merge"
3
4
  require "elasticsearch"
4
5
  require "hashie"
5
6
 
7
+ # modules
6
8
  require "searchkick/bulk_indexer"
7
9
  require "searchkick/index"
8
10
  require "searchkick/indexer"
@@ -17,6 +19,7 @@ require "searchkick/record_indexer"
17
19
  require "searchkick/results"
18
20
  require "searchkick/version"
19
21
 
22
+ # integrations
20
23
  require "searchkick/railtie" if defined?(Rails)
21
24
  require "searchkick/logging" if defined?(ActiveSupport::Notifications)
22
25
 
@@ -27,6 +30,7 @@ module Searchkick
27
30
  autoload :ProcessQueueJob, "searchkick/process_queue_job"
28
31
  autoload :ReindexV2Job, "searchkick/reindex_v2_job"
29
32
 
33
+ # errors
30
34
  class Error < StandardError; end
31
35
  class MissingIndexError < Error; end
32
36
  class UnsupportedVersionError < Error; end
@@ -49,7 +53,7 @@ module Searchkick
49
53
 
50
54
  def self.client
51
55
  @client ||= begin
52
- require "typhoeus/adapters/faraday" if defined?(Typhoeus)
56
+ require "typhoeus/adapters/faraday" if defined?(Typhoeus) && Gem::Version.new(Faraday::VERSION) < Gem::Version.new("0.14.0")
53
57
 
54
58
  Elasticsearch::Client.new({
55
59
  url: ENV["ELASTICSEARCH_URL"],
@@ -67,7 +71,7 @@ module Searchkick
67
71
  end
68
72
 
69
73
  def self.search_timeout
70
- @search_timeout || timeout
74
+ (defined?(@search_timeout) && @search_timeout) || timeout
71
75
  end
72
76
 
73
77
  def self.server_version
@@ -112,7 +116,7 @@ module Searchkick
112
116
  end
113
117
 
114
118
  options = options.merge(block: block) if block
115
- query = Searchkick::Query.new(klass, term, options)
119
+ query = Searchkick::Query.new(klass, term, **options)
116
120
  if options[:execute] == false
117
121
  query
118
122
  else
@@ -142,7 +146,7 @@ module Searchkick
142
146
  end
143
147
  end
144
148
 
145
- def self.callbacks(value)
149
+ def self.callbacks(value = nil)
146
150
  if block_given?
147
151
  previous_value = callbacks_value
148
152
  begin
@@ -160,6 +164,7 @@ module Searchkick
160
164
 
161
165
  def self.aws_credentials=(creds)
162
166
  begin
167
+ # TODO remove in Searchkick 5 (just use aws_sigv4)
163
168
  require "faraday_middleware/aws_signers_v4"
164
169
  rescue LoadError
165
170
  require "faraday_middleware/aws_sigv4"
@@ -169,17 +174,16 @@ module Searchkick
169
174
  end
170
175
 
171
176
  def self.reindex_status(index_name)
172
- if redis
173
- batches_left = Searchkick::Index.new(index_name).batches_left
174
- {
175
- completed: batches_left == 0,
176
- batches_left: batches_left
177
- }
178
- else
179
- raise Searchkick::Error, "Redis not configured"
180
- end
177
+ raise Searchkick::Error, "Redis not configured" unless redis
178
+
179
+ batches_left = Searchkick::Index.new(index_name).batches_left
180
+ {
181
+ completed: batches_left == 0,
182
+ batches_left: batches_left
183
+ }
181
184
  end
182
185
 
186
+ # TODO use ConnectionPool::Wrapper when redis is set so this is no longer needed
183
187
  def self.with_redis
184
188
  if redis
185
189
  if redis.respond_to?(:with)
@@ -192,6 +196,10 @@ module Searchkick
192
196
  end
193
197
  end
194
198
 
199
+ def self.warn(message)
200
+ super("[searchkick] WARNING: #{message}")
201
+ end
202
+
195
203
  # private
196
204
  def self.load_records(records, ids)
197
205
  records =
@@ -245,11 +253,26 @@ module Searchkick
245
253
  }
246
254
  end
247
255
  end
256
+
257
+ # private
258
+ # methods are forwarded to base class
259
+ # this check to see if scope exists on that class
260
+ # it's a bit tricky, but this seems to work
261
+ def self.relation?(klass)
262
+ if klass.respond_to?(:current_scope)
263
+ !klass.current_scope.nil?
264
+ elsif defined?(Mongoid::Threaded)
265
+ !Mongoid::Threaded.current_scope(klass).nil?
266
+ end
267
+ end
248
268
  end
249
269
 
250
- # TODO find better ActiveModel hook
251
270
  require "active_model/callbacks"
252
271
  ActiveModel::Callbacks.include(Searchkick::Model)
272
+ # TODO use
273
+ # ActiveSupport.on_load(:mongoid) do
274
+ # Mongoid::Document::ClassMethods.include Searchkick::Model
275
+ # end
253
276
 
254
277
  ActiveSupport.on_load(:active_record) do
255
278
  extend Searchkick::Model
@@ -61,7 +61,7 @@ module Searchkick
61
61
  if records.any?
62
62
  if async
63
63
  Searchkick::BulkReindexJob.perform_later(
64
- class_name: records.first.class.name,
64
+ class_name: records.first.class.searchkick_options[:class_name],
65
65
  record_ids: records.map(&:id),
66
66
  index_name: index.name,
67
67
  method_name: method_name ? method_name.to_s : nil
@@ -87,6 +87,8 @@ module Searchkick
87
87
  # TODO expire Redis key
88
88
  primary_key = scope.primary_key
89
89
 
90
+ scope = scope.select(primary_key).except(:includes, :preload)
91
+
90
92
  starting_id =
91
93
  begin
92
94
  scope.minimum(primary_key)
@@ -139,8 +141,8 @@ module Searchkick
139
141
 
140
142
  def bulk_reindex_job(scope, batch_id, options)
141
143
  Searchkick.with_redis { |r| r.sadd(batches_key, batch_id) }
142
- Searchkick::BulkReindexJob.perform_later({
143
- class_name: scope.model_name.name,
144
+ Searchkick::BulkReindexJob.perform_later(**{
145
+ class_name: scope.searchkick_options[:class_name],
144
146
  index_name: index.name,
145
147
  batch_id: batch_id
146
148
  }.merge(options))
@@ -2,8 +2,6 @@ require "searchkick/index_options"
2
2
 
3
3
  module Searchkick
4
4
  class Index
5
- include IndexOptions
6
-
7
5
  attr_reader :name, :options
8
6
 
9
7
  def initialize(name, options = {})
@@ -12,6 +10,10 @@ module Searchkick
12
10
  @klass_document_type = {} # cache
13
11
  end
14
12
 
13
+ def index_options
14
+ IndexOptions.new(self).index_options
15
+ end
16
+
15
17
  def create(body = {})
16
18
  client.indices.create index: name, body: body
17
19
  end
@@ -47,7 +49,7 @@ module Searchkick
47
49
  end
48
50
 
49
51
  def refresh_interval
50
- settings.values.first["settings"]["index"]["refresh_interval"]
52
+ index_settings["refresh_interval"]
51
53
  end
52
54
 
53
55
  def update_settings(settings)
@@ -174,6 +176,17 @@ module Searchkick
174
176
  Searchkick.search(like_text, model: record.class, **options)
175
177
  end
176
178
 
179
+ def reload_synonyms
180
+ require "elasticsearch/xpack"
181
+ raise Error, "Requires Elasticsearch 7.3+" if Searchkick.server_below?("7.3.0")
182
+ raise Error, "Requires elasticsearch-xpack 7.8+" unless client.xpack.respond_to?(:indices)
183
+ begin
184
+ client.xpack.indices.reload_search_analyzers(index: name)
185
+ rescue Elasticsearch::Transport::Transport::Errors::MethodNotAllowed
186
+ raise Error, "Requires non-OSS version of Elasticsearch"
187
+ end
188
+ end
189
+
177
190
  # queue
178
191
 
179
192
  def reindex_queue
@@ -184,13 +197,20 @@ module Searchkick
184
197
 
185
198
  def reindex(relation, method_name, scoped:, full: false, scope: nil, **options)
186
199
  refresh = options.fetch(:refresh, !scoped)
200
+ options.delete(:refresh)
187
201
 
188
202
  if method_name
203
+ # TODO throw ArgumentError
204
+ Searchkick.warn("unsupported keywords: #{options.keys.map(&:inspect).join(", ")}") if options.any?
205
+
189
206
  # update
190
207
  import_scope(relation, method_name: method_name, scope: scope)
191
208
  self.refresh if refresh
192
209
  true
193
210
  elsif scoped && !full
211
+ # TODO throw ArgumentError
212
+ Searchkick.warn("unsupported keywords: #{options.keys.map(&:inspect).join(", ")}") if options.any?
213
+
194
214
  # reindex association
195
215
  import_scope(relation, scope: scope)
196
216
  self.refresh if refresh
@@ -249,6 +269,11 @@ module Searchkick
249
269
  end
250
270
  end
251
271
 
272
+ # private
273
+ def uuid
274
+ index_settings["uuid"]
275
+ end
276
+
252
277
  protected
253
278
 
254
279
  def client
@@ -259,6 +284,14 @@ module Searchkick
259
284
  @bulk_indexer ||= BulkIndexer.new(self)
260
285
  end
261
286
 
287
+ def index_settings
288
+ settings.values.first["settings"]["index"]
289
+ end
290
+
291
+ def import_before_promotion(index, relation, **import_options)
292
+ index.import_scope(relation, **import_options)
293
+ end
294
+
262
295
  # https://gist.github.com/jarosan/3124884
263
296
  # http://www.elasticsearch.org/blog/changing-mapping-with-zero-downtime/
264
297
  def reindex_scope(relation, import: true, resume: false, retain: false, async: false, refresh_interval: nil, scope: nil)
@@ -281,14 +314,16 @@ module Searchkick
281
314
  scope: scope
282
315
  }
283
316
 
317
+ uuid = index.uuid
318
+
284
319
  # check if alias exists
285
320
  alias_exists = alias_exists?
286
321
  if alias_exists
287
- # import before promotion
288
- index.import_scope(relation, **import_options) if import
322
+ import_before_promotion(index, relation, **import_options) if import
289
323
 
290
324
  # get existing indices to remove
291
325
  unless async
326
+ check_uuid(uuid, index.uuid)
292
327
  promote(index.name, update_refresh_interval: !refresh_interval.nil?)
293
328
  clean_indices unless retain
294
329
  end
@@ -313,6 +348,7 @@ module Searchkick
313
348
  # already promoted if alias didn't exist
314
349
  if alias_exists
315
350
  puts "Jobs complete. Promoting..."
351
+ check_uuid(uuid, index.uuid)
316
352
  promote(index.name, update_refresh_interval: !refresh_interval.nil?)
317
353
  end
318
354
  clean_indices unless retain
@@ -331,5 +367,15 @@ module Searchkick
331
367
 
332
368
  raise e
333
369
  end
370
+
371
+ # safety check
372
+ # still a chance for race condition since its called before promotion
373
+ # ideal is for user to disable automatic index creation
374
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html#index-creation
375
+ def check_uuid(old_uuid, new_uuid)
376
+ if old_uuid != new_uuid
377
+ raise Searchkick::Error, "Safety check failed - only run one Model.reindex per model at a time"
378
+ end
379
+ end
334
380
  end
335
381
  end
@@ -1,429 +1,558 @@
1
1
  module Searchkick
2
- module IndexOptions
2
+ class IndexOptions
3
+ attr_reader :options
4
+
5
+ def initialize(index)
6
+ @options = index.options
7
+ end
8
+
3
9
  def index_options
4
- options = @options
5
- language = options[:language]
6
- language = language.call if language.respond_to?(:call)
10
+ custom_mapping = options[:mappings] || {}
11
+ if below70? && custom_mapping.keys.map(&:to_sym).include?(:properties)
12
+ # add type
13
+ custom_mapping = {index_type => custom_mapping}
14
+ end
7
15
 
8
16
  if options[:mappings] && !options[:merge_mappings]
9
17
  settings = options[:settings] || {}
10
- mappings = options[:mappings]
18
+ mappings = custom_mapping
11
19
  else
12
- below62 = Searchkick.server_below?("6.2.0")
13
- below70 = Searchkick.server_below?("7.0.0")
14
-
15
- default_type = "text"
16
- default_analyzer = :searchkick_index
17
- keyword_mapping = {type: "keyword"}
18
-
19
- all = options.key?(:_all) ? options[:_all] : false
20
- index_true_value = true
21
- index_false_value = false
22
-
23
- keyword_mapping[:ignore_above] = options[:ignore_above] || 30000
24
-
25
- settings = {
26
- analysis: {
27
- analyzer: {
28
- searchkick_keyword: {
29
- type: "custom",
30
- tokenizer: "keyword",
31
- filter: ["lowercase"] + (options[:stem_conversions] ? ["searchkick_stemmer"] : [])
32
- },
33
- default_analyzer => {
34
- type: "custom",
35
- # character filters -> tokenizer -> token filters
36
- # https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html
37
- char_filter: ["ampersand"],
38
- tokenizer: "standard",
39
- # synonym should come last, after stemming and shingle
40
- # shingle must come before searchkick_stemmer
41
- filter: ["lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
42
- },
43
- searchkick_search: {
44
- type: "custom",
45
- char_filter: ["ampersand"],
46
- tokenizer: "standard",
47
- filter: ["lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
48
- },
49
- searchkick_search2: {
50
- type: "custom",
51
- char_filter: ["ampersand"],
52
- tokenizer: "standard",
53
- filter: ["lowercase", "asciifolding", "searchkick_stemmer"]
54
- },
55
- # https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
56
- searchkick_autocomplete_search: {
57
- type: "custom",
58
- tokenizer: "keyword",
59
- filter: ["lowercase", "asciifolding"]
60
- },
61
- searchkick_word_search: {
62
- type: "custom",
63
- tokenizer: "standard",
64
- filter: ["lowercase", "asciifolding"]
65
- },
66
- searchkick_suggest_index: {
67
- type: "custom",
68
- tokenizer: "standard",
69
- filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
70
- },
71
- searchkick_text_start_index: {
72
- type: "custom",
73
- tokenizer: "keyword",
74
- filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
75
- },
76
- searchkick_text_middle_index: {
77
- type: "custom",
78
- tokenizer: "keyword",
79
- filter: ["lowercase", "asciifolding", "searchkick_ngram"]
80
- },
81
- searchkick_text_end_index: {
82
- type: "custom",
83
- tokenizer: "keyword",
84
- filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
85
- },
86
- searchkick_word_start_index: {
87
- type: "custom",
88
- tokenizer: "standard",
89
- filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
90
- },
91
- searchkick_word_middle_index: {
92
- type: "custom",
93
- tokenizer: "standard",
94
- filter: ["lowercase", "asciifolding", "searchkick_ngram"]
95
- },
96
- searchkick_word_end_index: {
97
- type: "custom",
98
- tokenizer: "standard",
99
- filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
100
- }
101
- },
102
- filter: {
103
- searchkick_index_shingle: {
104
- type: "shingle",
105
- token_separator: ""
106
- },
107
- # lucky find http://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
108
- searchkick_search_shingle: {
109
- type: "shingle",
110
- token_separator: "",
111
- output_unigrams: false,
112
- output_unigrams_if_no_shingles: true
113
- },
114
- searchkick_suggest_shingle: {
115
- type: "shingle",
116
- max_shingle_size: 5
117
- },
118
- searchkick_edge_ngram: {
119
- type: "edgeNGram",
120
- min_gram: 1,
121
- max_gram: 50
122
- },
123
- searchkick_ngram: {
124
- type: "nGram",
125
- min_gram: 1,
126
- max_gram: 50
127
- },
128
- searchkick_stemmer: {
129
- # use stemmer if language is lowercase, snowball otherwise
130
- type: language == language.to_s.downcase ? "stemmer" : "snowball",
131
- language: language || "English"
132
- }
133
- },
134
- char_filter: {
135
- # https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html
136
- # &_to_and
137
- ampersand: {
138
- type: "mapping",
139
- mappings: ["&=> and "]
140
- }
141
- }
142
- }
143
- }
20
+ settings = generate_settings
21
+ mappings = generate_mappings.symbolize_keys.deep_merge(custom_mapping.symbolize_keys)
22
+ end
23
+
24
+ set_deep_paging(settings) if options[:deep_paging]
25
+
26
+ {
27
+ settings: settings,
28
+ mappings: mappings
29
+ }
30
+ end
144
31
 
145
- stem = options[:stem]
32
+ def generate_settings
33
+ language = options[:language]
34
+ language = language.call if language.respond_to?(:call)
146
35
 
147
- case language
148
- when "chinese"
149
- settings[:analysis][:analyzer].merge!(
36
+ settings = {
37
+ analysis: {
38
+ analyzer: {
39
+ searchkick_keyword: {
40
+ type: "custom",
41
+ tokenizer: "keyword",
42
+ filter: ["lowercase"] + (options[:stem_conversions] ? ["searchkick_stemmer"] : [])
43
+ },
150
44
  default_analyzer => {
151
- type: "ik_smart"
45
+ type: "custom",
46
+ # character filters -> tokenizer -> token filters
47
+ # https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html
48
+ char_filter: ["ampersand"],
49
+ tokenizer: "standard",
50
+ # synonym should come last, after stemming and shingle
51
+ # shingle must come before searchkick_stemmer
52
+ filter: ["lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
152
53
  },
153
54
  searchkick_search: {
154
- type: "ik_smart"
55
+ type: "custom",
56
+ char_filter: ["ampersand"],
57
+ tokenizer: "standard",
58
+ filter: ["lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
155
59
  },
156
60
  searchkick_search2: {
157
- type: "ik_max_word"
158
- }
159
- )
160
-
161
- stem = false
162
- when "japanese"
163
- settings[:analysis][:analyzer].merge!(
164
- default_analyzer => {
165
- type: "kuromoji"
61
+ type: "custom",
62
+ char_filter: ["ampersand"],
63
+ tokenizer: "standard",
64
+ filter: ["lowercase", "asciifolding", "searchkick_stemmer"]
166
65
  },
167
- searchkick_search: {
168
- type: "kuromoji"
66
+ # https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
67
+ searchkick_autocomplete_search: {
68
+ type: "custom",
69
+ tokenizer: "keyword",
70
+ filter: ["lowercase", "asciifolding"]
169
71
  },
170
- searchkick_search2: {
171
- type: "kuromoji"
172
- }
173
- )
174
-
175
- stem = false
176
- when "korean"
177
- settings[:analysis][:analyzer].merge!(
178
- default_analyzer => {
179
- type: "openkoreantext-analyzer"
72
+ searchkick_word_search: {
73
+ type: "custom",
74
+ tokenizer: "standard",
75
+ filter: ["lowercase", "asciifolding"]
180
76
  },
181
- searchkick_search: {
182
- type: "openkoreantext-analyzer"
77
+ searchkick_suggest_index: {
78
+ type: "custom",
79
+ tokenizer: "standard",
80
+ filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
183
81
  },
184
- searchkick_search2: {
185
- type: "openkoreantext-analyzer"
186
- }
187
- )
188
-
189
- stem = false
190
- when "vietnamese"
191
- settings[:analysis][:analyzer].merge!(
192
- default_analyzer => {
193
- type: "vi_analyzer"
82
+ searchkick_text_start_index: {
83
+ type: "custom",
84
+ tokenizer: "keyword",
85
+ filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
194
86
  },
195
- searchkick_search: {
196
- type: "vi_analyzer"
87
+ searchkick_text_middle_index: {
88
+ type: "custom",
89
+ tokenizer: "keyword",
90
+ filter: ["lowercase", "asciifolding", "searchkick_ngram"]
197
91
  },
198
- searchkick_search2: {
199
- type: "vi_analyzer"
92
+ searchkick_text_end_index: {
93
+ type: "custom",
94
+ tokenizer: "keyword",
95
+ filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
96
+ },
97
+ searchkick_word_start_index: {
98
+ type: "custom",
99
+ tokenizer: "standard",
100
+ filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
101
+ },
102
+ searchkick_word_middle_index: {
103
+ type: "custom",
104
+ tokenizer: "standard",
105
+ filter: ["lowercase", "asciifolding", "searchkick_ngram"]
106
+ },
107
+ searchkick_word_end_index: {
108
+ type: "custom",
109
+ tokenizer: "standard",
110
+ filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
200
111
  }
201
- )
202
-
203
- stem = false
204
- when "polish", "ukrainian", "smartcn"
205
- settings[:analysis][:analyzer].merge!(
206
- default_analyzer => {
207
- type: language
112
+ },
113
+ filter: {
114
+ searchkick_index_shingle: {
115
+ type: "shingle",
116
+ token_separator: ""
208
117
  },
209
- searchkick_search: {
210
- type: language
118
+ # lucky find https://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
119
+ searchkick_search_shingle: {
120
+ type: "shingle",
121
+ token_separator: "",
122
+ output_unigrams: false,
123
+ output_unigrams_if_no_shingles: true
211
124
  },
212
- searchkick_search2: {
213
- type: language
125
+ searchkick_suggest_shingle: {
126
+ type: "shingle",
127
+ max_shingle_size: 5
128
+ },
129
+ searchkick_edge_ngram: {
130
+ type: "edge_ngram",
131
+ min_gram: 1,
132
+ max_gram: 50
133
+ },
134
+ searchkick_ngram: {
135
+ type: "ngram",
136
+ min_gram: 1,
137
+ max_gram: 50
138
+ },
139
+ searchkick_stemmer: {
140
+ # use stemmer if language is lowercase, snowball otherwise
141
+ type: language == language.to_s.downcase ? "stemmer" : "snowball",
142
+ language: language || "English"
143
+ }
144
+ },
145
+ char_filter: {
146
+ # https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html
147
+ # &_to_and
148
+ ampersand: {
149
+ type: "mapping",
150
+ mappings: ["&=> and "]
214
151
  }
215
- )
152
+ }
153
+ }
154
+ }
216
155
 
217
- stem = false
218
- end
156
+ update_language(settings, language)
157
+ update_stemming(settings)
219
158
 
220
- if Searchkick.env == "test"
221
- settings[:number_of_shards] = 1
222
- settings[:number_of_replicas] = 0
223
- end
159
+ if Searchkick.env == "test"
160
+ settings[:number_of_shards] = 1
161
+ settings[:number_of_replicas] = 0
162
+ end
224
163
 
225
- if options[:similarity]
226
- settings[:similarity] = {default: {type: options[:similarity]}}
227
- end
164
+ # TODO remove in Searchkick 5 (classic no longer supported)
165
+ if options[:similarity]
166
+ settings[:similarity] = {default: {type: options[:similarity]}}
167
+ end
228
168
 
229
- unless below62
230
- settings[:index] = {
231
- max_ngram_diff: 49,
232
- max_shingle_diff: 4
233
- }
234
- end
169
+ unless below62?
170
+ settings[:index] = {
171
+ max_ngram_diff: 49,
172
+ max_shingle_diff: 4
173
+ }
174
+ end
235
175
 
236
- if options[:case_sensitive]
237
- settings[:analysis][:analyzer].each do |_, analyzer|
238
- analyzer[:filter].delete("lowercase")
239
- end
176
+ if options[:case_sensitive]
177
+ settings[:analysis][:analyzer].each do |_, analyzer|
178
+ analyzer[:filter].delete("lowercase")
240
179
  end
180
+ end
241
181
 
242
- if stem == false
243
- settings[:analysis][:filter].delete(:searchkick_stemmer)
244
- settings[:analysis][:analyzer].each do |_, analyzer|
245
- analyzer[:filter].delete("searchkick_stemmer") if analyzer[:filter]
246
- end
247
- end
182
+ # TODO do this last in Searchkick 5
183
+ settings = settings.symbolize_keys.deep_merge((options[:settings] || {}).symbolize_keys)
248
184
 
249
- settings = settings.symbolize_keys.deep_merge((options[:settings] || {}).symbolize_keys)
185
+ add_synonyms(settings)
186
+ add_search_synonyms(settings)
187
+ # TODO remove in Searchkick 5
188
+ add_wordnet(settings) if options[:wordnet]
250
189
 
251
- # synonyms
252
- synonyms = options[:synonyms] || []
190
+ if options[:special_characters] == false
191
+ settings[:analysis][:analyzer].each_value do |analyzer_settings|
192
+ analyzer_settings[:filter].reject! { |f| f == "asciifolding" }
193
+ end
194
+ end
253
195
 
254
- synonyms = synonyms.call if synonyms.respond_to?(:call)
196
+ settings
197
+ end
255
198
 
256
- if synonyms.any?
257
- settings[:analysis][:filter][:searchkick_synonym] = {
258
- type: "synonym",
259
- # only remove a single space from synonyms so three-word synonyms will fail noisily instead of silently
260
- synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.map { |s2| s2.sub(/\s+/, "") }.join(",") : s }.map(&:downcase)
199
+ def update_language(settings, language)
200
+ case language
201
+ when "chinese"
202
+ settings[:analysis][:analyzer].merge!(
203
+ default_analyzer => {
204
+ type: "ik_smart"
205
+ },
206
+ searchkick_search: {
207
+ type: "ik_smart"
208
+ },
209
+ searchkick_search2: {
210
+ type: "ik_max_word"
261
211
  }
262
- # choosing a place for the synonym filter when stemming is not easy
263
- # https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
264
- # TODO use a snowball stemmer on synonyms when creating the token filter
265
-
266
- # http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
267
- # I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
268
- # - Only apply the synonym expansion at index time
269
- # - Don't have the synonym filter applied search
270
- # - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
271
- settings[:analysis][:analyzer][default_analyzer][:filter].insert(2, "searchkick_synonym")
272
-
273
- %w(word_start word_middle word_end).each do |type|
274
- settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym")
275
- end
276
- end
277
-
278
- if options[:wordnet]
279
- settings[:analysis][:filter][:searchkick_wordnet] = {
280
- type: "synonym",
281
- format: "wordnet",
282
- synonyms_path: Searchkick.wordnet_path
212
+ )
213
+ when "chinese2", "smartcn"
214
+ settings[:analysis][:analyzer].merge!(
215
+ default_analyzer => {
216
+ type: "smartcn"
217
+ },
218
+ searchkick_search: {
219
+ type: "smartcn"
220
+ },
221
+ searchkick_search2: {
222
+ type: "smartcn"
223
+ }
224
+ )
225
+ when "japanese"
226
+ settings[:analysis][:analyzer].merge!(
227
+ default_analyzer => {
228
+ type: "kuromoji"
229
+ },
230
+ searchkick_search: {
231
+ type: "kuromoji"
232
+ },
233
+ searchkick_search2: {
234
+ type: "kuromoji"
283
235
  }
236
+ )
237
+ when "korean"
238
+ settings[:analysis][:analyzer].merge!(
239
+ default_analyzer => {
240
+ type: "openkoreantext-analyzer"
241
+ },
242
+ searchkick_search: {
243
+ type: "openkoreantext-analyzer"
244
+ },
245
+ searchkick_search2: {
246
+ type: "openkoreantext-analyzer"
247
+ }
248
+ )
249
+ when "korean2"
250
+ settings[:analysis][:analyzer].merge!(
251
+ default_analyzer => {
252
+ type: "nori"
253
+ },
254
+ searchkick_search: {
255
+ type: "nori"
256
+ },
257
+ searchkick_search2: {
258
+ type: "nori"
259
+ }
260
+ )
261
+ when "vietnamese"
262
+ settings[:analysis][:analyzer].merge!(
263
+ default_analyzer => {
264
+ type: "vi_analyzer"
265
+ },
266
+ searchkick_search: {
267
+ type: "vi_analyzer"
268
+ },
269
+ searchkick_search2: {
270
+ type: "vi_analyzer"
271
+ }
272
+ )
273
+ when "polish", "ukrainian"
274
+ settings[:analysis][:analyzer].merge!(
275
+ default_analyzer => {
276
+ type: language
277
+ },
278
+ searchkick_search: {
279
+ type: language
280
+ },
281
+ searchkick_search2: {
282
+ type: language
283
+ }
284
+ )
285
+ end
286
+ end
284
287
 
285
- settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_wordnet")
286
- settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_wordnet"
288
+ def update_stemming(settings)
289
+ stem = options[:stem]
287
290
 
288
- %w(word_start word_middle word_end).each do |type|
289
- settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_wordnet")
290
- end
291
- end
291
+ # language analyzer used
292
+ stem = false if settings[:analysis][:analyzer][default_analyzer][:type] != "custom"
292
293
 
293
- if options[:special_characters] == false
294
- settings[:analysis][:analyzer].each_value do |analyzer_settings|
295
- analyzer_settings[:filter].reject! { |f| f == "asciifolding" }
296
- end
294
+ if stem == false
295
+ settings[:analysis][:filter].delete(:searchkick_stemmer)
296
+ settings[:analysis][:analyzer].each do |_, analyzer|
297
+ analyzer[:filter].delete("searchkick_stemmer") if analyzer[:filter]
297
298
  end
299
+ end
298
300
 
299
- mapping = {}
301
+ if options[:stemmer_override]
302
+ stemmer_override = {
303
+ type: "stemmer_override"
304
+ }
305
+ if options[:stemmer_override].is_a?(String)
306
+ stemmer_override[:rules_path] = options[:stemmer_override]
307
+ else
308
+ stemmer_override[:rules] = options[:stemmer_override]
309
+ end
310
+ settings[:analysis][:filter][:searchkick_stemmer_override] = stemmer_override
300
311
 
301
- # conversions
302
- Array(options[:conversions]).each do |conversions_field|
303
- mapping[conversions_field] = {
304
- type: "nested",
305
- properties: {
306
- query: {type: default_type, analyzer: "searchkick_keyword"},
307
- count: {type: "integer"}
308
- }
309
- }
312
+ settings[:analysis][:analyzer].each do |_, analyzer|
313
+ stemmer_index = analyzer[:filter].index("searchkick_stemmer") if analyzer[:filter]
314
+ analyzer[:filter].insert(stemmer_index, "searchkick_stemmer_override") if stemmer_index
310
315
  end
316
+ end
311
317
 
312
- mapping_options = Hash[
313
- [:suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable]
314
- .map { |type| [type, (options[type] || []).map(&:to_s)] }
315
- ]
318
+ if options[:stem_exclusion]
319
+ settings[:analysis][:filter][:searchkick_stem_exclusion] = {
320
+ type: "keyword_marker",
321
+ keywords: options[:stem_exclusion]
322
+ }
316
323
 
317
- word = options[:word] != false && (!options[:match] || options[:match] == :word)
324
+ settings[:analysis][:analyzer].each do |_, analyzer|
325
+ stemmer_index = analyzer[:filter].index("searchkick_stemmer") if analyzer[:filter]
326
+ analyzer[:filter].insert(stemmer_index, "searchkick_stem_exclusion") if stemmer_index
327
+ end
328
+ end
329
+ end
318
330
 
319
- mapping_options[:searchable].delete("_all")
331
+ def generate_mappings
332
+ mapping = {}
320
333
 
321
- analyzed_field_options = {type: default_type, index: index_true_value, analyzer: default_analyzer}
334
+ keyword_mapping = {type: "keyword"}
335
+ keyword_mapping[:ignore_above] = options[:ignore_above] || 30000
322
336
 
323
- mapping_options.values.flatten.uniq.each do |field|
324
- fields = {}
337
+ # conversions
338
+ Array(options[:conversions]).each do |conversions_field|
339
+ mapping[conversions_field] = {
340
+ type: "nested",
341
+ properties: {
342
+ query: {type: default_type, analyzer: "searchkick_keyword"},
343
+ count: {type: "integer"}
344
+ }
345
+ }
346
+ end
325
347
 
326
- if options.key?(:filterable) && !mapping_options[:filterable].include?(field)
327
- fields[field] = {type: default_type, index: index_false_value}
328
- else
329
- fields[field] = keyword_mapping
330
- end
348
+ mapping_options = Hash[
349
+ [:suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable]
350
+ .map { |type| [type, (options[type] || []).map(&:to_s)] }
351
+ ]
331
352
 
332
- if !options[:searchable] || mapping_options[:searchable].include?(field)
333
- if word
334
- fields[:analyzed] = analyzed_field_options
353
+ word = options[:word] != false && (!options[:match] || options[:match] == :word)
335
354
 
336
- if mapping_options[:highlight].include?(field)
337
- fields[:analyzed][:term_vector] = "with_positions_offsets"
338
- end
339
- end
355
+ mapping_options[:searchable].delete("_all")
340
356
 
341
- mapping_options.except(:highlight, :searchable, :filterable, :word).each do |type, f|
342
- if options[:match] == type || f.include?(field)
343
- fields[type] = {type: default_type, index: index_true_value, analyzer: "searchkick_#{type}_index"}
344
- end
345
- end
346
- end
357
+ analyzed_field_options = {type: default_type, index: true, analyzer: default_analyzer}
347
358
 
348
- mapping[field] = fields[field].merge(fields: fields.except(field))
349
- end
359
+ mapping_options.values.flatten.uniq.each do |field|
360
+ fields = {}
350
361
 
351
- (options[:locations] || []).map(&:to_s).each do |field|
352
- mapping[field] = {
353
- type: "geo_point"
354
- }
362
+ if options.key?(:filterable) && !mapping_options[:filterable].include?(field)
363
+ fields[field] = {type: default_type, index: false}
364
+ else
365
+ fields[field] = keyword_mapping
355
366
  end
356
367
 
357
- options[:geo_shape] = options[:geo_shape].product([{}]).to_h if options[:geo_shape].is_a?(Array)
358
- (options[:geo_shape] || {}).each do |field, shape_options|
359
- mapping[field] = shape_options.merge(type: "geo_shape")
360
- end
368
+ if !options[:searchable] || mapping_options[:searchable].include?(field)
369
+ if word
370
+ fields[:analyzed] = analyzed_field_options
361
371
 
362
- if options[:inheritance]
363
- mapping[:type] = keyword_mapping
364
- end
372
+ if mapping_options[:highlight].include?(field)
373
+ fields[:analyzed][:term_vector] = "with_positions_offsets"
374
+ end
375
+ end
365
376
 
366
- routing = {}
367
- if options[:routing]
368
- routing = {required: true}
369
- unless options[:routing] == true
370
- routing[:path] = options[:routing].to_s
377
+ mapping_options.except(:highlight, :searchable, :filterable, :word).each do |type, f|
378
+ if options[:match] == type || f.include?(field)
379
+ fields[type] = {type: default_type, index: true, analyzer: "searchkick_#{type}_index"}
380
+ end
371
381
  end
372
382
  end
373
383
 
374
- dynamic_fields = {
375
- # analyzed field must be the default field for include_in_all
376
- # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
377
- # however, we can include the not_analyzed field in _all
378
- # and the _all index analyzer will take care of it
379
- "{name}" => keyword_mapping
384
+ mapping[field] = fields[field].merge(fields: fields.except(field))
385
+ end
386
+
387
+ (options[:locations] || []).map(&:to_s).each do |field|
388
+ mapping[field] = {
389
+ type: "geo_point"
380
390
  }
391
+ end
381
392
 
382
- if options.key?(:filterable)
383
- dynamic_fields["{name}"] = {type: default_type, index: index_false_value}
384
- end
393
+ options[:geo_shape] = options[:geo_shape].product([{}]).to_h if options[:geo_shape].is_a?(Array)
394
+ (options[:geo_shape] || {}).each do |field, shape_options|
395
+ mapping[field] = shape_options.merge(type: "geo_shape")
396
+ end
385
397
 
386
- unless options[:searchable]
387
- if options[:match] && options[:match] != :word
388
- dynamic_fields[options[:match]] = {type: default_type, index: index_true_value, analyzer: "searchkick_#{options[:match]}_index"}
389
- end
398
+ if options[:inheritance]
399
+ mapping[:type] = keyword_mapping
400
+ end
390
401
 
391
- if word
392
- dynamic_fields[:analyzed] = analyzed_field_options
393
- end
402
+ routing = {}
403
+ if options[:routing]
404
+ routing = {required: true}
405
+ unless options[:routing] == true
406
+ routing[:path] = options[:routing].to_s
394
407
  end
408
+ end
395
409
 
410
+ dynamic_fields = {
411
+ # analyzed field must be the default field for include_in_all
396
412
  # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
397
- multi_field = dynamic_fields["{name}"].merge(fields: dynamic_fields.except("{name}"))
398
-
399
- mappings = {
400
- properties: mapping,
401
- _routing: routing,
402
- # https://gist.github.com/kimchy/2898285
403
- dynamic_templates: [
404
- {
405
- string_template: {
406
- match: "*",
407
- match_mapping_type: "string",
408
- mapping: multi_field
409
- }
413
+ # however, we can include the not_analyzed field in _all
414
+ # and the _all index analyzer will take care of it
415
+ "{name}" => keyword_mapping
416
+ }
417
+
418
+ if options.key?(:filterable)
419
+ dynamic_fields["{name}"] = {type: default_type, index: false}
420
+ end
421
+
422
+ unless options[:searchable]
423
+ if options[:match] && options[:match] != :word
424
+ dynamic_fields[options[:match]] = {type: default_type, index: true, analyzer: "searchkick_#{options[:match]}_index"}
425
+ end
426
+
427
+ if word
428
+ dynamic_fields[:analyzed] = analyzed_field_options
429
+ end
430
+ end
431
+
432
+ # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
433
+ multi_field = dynamic_fields["{name}"].merge(fields: dynamic_fields.except("{name}"))
434
+
435
+ mappings = {
436
+ properties: mapping,
437
+ _routing: routing,
438
+ # https://gist.github.com/kimchy/2898285
439
+ dynamic_templates: [
440
+ {
441
+ string_template: {
442
+ match: "*",
443
+ match_mapping_type: "string",
444
+ mapping: multi_field
410
445
  }
411
- ]
446
+ }
447
+ ]
448
+ }
449
+
450
+ if below70?
451
+ mappings = {index_type => mappings}
452
+ end
453
+
454
+ mappings
455
+ end
456
+
457
# Registers the index-time "searchkick_synonym" token filter and wires it
# into the default analyzer and the word_start/word_middle/word_end index
# analyzers.
#
# Reads options[:synonyms]; a callable value is invoked to obtain the list.
# Nothing is changed when the resulting list is empty.
#
# settings - the settings Hash being built; mutated in place. It must already
#            contain settings[:analysis][:filter] and the analyzers named above.
def add_synonyms(settings)
  synonyms = options[:synonyms] || []
  synonyms = synonyms.call if synonyms.respond_to?(:call)
  return unless synonyms.any?

  analysis = settings[:analysis]
  token_filters = analysis[:filter]

  # Entries of size 1 or less are skipped. For Array entries only the first
  # run of whitespace in each term is removed, so three-word synonyms fail
  # noisily instead of silently. String entries are passed through as written.
  rules = synonyms.select { |entry| entry.size > 1 }.map do |entry|
    rule = entry.is_a?(Array) ? entry.map { |term| term.sub(/\s+/, "") }.join(",") : entry
    rule.downcase
  end

  token_filters[:searchkick_synonym] = {
    type: "synonym",
    synonyms: rules
  }

  # Choosing a place for the synonym filter when stemming is not easy:
  # https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
  # TODO use a snowball stemmer on synonyms when creating the token filter
  #
  # Approach for multi-word synonyms (synonym phrases), from
  # http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
  # - only apply the synonym expansion at index time
  # - do not apply the synonym filter at search time
  # - use directional synonyms where appropriate, so overly general terms
  #   are not injected
  analyzers = analysis[:analyzer]
  analyzers[default_analyzer][:filter].insert(2, "searchkick_synonym")

  %w[word_start word_middle word_end].each do |type|
    analyzers[:"searchkick_#{type}_index"][:filter].insert(2, "searchkick_synonym")
  end
end
413
482
 
414
- if below70
415
- index_type = options[:_type]
416
- index_type = index_type.call if index_type.respond_to?(:call)
417
- mappings = {index_type => mappings}
483
# Registers the "searchkick_synonym_graph" token filter and inserts it into
# the :searchkick_search2 and :searchkick_word_search analyzers.
#
# Reads options[:search_synonyms]; a callable value is invoked first. The
# result may be:
# - a String, used as the filter's synonyms_path (and flagged updateable
#   unless below73? is true), or
# - a list of synonym entries, given inline; entries of size 1 or less are
#   skipped, Array entries are joined with commas, and everything is
#   downcased.
# Nothing is changed when the result is an empty list.
#
# settings - the settings Hash being built; mutated in place.
def add_search_synonyms(settings)
  search_synonyms = options[:search_synonyms] || []
  search_synonyms = search_synonyms.call if search_synonyms.respond_to?(:call)

  from_path = search_synonyms.is_a?(String)
  return unless from_path || search_synonyms.any?

  synonym_graph = {type: "synonym_graph"}
  if from_path
    synonym_graph[:synonyms_path] = search_synonyms
    synonym_graph[:updateable] = true unless below73?
  else
    # TODO confirm this is correct
    synonym_graph[:synonyms] =
      search_synonyms
        .select { |entry| entry.size > 1 }
        .map { |entry| entry.is_a?(Array) ? entry.join(",") : entry }
        .map(&:downcase)
  end

  analysis = settings[:analysis]
  analysis[:filter][:searchkick_synonym_graph] = synonym_graph

  %i[searchkick_search2 searchkick_word_search].each do |analyzer|
    analysis[:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph")
  end
end
422
507
 
423
- {
424
- settings: settings,
425
- mappings: mappings
508
# Registers the "searchkick_wordnet" synonym filter (WordNet format, read
# from Searchkick.wordnet_path) and adds it to the default analyzer and the
# word_start/word_middle/word_end index analyzers.
#
# settings - the settings Hash being built; mutated in place.
def add_wordnet(settings)
  analysis = settings[:analysis]
  analysis[:filter][:searchkick_wordnet] = {
    type: "synonym",
    format: "wordnet",
    synonyms_path: Searchkick.wordnet_path
  }

  analyzers = analysis[:analyzer]

  # NOTE: the default analyzer receives the filter twice - once at position 4
  # and once appended to the end of its chain.
  default_chain = analyzers[default_analyzer][:filter]
  default_chain.insert(4, "searchkick_wordnet")
  default_chain.push("searchkick_wordnet")

  %w[word_start word_middle word_end].each do |type|
    analyzers[:"searchkick_#{type}_index"][:filter].insert(2, "searchkick_wordnet")
  end
end
522
+
523
# Sets index.max_result_window to 1_000_000_000 unless the settings already
# carry a value for it, either nested (settings[:index][:max_result_window])
# or under the flat :"index.max_result_window" key.
#
# settings - the settings Hash being built; mutated in place.
def set_deep_paging(settings)
  return if settings.dig(:index, :max_result_window) || settings[:"index.max_result_window"]

  (settings[:index] ||= {})[:max_result_window] = 1_000_000_000
end
529
+
530
# Returns the value of options[:_type], invoking it first when it responds
# to #call. The result is memoized in @index_type via ||=, so a nil or false
# result is resolved again on the next call.
def index_type
  @index_type ||= begin
    configured = options[:_type]
    configured.respond_to?(:call) ? configured.call : configured
  end
end
537
+
538
# Field type used in the text field mappings built in this file.
def default_type
  "text"
end
541
+
542
# Name of the analyzer used for analyzed fields; the synonym and wordnet
# helpers extend this analyzer's filter chain.
def default_analyzer
  :searchkick_index
end
545
+
546
# Version gate: returns the result of Searchkick.server_below?("6.2.0").
def below62?
  Searchkick.server_below?("6.2.0")
end
549
+
550
# Version gate: returns the result of Searchkick.server_below?("7.0.0").
def below70?
  Searchkick.server_below?("7.0.0")
end
553
+
554
# Version gate: returns the result of Searchkick.server_below?("7.3.0").
def below73?
  Searchkick.server_below?("7.3.0")
end
428
557
  end
429
558
  end