searchkick 4.0.0 → 4.4.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,8 +1,10 @@
1
+ # dependencies
1
2
  require "active_support"
2
3
  require "active_support/core_ext/hash/deep_merge"
3
4
  require "elasticsearch"
4
5
  require "hashie"
5
6
 
7
+ # modules
6
8
  require "searchkick/bulk_indexer"
7
9
  require "searchkick/index"
8
10
  require "searchkick/indexer"
@@ -17,6 +19,7 @@ require "searchkick/record_indexer"
17
19
  require "searchkick/results"
18
20
  require "searchkick/version"
19
21
 
22
+ # integrations
20
23
  require "searchkick/railtie" if defined?(Rails)
21
24
  require "searchkick/logging" if defined?(ActiveSupport::Notifications)
22
25
 
@@ -27,6 +30,7 @@ module Searchkick
27
30
  autoload :ProcessQueueJob, "searchkick/process_queue_job"
28
31
  autoload :ReindexV2Job, "searchkick/reindex_v2_job"
29
32
 
33
+ # errors
30
34
  class Error < StandardError; end
31
35
  class MissingIndexError < Error; end
32
36
  class UnsupportedVersionError < Error; end
@@ -49,7 +53,7 @@ module Searchkick
49
53
 
50
54
  def self.client
51
55
  @client ||= begin
52
- require "typhoeus/adapters/faraday" if defined?(Typhoeus)
56
+ require "typhoeus/adapters/faraday" if defined?(Typhoeus) && Gem::Version.new(Faraday::VERSION) < Gem::Version.new("0.14.0")
53
57
 
54
58
  Elasticsearch::Client.new({
55
59
  url: ENV["ELASTICSEARCH_URL"],
@@ -67,7 +71,7 @@ module Searchkick
67
71
  end
68
72
 
69
73
  def self.search_timeout
70
- @search_timeout || timeout
74
+ (defined?(@search_timeout) && @search_timeout) || timeout
71
75
  end
72
76
 
73
77
  def self.server_version
@@ -112,7 +116,7 @@ module Searchkick
112
116
  end
113
117
 
114
118
  options = options.merge(block: block) if block
115
- query = Searchkick::Query.new(klass, term, options)
119
+ query = Searchkick::Query.new(klass, term, **options)
116
120
  if options[:execute] == false
117
121
  query
118
122
  else
@@ -142,7 +146,7 @@ module Searchkick
142
146
  end
143
147
  end
144
148
 
145
- def self.callbacks(value)
149
+ def self.callbacks(value = nil)
146
150
  if block_given?
147
151
  previous_value = callbacks_value
148
152
  begin
@@ -160,6 +164,7 @@ module Searchkick
160
164
 
161
165
  def self.aws_credentials=(creds)
162
166
  begin
167
+ # TODO remove in Searchkick 5 (just use aws_sigv4)
163
168
  require "faraday_middleware/aws_signers_v4"
164
169
  rescue LoadError
165
170
  require "faraday_middleware/aws_sigv4"
@@ -169,17 +174,16 @@ module Searchkick
169
174
  end
170
175
 
171
176
  def self.reindex_status(index_name)
172
- if redis
173
- batches_left = Searchkick::Index.new(index_name).batches_left
174
- {
175
- completed: batches_left == 0,
176
- batches_left: batches_left
177
- }
178
- else
179
- raise Searchkick::Error, "Redis not configured"
180
- end
177
+ raise Searchkick::Error, "Redis not configured" unless redis
178
+
179
+ batches_left = Searchkick::Index.new(index_name).batches_left
180
+ {
181
+ completed: batches_left == 0,
182
+ batches_left: batches_left
183
+ }
181
184
  end
182
185
 
186
+ # TODO use ConnectionPool::Wrapper when redis is set so this is no longer needed
183
187
  def self.with_redis
184
188
  if redis
185
189
  if redis.respond_to?(:with)
@@ -192,6 +196,10 @@ module Searchkick
192
196
  end
193
197
  end
194
198
 
199
+ def self.warn(message)
200
+ super("[searchkick] WARNING: #{message}")
201
+ end
202
+
195
203
  # private
196
204
  def self.load_records(records, ids)
197
205
  records =
@@ -245,11 +253,26 @@ module Searchkick
245
253
  }
246
254
  end
247
255
  end
256
+
257
+ # private
258
+ # methods are forwarded to base class
259
+ # this checks to see if scope exists on that class
260
+ # it's a bit tricky, but this seems to work
261
+ def self.relation?(klass)
262
+ if klass.respond_to?(:current_scope)
263
+ !klass.current_scope.nil?
264
+ elsif defined?(Mongoid::Threaded)
265
+ !Mongoid::Threaded.current_scope(klass).nil?
266
+ end
267
+ end
248
268
  end
249
269
 
250
- # TODO find better ActiveModel hook
251
270
  require "active_model/callbacks"
252
271
  ActiveModel::Callbacks.include(Searchkick::Model)
272
+ # TODO use
273
+ # ActiveSupport.on_load(:mongoid) do
274
+ # Mongoid::Document::ClassMethods.include Searchkick::Model
275
+ # end
253
276
 
254
277
  ActiveSupport.on_load(:active_record) do
255
278
  extend Searchkick::Model
@@ -61,7 +61,7 @@ module Searchkick
61
61
  if records.any?
62
62
  if async
63
63
  Searchkick::BulkReindexJob.perform_later(
64
- class_name: records.first.class.name,
64
+ class_name: records.first.class.searchkick_options[:class_name],
65
65
  record_ids: records.map(&:id),
66
66
  index_name: index.name,
67
67
  method_name: method_name ? method_name.to_s : nil
@@ -87,6 +87,8 @@ module Searchkick
87
87
  # TODO expire Redis key
88
88
  primary_key = scope.primary_key
89
89
 
90
+ scope = scope.select(primary_key).except(:includes, :preload)
91
+
90
92
  starting_id =
91
93
  begin
92
94
  scope.minimum(primary_key)
@@ -139,8 +141,8 @@ module Searchkick
139
141
 
140
142
  def bulk_reindex_job(scope, batch_id, options)
141
143
  Searchkick.with_redis { |r| r.sadd(batches_key, batch_id) }
142
- Searchkick::BulkReindexJob.perform_later({
143
- class_name: scope.model_name.name,
144
+ Searchkick::BulkReindexJob.perform_later(**{
145
+ class_name: scope.searchkick_options[:class_name],
144
146
  index_name: index.name,
145
147
  batch_id: batch_id
146
148
  }.merge(options))
@@ -2,8 +2,6 @@ require "searchkick/index_options"
2
2
 
3
3
  module Searchkick
4
4
  class Index
5
- include IndexOptions
6
-
7
5
  attr_reader :name, :options
8
6
 
9
7
  def initialize(name, options = {})
@@ -12,6 +10,10 @@ module Searchkick
12
10
  @klass_document_type = {} # cache
13
11
  end
14
12
 
13
+ def index_options
14
+ IndexOptions.new(self).index_options
15
+ end
16
+
15
17
  def create(body = {})
16
18
  client.indices.create index: name, body: body
17
19
  end
@@ -47,7 +49,7 @@ module Searchkick
47
49
  end
48
50
 
49
51
  def refresh_interval
50
- settings.values.first["settings"]["index"]["refresh_interval"]
52
+ index_settings["refresh_interval"]
51
53
  end
52
54
 
53
55
  def update_settings(settings)
@@ -174,6 +176,17 @@ module Searchkick
174
176
  Searchkick.search(like_text, model: record.class, **options)
175
177
  end
176
178
 
179
+ def reload_synonyms
180
+ require "elasticsearch/xpack"
181
+ raise Error, "Requires Elasticsearch 7.3+" if Searchkick.server_below?("7.3.0")
182
+ raise Error, "Requires elasticsearch-xpack 7.8+" unless client.xpack.respond_to?(:indices)
183
+ begin
184
+ client.xpack.indices.reload_search_analyzers(index: name)
185
+ rescue Elasticsearch::Transport::Transport::Errors::MethodNotAllowed
186
+ raise Error, "Requires non-OSS version of Elasticsearch"
187
+ end
188
+ end
189
+
177
190
  # queue
178
191
 
179
192
  def reindex_queue
@@ -184,13 +197,20 @@ module Searchkick
184
197
 
185
198
  def reindex(relation, method_name, scoped:, full: false, scope: nil, **options)
186
199
  refresh = options.fetch(:refresh, !scoped)
200
+ options.delete(:refresh)
187
201
 
188
202
  if method_name
203
+ # TODO throw ArgumentError
204
+ Searchkick.warn("unsupported keywords: #{options.keys.map(&:inspect).join(", ")}") if options.any?
205
+
189
206
  # update
190
207
  import_scope(relation, method_name: method_name, scope: scope)
191
208
  self.refresh if refresh
192
209
  true
193
210
  elsif scoped && !full
211
+ # TODO throw ArgumentError
212
+ Searchkick.warn("unsupported keywords: #{options.keys.map(&:inspect).join(", ")}") if options.any?
213
+
194
214
  # reindex association
195
215
  import_scope(relation, scope: scope)
196
216
  self.refresh if refresh
@@ -249,6 +269,11 @@ module Searchkick
249
269
  end
250
270
  end
251
271
 
272
+ # private
273
+ def uuid
274
+ index_settings["uuid"]
275
+ end
276
+
252
277
  protected
253
278
 
254
279
  def client
@@ -259,6 +284,14 @@ module Searchkick
259
284
  @bulk_indexer ||= BulkIndexer.new(self)
260
285
  end
261
286
 
287
+ def index_settings
288
+ settings.values.first["settings"]["index"]
289
+ end
290
+
291
+ def import_before_promotion(index, relation, **import_options)
292
+ index.import_scope(relation, **import_options)
293
+ end
294
+
262
295
  # https://gist.github.com/jarosan/3124884
263
296
  # http://www.elasticsearch.org/blog/changing-mapping-with-zero-downtime/
264
297
  def reindex_scope(relation, import: true, resume: false, retain: false, async: false, refresh_interval: nil, scope: nil)
@@ -281,14 +314,16 @@ module Searchkick
281
314
  scope: scope
282
315
  }
283
316
 
317
+ uuid = index.uuid
318
+
284
319
  # check if alias exists
285
320
  alias_exists = alias_exists?
286
321
  if alias_exists
287
- # import before promotion
288
- index.import_scope(relation, **import_options) if import
322
+ import_before_promotion(index, relation, **import_options) if import
289
323
 
290
324
  # get existing indices to remove
291
325
  unless async
326
+ check_uuid(uuid, index.uuid)
292
327
  promote(index.name, update_refresh_interval: !refresh_interval.nil?)
293
328
  clean_indices unless retain
294
329
  end
@@ -313,6 +348,7 @@ module Searchkick
313
348
  # already promoted if alias didn't exist
314
349
  if alias_exists
315
350
  puts "Jobs complete. Promoting..."
351
+ check_uuid(uuid, index.uuid)
316
352
  promote(index.name, update_refresh_interval: !refresh_interval.nil?)
317
353
  end
318
354
  clean_indices unless retain
@@ -331,5 +367,15 @@ module Searchkick
331
367
 
332
368
  raise e
333
369
  end
370
+
371
+ # safety check
372
+ # still a chance of a race condition since it's called before promotion
373
+ # ideal is for user to disable automatic index creation
374
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html#index-creation
375
+ def check_uuid(old_uuid, new_uuid)
376
+ if old_uuid != new_uuid
377
+ raise Searchkick::Error, "Safety check failed - only run one Model.reindex per model at a time"
378
+ end
379
+ end
334
380
  end
335
381
  end
@@ -1,429 +1,558 @@
1
1
  module Searchkick
2
- module IndexOptions
2
+ class IndexOptions
3
+ attr_reader :options
4
+
5
+ def initialize(index)
6
+ @options = index.options
7
+ end
8
+
3
9
  def index_options
4
- options = @options
5
- language = options[:language]
6
- language = language.call if language.respond_to?(:call)
10
+ custom_mapping = options[:mappings] || {}
11
+ if below70? && custom_mapping.keys.map(&:to_sym).include?(:properties)
12
+ # add type
13
+ custom_mapping = {index_type => custom_mapping}
14
+ end
7
15
 
8
16
  if options[:mappings] && !options[:merge_mappings]
9
17
  settings = options[:settings] || {}
10
- mappings = options[:mappings]
18
+ mappings = custom_mapping
11
19
  else
12
- below62 = Searchkick.server_below?("6.2.0")
13
- below70 = Searchkick.server_below?("7.0.0")
14
-
15
- default_type = "text"
16
- default_analyzer = :searchkick_index
17
- keyword_mapping = {type: "keyword"}
18
-
19
- all = options.key?(:_all) ? options[:_all] : false
20
- index_true_value = true
21
- index_false_value = false
22
-
23
- keyword_mapping[:ignore_above] = options[:ignore_above] || 30000
24
-
25
- settings = {
26
- analysis: {
27
- analyzer: {
28
- searchkick_keyword: {
29
- type: "custom",
30
- tokenizer: "keyword",
31
- filter: ["lowercase"] + (options[:stem_conversions] ? ["searchkick_stemmer"] : [])
32
- },
33
- default_analyzer => {
34
- type: "custom",
35
- # character filters -> tokenizer -> token filters
36
- # https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html
37
- char_filter: ["ampersand"],
38
- tokenizer: "standard",
39
- # synonym should come last, after stemming and shingle
40
- # shingle must come before searchkick_stemmer
41
- filter: ["lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
42
- },
43
- searchkick_search: {
44
- type: "custom",
45
- char_filter: ["ampersand"],
46
- tokenizer: "standard",
47
- filter: ["lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
48
- },
49
- searchkick_search2: {
50
- type: "custom",
51
- char_filter: ["ampersand"],
52
- tokenizer: "standard",
53
- filter: ["lowercase", "asciifolding", "searchkick_stemmer"]
54
- },
55
- # https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
56
- searchkick_autocomplete_search: {
57
- type: "custom",
58
- tokenizer: "keyword",
59
- filter: ["lowercase", "asciifolding"]
60
- },
61
- searchkick_word_search: {
62
- type: "custom",
63
- tokenizer: "standard",
64
- filter: ["lowercase", "asciifolding"]
65
- },
66
- searchkick_suggest_index: {
67
- type: "custom",
68
- tokenizer: "standard",
69
- filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
70
- },
71
- searchkick_text_start_index: {
72
- type: "custom",
73
- tokenizer: "keyword",
74
- filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
75
- },
76
- searchkick_text_middle_index: {
77
- type: "custom",
78
- tokenizer: "keyword",
79
- filter: ["lowercase", "asciifolding", "searchkick_ngram"]
80
- },
81
- searchkick_text_end_index: {
82
- type: "custom",
83
- tokenizer: "keyword",
84
- filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
85
- },
86
- searchkick_word_start_index: {
87
- type: "custom",
88
- tokenizer: "standard",
89
- filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
90
- },
91
- searchkick_word_middle_index: {
92
- type: "custom",
93
- tokenizer: "standard",
94
- filter: ["lowercase", "asciifolding", "searchkick_ngram"]
95
- },
96
- searchkick_word_end_index: {
97
- type: "custom",
98
- tokenizer: "standard",
99
- filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
100
- }
101
- },
102
- filter: {
103
- searchkick_index_shingle: {
104
- type: "shingle",
105
- token_separator: ""
106
- },
107
- # lucky find http://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
108
- searchkick_search_shingle: {
109
- type: "shingle",
110
- token_separator: "",
111
- output_unigrams: false,
112
- output_unigrams_if_no_shingles: true
113
- },
114
- searchkick_suggest_shingle: {
115
- type: "shingle",
116
- max_shingle_size: 5
117
- },
118
- searchkick_edge_ngram: {
119
- type: "edgeNGram",
120
- min_gram: 1,
121
- max_gram: 50
122
- },
123
- searchkick_ngram: {
124
- type: "nGram",
125
- min_gram: 1,
126
- max_gram: 50
127
- },
128
- searchkick_stemmer: {
129
- # use stemmer if language is lowercase, snowball otherwise
130
- type: language == language.to_s.downcase ? "stemmer" : "snowball",
131
- language: language || "English"
132
- }
133
- },
134
- char_filter: {
135
- # https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html
136
- # &_to_and
137
- ampersand: {
138
- type: "mapping",
139
- mappings: ["&=> and "]
140
- }
141
- }
142
- }
143
- }
20
+ settings = generate_settings
21
+ mappings = generate_mappings.symbolize_keys.deep_merge(custom_mapping.symbolize_keys)
22
+ end
23
+
24
+ set_deep_paging(settings) if options[:deep_paging]
25
+
26
+ {
27
+ settings: settings,
28
+ mappings: mappings
29
+ }
30
+ end
144
31
 
145
- stem = options[:stem]
32
+ def generate_settings
33
+ language = options[:language]
34
+ language = language.call if language.respond_to?(:call)
146
35
 
147
- case language
148
- when "chinese"
149
- settings[:analysis][:analyzer].merge!(
36
+ settings = {
37
+ analysis: {
38
+ analyzer: {
39
+ searchkick_keyword: {
40
+ type: "custom",
41
+ tokenizer: "keyword",
42
+ filter: ["lowercase"] + (options[:stem_conversions] ? ["searchkick_stemmer"] : [])
43
+ },
150
44
  default_analyzer => {
151
- type: "ik_smart"
45
+ type: "custom",
46
+ # character filters -> tokenizer -> token filters
47
+ # https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html
48
+ char_filter: ["ampersand"],
49
+ tokenizer: "standard",
50
+ # synonym should come last, after stemming and shingle
51
+ # shingle must come before searchkick_stemmer
52
+ filter: ["lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
152
53
  },
153
54
  searchkick_search: {
154
- type: "ik_smart"
55
+ type: "custom",
56
+ char_filter: ["ampersand"],
57
+ tokenizer: "standard",
58
+ filter: ["lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
155
59
  },
156
60
  searchkick_search2: {
157
- type: "ik_max_word"
158
- }
159
- )
160
-
161
- stem = false
162
- when "japanese"
163
- settings[:analysis][:analyzer].merge!(
164
- default_analyzer => {
165
- type: "kuromoji"
61
+ type: "custom",
62
+ char_filter: ["ampersand"],
63
+ tokenizer: "standard",
64
+ filter: ["lowercase", "asciifolding", "searchkick_stemmer"]
166
65
  },
167
- searchkick_search: {
168
- type: "kuromoji"
66
+ # https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
67
+ searchkick_autocomplete_search: {
68
+ type: "custom",
69
+ tokenizer: "keyword",
70
+ filter: ["lowercase", "asciifolding"]
169
71
  },
170
- searchkick_search2: {
171
- type: "kuromoji"
172
- }
173
- )
174
-
175
- stem = false
176
- when "korean"
177
- settings[:analysis][:analyzer].merge!(
178
- default_analyzer => {
179
- type: "openkoreantext-analyzer"
72
+ searchkick_word_search: {
73
+ type: "custom",
74
+ tokenizer: "standard",
75
+ filter: ["lowercase", "asciifolding"]
180
76
  },
181
- searchkick_search: {
182
- type: "openkoreantext-analyzer"
77
+ searchkick_suggest_index: {
78
+ type: "custom",
79
+ tokenizer: "standard",
80
+ filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
183
81
  },
184
- searchkick_search2: {
185
- type: "openkoreantext-analyzer"
186
- }
187
- )
188
-
189
- stem = false
190
- when "vietnamese"
191
- settings[:analysis][:analyzer].merge!(
192
- default_analyzer => {
193
- type: "vi_analyzer"
82
+ searchkick_text_start_index: {
83
+ type: "custom",
84
+ tokenizer: "keyword",
85
+ filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
194
86
  },
195
- searchkick_search: {
196
- type: "vi_analyzer"
87
+ searchkick_text_middle_index: {
88
+ type: "custom",
89
+ tokenizer: "keyword",
90
+ filter: ["lowercase", "asciifolding", "searchkick_ngram"]
197
91
  },
198
- searchkick_search2: {
199
- type: "vi_analyzer"
92
+ searchkick_text_end_index: {
93
+ type: "custom",
94
+ tokenizer: "keyword",
95
+ filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
96
+ },
97
+ searchkick_word_start_index: {
98
+ type: "custom",
99
+ tokenizer: "standard",
100
+ filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
101
+ },
102
+ searchkick_word_middle_index: {
103
+ type: "custom",
104
+ tokenizer: "standard",
105
+ filter: ["lowercase", "asciifolding", "searchkick_ngram"]
106
+ },
107
+ searchkick_word_end_index: {
108
+ type: "custom",
109
+ tokenizer: "standard",
110
+ filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
200
111
  }
201
- )
202
-
203
- stem = false
204
- when "polish", "ukrainian", "smartcn"
205
- settings[:analysis][:analyzer].merge!(
206
- default_analyzer => {
207
- type: language
112
+ },
113
+ filter: {
114
+ searchkick_index_shingle: {
115
+ type: "shingle",
116
+ token_separator: ""
208
117
  },
209
- searchkick_search: {
210
- type: language
118
+ # lucky find https://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
119
+ searchkick_search_shingle: {
120
+ type: "shingle",
121
+ token_separator: "",
122
+ output_unigrams: false,
123
+ output_unigrams_if_no_shingles: true
211
124
  },
212
- searchkick_search2: {
213
- type: language
125
+ searchkick_suggest_shingle: {
126
+ type: "shingle",
127
+ max_shingle_size: 5
128
+ },
129
+ searchkick_edge_ngram: {
130
+ type: "edge_ngram",
131
+ min_gram: 1,
132
+ max_gram: 50
133
+ },
134
+ searchkick_ngram: {
135
+ type: "ngram",
136
+ min_gram: 1,
137
+ max_gram: 50
138
+ },
139
+ searchkick_stemmer: {
140
+ # use stemmer if language is lowercase, snowball otherwise
141
+ type: language == language.to_s.downcase ? "stemmer" : "snowball",
142
+ language: language || "English"
143
+ }
144
+ },
145
+ char_filter: {
146
+ # https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html
147
+ # &_to_and
148
+ ampersand: {
149
+ type: "mapping",
150
+ mappings: ["&=> and "]
214
151
  }
215
- )
152
+ }
153
+ }
154
+ }
216
155
 
217
- stem = false
218
- end
156
+ update_language(settings, language)
157
+ update_stemming(settings)
219
158
 
220
- if Searchkick.env == "test"
221
- settings[:number_of_shards] = 1
222
- settings[:number_of_replicas] = 0
223
- end
159
+ if Searchkick.env == "test"
160
+ settings[:number_of_shards] = 1
161
+ settings[:number_of_replicas] = 0
162
+ end
224
163
 
225
- if options[:similarity]
226
- settings[:similarity] = {default: {type: options[:similarity]}}
227
- end
164
+ # TODO remove in Searchkick 5 (classic no longer supported)
165
+ if options[:similarity]
166
+ settings[:similarity] = {default: {type: options[:similarity]}}
167
+ end
228
168
 
229
- unless below62
230
- settings[:index] = {
231
- max_ngram_diff: 49,
232
- max_shingle_diff: 4
233
- }
234
- end
169
+ unless below62?
170
+ settings[:index] = {
171
+ max_ngram_diff: 49,
172
+ max_shingle_diff: 4
173
+ }
174
+ end
235
175
 
236
- if options[:case_sensitive]
237
- settings[:analysis][:analyzer].each do |_, analyzer|
238
- analyzer[:filter].delete("lowercase")
239
- end
176
+ if options[:case_sensitive]
177
+ settings[:analysis][:analyzer].each do |_, analyzer|
178
+ analyzer[:filter].delete("lowercase")
240
179
  end
180
+ end
241
181
 
242
- if stem == false
243
- settings[:analysis][:filter].delete(:searchkick_stemmer)
244
- settings[:analysis][:analyzer].each do |_, analyzer|
245
- analyzer[:filter].delete("searchkick_stemmer") if analyzer[:filter]
246
- end
247
- end
182
+ # TODO do this last in Searchkick 5
183
+ settings = settings.symbolize_keys.deep_merge((options[:settings] || {}).symbolize_keys)
248
184
 
249
- settings = settings.symbolize_keys.deep_merge((options[:settings] || {}).symbolize_keys)
185
+ add_synonyms(settings)
186
+ add_search_synonyms(settings)
187
+ # TODO remove in Searchkick 5
188
+ add_wordnet(settings) if options[:wordnet]
250
189
 
251
- # synonyms
252
- synonyms = options[:synonyms] || []
190
+ if options[:special_characters] == false
191
+ settings[:analysis][:analyzer].each_value do |analyzer_settings|
192
+ analyzer_settings[:filter].reject! { |f| f == "asciifolding" }
193
+ end
194
+ end
253
195
 
254
- synonyms = synonyms.call if synonyms.respond_to?(:call)
196
+ settings
197
+ end
255
198
 
256
- if synonyms.any?
257
- settings[:analysis][:filter][:searchkick_synonym] = {
258
- type: "synonym",
259
- # only remove a single space from synonyms so three-word synonyms will fail noisily instead of silently
260
- synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.map { |s2| s2.sub(/\s+/, "") }.join(",") : s }.map(&:downcase)
199
+ def update_language(settings, language)
200
+ case language
201
+ when "chinese"
202
+ settings[:analysis][:analyzer].merge!(
203
+ default_analyzer => {
204
+ type: "ik_smart"
205
+ },
206
+ searchkick_search: {
207
+ type: "ik_smart"
208
+ },
209
+ searchkick_search2: {
210
+ type: "ik_max_word"
261
211
  }
262
- # choosing a place for the synonym filter when stemming is not easy
263
- # https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
264
- # TODO use a snowball stemmer on synonyms when creating the token filter
265
-
266
- # http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
267
- # I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
268
- # - Only apply the synonym expansion at index time
269
- # - Don't have the synonym filter applied search
270
- # - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
271
- settings[:analysis][:analyzer][default_analyzer][:filter].insert(2, "searchkick_synonym")
272
-
273
- %w(word_start word_middle word_end).each do |type|
274
- settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym")
275
- end
276
- end
277
-
278
- if options[:wordnet]
279
- settings[:analysis][:filter][:searchkick_wordnet] = {
280
- type: "synonym",
281
- format: "wordnet",
282
- synonyms_path: Searchkick.wordnet_path
212
+ )
213
+ when "chinese2", "smartcn"
214
+ settings[:analysis][:analyzer].merge!(
215
+ default_analyzer => {
216
+ type: "smartcn"
217
+ },
218
+ searchkick_search: {
219
+ type: "smartcn"
220
+ },
221
+ searchkick_search2: {
222
+ type: "smartcn"
223
+ }
224
+ )
225
+ when "japanese"
226
+ settings[:analysis][:analyzer].merge!(
227
+ default_analyzer => {
228
+ type: "kuromoji"
229
+ },
230
+ searchkick_search: {
231
+ type: "kuromoji"
232
+ },
233
+ searchkick_search2: {
234
+ type: "kuromoji"
283
235
  }
236
+ )
237
+ when "korean"
238
+ settings[:analysis][:analyzer].merge!(
239
+ default_analyzer => {
240
+ type: "openkoreantext-analyzer"
241
+ },
242
+ searchkick_search: {
243
+ type: "openkoreantext-analyzer"
244
+ },
245
+ searchkick_search2: {
246
+ type: "openkoreantext-analyzer"
247
+ }
248
+ )
249
+ when "korean2"
250
+ settings[:analysis][:analyzer].merge!(
251
+ default_analyzer => {
252
+ type: "nori"
253
+ },
254
+ searchkick_search: {
255
+ type: "nori"
256
+ },
257
+ searchkick_search2: {
258
+ type: "nori"
259
+ }
260
+ )
261
+ when "vietnamese"
262
+ settings[:analysis][:analyzer].merge!(
263
+ default_analyzer => {
264
+ type: "vi_analyzer"
265
+ },
266
+ searchkick_search: {
267
+ type: "vi_analyzer"
268
+ },
269
+ searchkick_search2: {
270
+ type: "vi_analyzer"
271
+ }
272
+ )
273
+ when "polish", "ukrainian"
274
+ settings[:analysis][:analyzer].merge!(
275
+ default_analyzer => {
276
+ type: language
277
+ },
278
+ searchkick_search: {
279
+ type: language
280
+ },
281
+ searchkick_search2: {
282
+ type: language
283
+ }
284
+ )
285
+ end
286
+ end
284
287
 
285
- settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_wordnet")
286
- settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_wordnet"
288
+ def update_stemming(settings)
289
+ stem = options[:stem]
287
290
 
288
- %w(word_start word_middle word_end).each do |type|
289
- settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_wordnet")
290
- end
291
- end
291
+ # language analyzer used
292
+ stem = false if settings[:analysis][:analyzer][default_analyzer][:type] != "custom"
292
293
 
293
- if options[:special_characters] == false
294
- settings[:analysis][:analyzer].each_value do |analyzer_settings|
295
- analyzer_settings[:filter].reject! { |f| f == "asciifolding" }
296
- end
294
+ if stem == false
295
+ settings[:analysis][:filter].delete(:searchkick_stemmer)
296
+ settings[:analysis][:analyzer].each do |_, analyzer|
297
+ analyzer[:filter].delete("searchkick_stemmer") if analyzer[:filter]
297
298
  end
299
+ end
298
300
 
299
- mapping = {}
301
+ if options[:stemmer_override]
302
+ stemmer_override = {
303
+ type: "stemmer_override"
304
+ }
305
+ if options[:stemmer_override].is_a?(String)
306
+ stemmer_override[:rules_path] = options[:stemmer_override]
307
+ else
308
+ stemmer_override[:rules] = options[:stemmer_override]
309
+ end
310
+ settings[:analysis][:filter][:searchkick_stemmer_override] = stemmer_override
300
311
 
301
- # conversions
302
- Array(options[:conversions]).each do |conversions_field|
303
- mapping[conversions_field] = {
304
- type: "nested",
305
- properties: {
306
- query: {type: default_type, analyzer: "searchkick_keyword"},
307
- count: {type: "integer"}
308
- }
309
- }
312
+ settings[:analysis][:analyzer].each do |_, analyzer|
313
+ stemmer_index = analyzer[:filter].index("searchkick_stemmer") if analyzer[:filter]
314
+ analyzer[:filter].insert(stemmer_index, "searchkick_stemmer_override") if stemmer_index
310
315
  end
316
+ end
311
317
 
312
- mapping_options = Hash[
313
- [:suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable]
314
- .map { |type| [type, (options[type] || []).map(&:to_s)] }
315
- ]
318
+ if options[:stem_exclusion]
319
+ settings[:analysis][:filter][:searchkick_stem_exclusion] = {
320
+ type: "keyword_marker",
321
+ keywords: options[:stem_exclusion]
322
+ }
316
323
 
317
- word = options[:word] != false && (!options[:match] || options[:match] == :word)
324
+ settings[:analysis][:analyzer].each do |_, analyzer|
325
+ stemmer_index = analyzer[:filter].index("searchkick_stemmer") if analyzer[:filter]
326
+ analyzer[:filter].insert(stemmer_index, "searchkick_stem_exclusion") if stemmer_index
327
+ end
328
+ end
329
+ end
318
330
 
319
- mapping_options[:searchable].delete("_all")
331
+ def generate_mappings
332
+ mapping = {}
320
333
 
321
- analyzed_field_options = {type: default_type, index: index_true_value, analyzer: default_analyzer}
334
+ keyword_mapping = {type: "keyword"}
335
+ keyword_mapping[:ignore_above] = options[:ignore_above] || 30000
322
336
 
323
- mapping_options.values.flatten.uniq.each do |field|
324
- fields = {}
337
+ # conversions
338
+ Array(options[:conversions]).each do |conversions_field|
339
+ mapping[conversions_field] = {
340
+ type: "nested",
341
+ properties: {
342
+ query: {type: default_type, analyzer: "searchkick_keyword"},
343
+ count: {type: "integer"}
344
+ }
345
+ }
346
+ end
325
347
 
326
- if options.key?(:filterable) && !mapping_options[:filterable].include?(field)
327
- fields[field] = {type: default_type, index: index_false_value}
328
- else
329
- fields[field] = keyword_mapping
330
- end
348
+ mapping_options = Hash[
349
+ [:suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable]
350
+ .map { |type| [type, (options[type] || []).map(&:to_s)] }
351
+ ]
331
352
 
332
- if !options[:searchable] || mapping_options[:searchable].include?(field)
333
- if word
334
- fields[:analyzed] = analyzed_field_options
353
+ word = options[:word] != false && (!options[:match] || options[:match] == :word)
335
354
 
336
- if mapping_options[:highlight].include?(field)
337
- fields[:analyzed][:term_vector] = "with_positions_offsets"
338
- end
339
- end
355
+ mapping_options[:searchable].delete("_all")
340
356
 
341
- mapping_options.except(:highlight, :searchable, :filterable, :word).each do |type, f|
342
- if options[:match] == type || f.include?(field)
343
- fields[type] = {type: default_type, index: index_true_value, analyzer: "searchkick_#{type}_index"}
344
- end
345
- end
346
- end
357
+ analyzed_field_options = {type: default_type, index: true, analyzer: default_analyzer}
347
358
 
348
- mapping[field] = fields[field].merge(fields: fields.except(field))
349
- end
359
+ mapping_options.values.flatten.uniq.each do |field|
360
+ fields = {}
350
361
 
351
- (options[:locations] || []).map(&:to_s).each do |field|
352
- mapping[field] = {
353
- type: "geo_point"
354
- }
362
+ if options.key?(:filterable) && !mapping_options[:filterable].include?(field)
363
+ fields[field] = {type: default_type, index: false}
364
+ else
365
+ fields[field] = keyword_mapping
355
366
  end
356
367
 
357
- options[:geo_shape] = options[:geo_shape].product([{}]).to_h if options[:geo_shape].is_a?(Array)
358
- (options[:geo_shape] || {}).each do |field, shape_options|
359
- mapping[field] = shape_options.merge(type: "geo_shape")
360
- end
368
+ if !options[:searchable] || mapping_options[:searchable].include?(field)
369
+ if word
370
+ fields[:analyzed] = analyzed_field_options
361
371
 
362
- if options[:inheritance]
363
- mapping[:type] = keyword_mapping
364
- end
372
+ if mapping_options[:highlight].include?(field)
373
+ fields[:analyzed][:term_vector] = "with_positions_offsets"
374
+ end
375
+ end
365
376
 
366
- routing = {}
367
- if options[:routing]
368
- routing = {required: true}
369
- unless options[:routing] == true
370
- routing[:path] = options[:routing].to_s
377
+ mapping_options.except(:highlight, :searchable, :filterable, :word).each do |type, f|
378
+ if options[:match] == type || f.include?(field)
379
+ fields[type] = {type: default_type, index: true, analyzer: "searchkick_#{type}_index"}
380
+ end
371
381
  end
372
382
  end
373
383
 
374
- dynamic_fields = {
375
- # analyzed field must be the default field for include_in_all
376
- # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
377
- # however, we can include the not_analyzed field in _all
378
- # and the _all index analyzer will take care of it
379
- "{name}" => keyword_mapping
384
+ mapping[field] = fields[field].merge(fields: fields.except(field))
385
+ end
386
+
387
+ (options[:locations] || []).map(&:to_s).each do |field|
388
+ mapping[field] = {
389
+ type: "geo_point"
380
390
  }
391
+ end
381
392
 
382
- if options.key?(:filterable)
383
- dynamic_fields["{name}"] = {type: default_type, index: index_false_value}
384
- end
393
+ options[:geo_shape] = options[:geo_shape].product([{}]).to_h if options[:geo_shape].is_a?(Array)
394
+ (options[:geo_shape] || {}).each do |field, shape_options|
395
+ mapping[field] = shape_options.merge(type: "geo_shape")
396
+ end
385
397
 
386
- unless options[:searchable]
387
- if options[:match] && options[:match] != :word
388
- dynamic_fields[options[:match]] = {type: default_type, index: index_true_value, analyzer: "searchkick_#{options[:match]}_index"}
389
- end
398
+ if options[:inheritance]
399
+ mapping[:type] = keyword_mapping
400
+ end
390
401
 
391
- if word
392
- dynamic_fields[:analyzed] = analyzed_field_options
393
- end
402
+ routing = {}
403
+ if options[:routing]
404
+ routing = {required: true}
405
+ unless options[:routing] == true
406
+ routing[:path] = options[:routing].to_s
394
407
  end
408
+ end
395
409
 
410
+ dynamic_fields = {
411
+ # analyzed field must be the default field for include_in_all
396
412
  # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
397
- multi_field = dynamic_fields["{name}"].merge(fields: dynamic_fields.except("{name}"))
398
-
399
- mappings = {
400
- properties: mapping,
401
- _routing: routing,
402
- # https://gist.github.com/kimchy/2898285
403
- dynamic_templates: [
404
- {
405
- string_template: {
406
- match: "*",
407
- match_mapping_type: "string",
408
- mapping: multi_field
409
- }
413
+ # however, we can include the not_analyzed field in _all
414
+ # and the _all index analyzer will take care of it
415
+ "{name}" => keyword_mapping
416
+ }
417
+
418
+ if options.key?(:filterable)
419
+ dynamic_fields["{name}"] = {type: default_type, index: false}
420
+ end
421
+
422
+ unless options[:searchable]
423
+ if options[:match] && options[:match] != :word
424
+ dynamic_fields[options[:match]] = {type: default_type, index: true, analyzer: "searchkick_#{options[:match]}_index"}
425
+ end
426
+
427
+ if word
428
+ dynamic_fields[:analyzed] = analyzed_field_options
429
+ end
430
+ end
431
+
432
+ # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
433
+ multi_field = dynamic_fields["{name}"].merge(fields: dynamic_fields.except("{name}"))
434
+
435
+ mappings = {
436
+ properties: mapping,
437
+ _routing: routing,
438
+ # https://gist.github.com/kimchy/2898285
439
+ dynamic_templates: [
440
+ {
441
+ string_template: {
442
+ match: "*",
443
+ match_mapping_type: "string",
444
+ mapping: multi_field
410
445
  }
411
- ]
446
+ }
447
+ ]
448
+ }
449
+
450
+ if below70?
451
+ mappings = {index_type => mappings}
452
+ end
453
+
454
+ mappings
455
+ end
456
+
457
# Registers the index-time synonym token filter and adds it to the index
# analyzers.
#
# Reads options[:synonyms], which may be a list or a callable returning a
# list. Entries whose size is 1 or less are dropped. Array entries are joined
# with commas, string entries are used as given, and everything is downcased.
# Nothing is changed when the list is empty.
#
# @param settings [Hash] index settings, mutated in place. It must already
#   contain settings[:analysis][:filter] plus the default analyzer and the
#   searchkick_word_{start,middle,end}_index analyzers, each with a :filter
#   array.
def add_synonyms(settings)
  synonyms = options[:synonyms] || []
  synonyms = synonyms.call if synonyms.respond_to?(:call)
  return unless synonyms.any?

  settings[:analysis][:filter][:searchkick_synonym] = {
    type: "synonym",
    # only remove a single space from synonyms so three-word synonyms will fail noisily instead of silently
    synonyms: synonyms
      .select { |s| s.size > 1 }
      .map { |s| s.is_a?(Array) ? s.map { |s2| s2.sub(/\s+/, "") }.join(",") : s }
      .map(&:downcase)
  }

  # choosing a place for the synonym filter when stemming is not easy
  # https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
  # TODO use a snowball stemmer on synonyms when creating the token filter

  # http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
  # I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
  # - Only apply the synonym expansion at index time
  # - Don't have the synonym filter applied at search time
  # - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
  settings[:analysis][:analyzer][default_analyzer][:filter].insert(2, "searchkick_synonym")

  %w(word_start word_middle word_end).each do |type|
    settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym")
  end
end
413
482
 
414
- if below70
415
- index_type = options[:_type]
416
- index_type = index_type.call if index_type.respond_to?(:call)
417
- mappings = {index_type => mappings}
483
# Registers the search-time synonym_graph token filter and adds it to the
# :searchkick_search2 and :searchkick_word_search analyzers.
#
# Reads options[:search_synonyms], which may be:
# - a String, used as the filter's synonyms_path (the filter is also marked
#   updateable unless below73? is true),
# - a list (entries of size <= 1 dropped, arrays comma-joined, all downcased),
# - a callable returning either of the above.
# Nothing is changed for an empty list.
#
# @param settings [Hash] index settings, mutated in place. It must already
#   contain settings[:analysis][:filter] and both analyzers named above,
#   each with a :filter array.
def add_search_synonyms(settings)
  search_synonyms = options[:search_synonyms] || []
  search_synonyms = search_synonyms.call if search_synonyms.respond_to?(:call)
  return unless search_synonyms.is_a?(String) || search_synonyms.any?

  if search_synonyms.is_a?(String)
    synonym_graph = {
      type: "synonym_graph",
      synonyms_path: search_synonyms
    }
    synonym_graph[:updateable] = true unless below73?
  else
    synonym_graph = {
      type: "synonym_graph",
      # TODO confirm this is correct
      synonyms: search_synonyms
        .select { |s| s.size > 1 }
        .map { |s| s.is_a?(Array) ? s.join(",") : s }
        .map(&:downcase)
    }
  end
  settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph

  [:searchkick_search2, :searchkick_word_search].each do |analyzer|
    settings[:analysis][:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph")
  end
end
422
507
 
423
- {
424
- settings: settings,
425
- mappings: mappings
508
# Registers a WordNet-format synonym token filter, reading its file path from
# Searchkick.wordnet_path, and adds it to the index analyzers.
#
# NOTE(review): the filter name is added to the default analyzer twice
# (inserted at index 4 and appended at the end). This is preserved as
# written; confirm whether both entries are intended.
#
# @param settings [Hash] index settings, mutated in place. It must already
#   contain settings[:analysis][:filter] plus the default analyzer and the
#   searchkick_word_{start,middle,end}_index analyzers, each with a :filter
#   array.
def add_wordnet(settings)
  settings[:analysis][:filter][:searchkick_wordnet] = {
    type: "synonym",
    format: "wordnet",
    synonyms_path: Searchkick.wordnet_path
  }

  default_filters = settings[:analysis][:analyzer][default_analyzer][:filter]
  default_filters.insert(4, "searchkick_wordnet")
  default_filters << "searchkick_wordnet"

  %w(word_start word_middle word_end).each do |type|
    settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_wordnet")
  end
end
522
+
523
# Sets index.max_result_window to 1_000_000_000 unless the settings already
# carry a value for it, either nested under :index or as the flat
# :"index.max_result_window" key.
#
# @param settings [Hash] index settings, mutated in place
def set_deep_paging(settings)
  return if settings.dig(:index, :max_result_window) || settings[:"index.max_result_window"]

  settings[:index] ||= {}
  settings[:index][:max_result_window] = 1_000_000_000
end
529
+
530
# Returns options[:_type], calling it first when it responds to :call.
# The result is memoized in @index_type, so a truthy value is computed once.
def index_type
  @index_type ||= begin
    type = options[:_type]
    type.respond_to?(:call) ? type.call : type
  end
end
537
+
538
# Field type used for string mappings built by this class.
def default_type
  "text"
end
541
+
542
# Name of the analyzer used for analyzed fields and as the target for the
# synonym and wordnet filters.
def default_analyzer
  :searchkick_index
end
545
+
546
# Returns the result of Searchkick.server_below?("6.2.0").
# Presumably true when the connected server is older than 6.2.0 — verify
# against Searchkick.server_below?.
def below62?
  Searchkick.server_below?("6.2.0")
end
549
+
550
# Returns the result of Searchkick.server_below?("7.0.0").
# Presumably true when the connected server is older than 7.0.0 — verify
# against Searchkick.server_below?.
def below70?
  Searchkick.server_below?("7.0.0")
end
553
+
554
# Returns the result of Searchkick.server_below?("7.3.0").
# Presumably true when the connected server is older than 7.3.0 — verify
# against Searchkick.server_below?.
def below73?
  Searchkick.server_below?("7.3.0")
end
428
557
  end
429
558
  end