searchkick 1.3.4 → 1.3.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,359 @@
module Searchkick
  # Mixin that builds the Elasticsearch index definition (settings and
  # mappings) from the including object's @options hash.
  module IndexOptions
    # Builds the index settings and mappings described by @options.
    #
    # When options[:mappings] is given and options[:merge_mappings] is falsy,
    # the caller-supplied options[:settings] (or an empty hash) and
    # options[:mappings] are returned untouched. Otherwise the default
    # analyzers, filters and field mappings are generated, and
    # options[:settings] / options[:mappings] are deep-merged on top of them.
    #
    # Relies on Searchkick.server_below?, Searchkick.env and (for the
    # :wordnet option) Searchkick.wordnet_path, as well as Hash#deep_merge,
    # Hash#deep_merge! and Hash#except.
    #
    # @return [Hash] a hash with the keys :settings and :mappings
    def index_options
      options = @options
      language = options[:language]
      language = language.call if language.respond_to?(:call)

      if options[:mappings] && !options[:merge_mappings]
        settings = options[:settings] || {}
        mappings = options[:mappings]
      else
        below22 = Searchkick.server_below?("2.2.0")
        below50 = Searchkick.server_below?("5.0.0-alpha1")
        default_type = below50 ? "string" : "text"
        default_analyzer = below50 ? :default_index : :default
        keyword_mapping =
          if below50
            {
              type: default_type,
              index: "not_analyzed"
            }
          else
            {
              type: "keyword"
            }
          end

        keyword_mapping[:ignore_above] = 256 unless below22

        settings = {
          analysis: {
            analyzer: {
              searchkick_keyword: {
                type: "custom",
                tokenizer: "keyword",
                filter: ["lowercase"] + (options[:stem_conversions] == false ? [] : ["searchkick_stemmer"])
              },
              default_analyzer => {
                type: "custom",
                # character filters -> tokenizer -> token filters
                # https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html
                char_filter: ["ampersand"],
                tokenizer: "standard",
                # synonym should come last, after stemming and shingle
                # shingle must come before searchkick_stemmer
                filter: ["standard", "lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
              },
              searchkick_search: {
                type: "custom",
                char_filter: ["ampersand"],
                tokenizer: "standard",
                filter: ["standard", "lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
              },
              searchkick_search2: {
                type: "custom",
                char_filter: ["ampersand"],
                tokenizer: "standard",
                filter: ["standard", "lowercase", "asciifolding", "searchkick_stemmer"]
              },
              # https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
              searchkick_autocomplete_index: {
                type: "custom",
                tokenizer: "searchkick_autocomplete_ngram",
                filter: ["lowercase", "asciifolding"]
              },
              searchkick_autocomplete_search: {
                type: "custom",
                tokenizer: "keyword",
                filter: ["lowercase", "asciifolding"]
              },
              searchkick_word_search: {
                type: "custom",
                tokenizer: "standard",
                filter: ["lowercase", "asciifolding"]
              },
              searchkick_suggest_index: {
                type: "custom",
                tokenizer: "standard",
                filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
              },
              searchkick_text_start_index: {
                type: "custom",
                tokenizer: "keyword",
                filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
              },
              searchkick_text_middle_index: {
                type: "custom",
                tokenizer: "keyword",
                filter: ["lowercase", "asciifolding", "searchkick_ngram"]
              },
              searchkick_text_end_index: {
                type: "custom",
                tokenizer: "keyword",
                filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
              },
              searchkick_word_start_index: {
                type: "custom",
                tokenizer: "standard",
                filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
              },
              searchkick_word_middle_index: {
                type: "custom",
                tokenizer: "standard",
                filter: ["lowercase", "asciifolding", "searchkick_ngram"]
              },
              searchkick_word_end_index: {
                type: "custom",
                tokenizer: "standard",
                filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
              }
            },
            filter: {
              searchkick_index_shingle: {
                type: "shingle",
                token_separator: ""
              },
              # lucky find http://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
              searchkick_search_shingle: {
                type: "shingle",
                token_separator: "",
                output_unigrams: false,
                output_unigrams_if_no_shingles: true
              },
              searchkick_suggest_shingle: {
                type: "shingle",
                max_shingle_size: 5
              },
              searchkick_edge_ngram: {
                type: "edgeNGram",
                min_gram: 1,
                max_gram: 50
              },
              searchkick_ngram: {
                type: "nGram",
                min_gram: 1,
                max_gram: 50
              },
              searchkick_stemmer: {
                # use stemmer if language is lowercase, snowball otherwise
                # TODO deprecate language option in favor of stemmer
                type: language == language.to_s.downcase ? "stemmer" : "snowball",
                language: language || "English"
              }
            },
            char_filter: {
              # https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html
              # &_to_and
              ampersand: {
                type: "mapping",
                mappings: ["&=> and "]
              }
            },
            tokenizer: {
              searchkick_autocomplete_ngram: {
                type: "edgeNGram",
                min_gram: 1,
                max_gram: 50
              }
            }
          }
        }

        if Searchkick.env == "test"
          settings[:number_of_shards] = 1
          settings[:number_of_replicas] = 0
        end

        if options[:similarity]
          settings[:similarity] = {default: {type: options[:similarity]}}
        end

        settings.deep_merge!(options[:settings] || {})

        # Filter lists may have been replaced by the user settings merged
        # above. Array#insert pads with nil when the index lies past the end
        # of the array, so the position is clamped to the current length to
        # keep the list free of nil entries.
        insert_filter = lambda do |filters, position, name|
          filters.insert([position, filters.size].min, name)
        end

        # synonyms
        synonyms = options[:synonyms] || []

        synonyms = synonyms.call if synonyms.respond_to?(:call)

        if synonyms.any?
          settings[:analysis][:filter][:searchkick_synonym] = {
            type: "synonym",
            # groups with a single term have nothing to expand to
            synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.join(",") }
          }
          # choosing a place for the synonym filter when stemming is not easy
          # https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
          # TODO use a snowball stemmer on synonyms when creating the token filter

          # http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
          # I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
          # - Only apply the synonym expansion at index time
          # - Don't have the synonym filter applied search
          # - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
          default_filters = settings[:analysis][:analyzer][default_analyzer][:filter]
          insert_filter.call(default_filters, 4, "searchkick_synonym")
          default_filters << "searchkick_synonym"

          %w(word_start word_middle word_end).each do |type|
            insert_filter.call(settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter], 2, "searchkick_synonym")
          end
        end

        if options[:wordnet]
          settings[:analysis][:filter][:searchkick_wordnet] = {
            type: "synonym",
            format: "wordnet",
            synonyms_path: Searchkick.wordnet_path
          }

          default_filters = settings[:analysis][:analyzer][default_analyzer][:filter]
          insert_filter.call(default_filters, 4, "searchkick_wordnet")
          default_filters << "searchkick_wordnet"

          %w(word_start word_middle word_end).each do |type|
            insert_filter.call(settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter], 2, "searchkick_wordnet")
          end
        end

        if options[:special_characters] == false
          settings[:analysis][:analyzer].each do |_, analyzer_settings|
            # analyzers merged in from options[:settings] may not define a
            # filter list at all
            filters = analyzer_settings[:filter]
            next unless filters

            filters.reject! { |f| f == "asciifolding" }
          end
        end

        mapping = {}

        # conversions
        Array(options[:conversions]).each do |conversions_field|
          mapping[conversions_field] = {
            type: "nested",
            properties: {
              query: {type: default_type, analyzer: "searchkick_keyword"},
              count: {type: "integer"}
            }
          }
        end

        # option name => list of field names (as strings) it applies to
        mapping_types = [:autocomplete, :suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable, :only_analyzed]
        mapping_options = Hash[mapping_types.map { |type| [type, (options[type] || []).map(&:to_s)] }]

        word = options[:word] != false && (!options[:match] || options[:match] == :word)

        mapping_options.values.flatten.uniq.each do |field|
          fields = {}

          if mapping_options[:only_analyzed].include?(field) || (options.key?(:filterable) && !mapping_options[:filterable].include?(field))
            fields[field] = {type: default_type, index: "no"}
          else
            fields[field] = keyword_mapping
          end

          if !options[:searchable] || mapping_options[:searchable].include?(field)
            if word
              fields["analyzed"] = {type: default_type, index: "analyzed", analyzer: default_analyzer}

              if mapping_options[:highlight].include?(field)
                fields["analyzed"][:term_vector] = "with_positions_offsets"
              end
            end

            mapping_options.except(:highlight, :searchable, :filterable, :only_analyzed, :word).each do |type, f|
              if options[:match] == type || f.include?(field)
                fields[type] = {type: default_type, index: "analyzed", analyzer: "searchkick_#{type}_index"}
              end
            end
          end

          # below 5.0 the sub-fields live in a "multi_field" wrapper; from 5.0
          # on they are nested under the field's own mapping
          mapping[field] =
            if below50
              {
                type: "multi_field",
                fields: fields
              }
            elsif fields[field]
              fields[field].merge(fields: fields.except(field))
            end
        end

        (options[:locations] || []).map(&:to_s).each do |field|
          mapping[field] = {
            type: "geo_point"
          }
        end

        (options[:unsearchable] || []).map(&:to_s).each do |field|
          mapping[field] = {
            type: default_type,
            index: "no"
          }
        end

        routing = {}
        if options[:routing]
          routing = {required: true}
          unless options[:routing] == true
            routing[:path] = options[:routing].to_s
          end
        end

        dynamic_fields = {
          # analyzed field must be the default field for include_in_all
          # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
          # however, we can include the not_analyzed field in _all
          # and the _all index analyzer will take care of it
          "{name}" => keyword_mapping.merge(include_in_all: !options[:searchable])
        }

        if options.key?(:filterable)
          dynamic_fields["{name}"] = {type: default_type, index: "no"}
        end

        dynamic_fields["{name}"][:ignore_above] = 256 unless below22

        unless options[:searchable]
          if options[:match] && options[:match] != :word
            dynamic_fields[options[:match]] = {type: default_type, index: "analyzed", analyzer: "searchkick_#{options[:match]}_index"}
          end

          if word
            dynamic_fields["analyzed"] = {type: default_type, index: "analyzed"}
          end
        end

        # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
        multi_field =
          if below50
            {
              type: "multi_field",
              fields: dynamic_fields
            }
          else
            dynamic_fields["{name}"].merge(fields: dynamic_fields.except("{name}"))
          end

        mappings = {
          _default_: {
            _all: {type: default_type, index: "analyzed", analyzer: default_analyzer},
            properties: mapping,
            _routing: routing,
            # https://gist.github.com/kimchy/2898285
            dynamic_templates: [
              {
                string_template: {
                  match: "*",
                  match_mapping_type: "string",
                  mapping: multi_field
                }
              }
            ]
          }
        }.deep_merge(options[:mappings] || {})
      end

      {
        settings: settings,
        mappings: mappings
      }
    end
  end
end
@@ -10,8 +10,7 @@ module Searchkick
10
10
  :took, :error, :model_name, :entry_name, :total_count, :total_entries,
11
11
  :current_page, :per_page, :limit_value, :padding, :total_pages, :num_pages,
12
12
  :offset_value, :offset, :previous_page, :prev_page, :next_page, :first_page?, :last_page?,
13
- :out_of_range?, :hits
14
-
13
+ :out_of_range?, :hits, :response, :to_a, :first
15
14
 
16
15
  def initialize(klass, term, options = {})
17
16
  if term.is_a?(Hash)
@@ -30,6 +29,12 @@ module Searchkick
30
29
  @options = options
31
30
  @match_suffix = options[:match] || searchkick_options[:match] || "analyzed"
32
31
 
32
+ # prevent Ruby warnings
33
+ @type = nil
34
+ @routing = nil
35
+ @misspellings_below = nil
36
+ @highlighted_fields = nil
37
+
33
38
  prepare
34
39
  end
35
40
 
@@ -59,8 +64,9 @@ module Searchkick
59
64
  index: index,
60
65
  body: body
61
66
  }
62
- params.merge!(type: @type) if @type
63
- params.merge!(routing: @routing) if @routing
67
+ params[:type] = @type if @type
68
+ params[:routing] = @routing if @routing
69
+ params.merge!(options[:request_params]) if options[:request_params]
64
70
  params
65
71
  end
66
72
 
@@ -86,7 +92,7 @@ module Searchkick
86
92
 
87
93
  # no easy way to tell which host the client will use
88
94
  host = Searchkick.client.transport.hosts.first
89
- credentials = (host[:user] || host[:password]) ? "#{host[:user]}:#{host[:password]}@" : nil
95
+ credentials = host[:user] || host[:password] ? "#{host[:user]}:#{host[:password]}@" : nil
90
96
  "curl #{host[:protocol]}://#{credentials}#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type ? "/#{type.map { |t| CGI.escape(t) }.join(',')}" : ''}/_search?pretty -d '#{query[:body].to_json}'"
91
97
  end
92
98
 
@@ -159,10 +165,10 @@ module Searchkick
159
165
  padding = [options[:padding].to_i, 0].max
160
166
  offset = options[:offset] || (page - 1) * per_page + padding
161
167
 
162
- # model and eagar loading
168
+ # model and eager loading
163
169
  load = options[:load].nil? ? true : options[:load]
164
170
 
165
- conversions_fields = Array(options[:conversions] || searchkick_options[:conversions])
171
+ conversions_fields = Array(options[:conversions] || searchkick_options[:conversions]).map(&:to_s)
166
172
  personalize_field = searchkick_options[:personalize]
167
173
 
168
174
  all = term == "*"
@@ -257,7 +263,7 @@ module Searchkick
257
263
  f = field.split(".")[0..-2].join(".")
258
264
  queries << {match: {f => shared_options.merge(analyzer: "keyword")}}
259
265
  else
260
- analyzer = field.match(/\.word_(start|middle|end)\z/) ? "searchkick_word_search" : "searchkick_autocomplete_search"
266
+ analyzer = field =~ /\.word_(start|middle|end)\z/ ? "searchkick_word_search" : "searchkick_autocomplete_search"
261
267
  qs << shared_options.merge(analyzer: analyzer)
262
268
  end
263
269
 
@@ -286,22 +292,22 @@ module Searchkick
286
292
  {field_value_factor: {field: "#{conversions_field}.count"}}
287
293
  end
288
294
 
289
- shoulds << {
290
- nested: {
291
- path: conversions_field,
292
- score_mode: "sum",
293
- query: {
294
- function_score: {
295
- boost_mode: "replace",
296
- query: {
297
- match: {
298
- "#{conversions_field}.query" => term
299
- }
300
- }
301
- }.merge(script_score)
302
- }
303
- }
304
- }
295
+ shoulds << {
296
+ nested: {
297
+ path: conversions_field,
298
+ score_mode: "sum",
299
+ query: {
300
+ function_score: {
301
+ boost_mode: "replace",
302
+ query: {
303
+ match: {
304
+ "#{conversions_field}.query" => term
305
+ }
306
+ }
307
+ }.merge(script_score)
308
+ }
309
+ }
310
+ }
305
311
  end
306
312
  payload = {
307
313
  bool: {
@@ -378,13 +384,22 @@ module Searchkick
378
384
  payload[:fields] = options[:select] if options[:select] != true
379
385
  elsif options[:select_v2]
380
386
  if options[:select_v2] == []
381
- payload[:fields] = [] # intuitively [] makes sense to return no fields, but ES by default returns all fields
387
+ # intuitively [] makes sense to return no fields, but ES by default returns all fields
388
+ if below50?
389
+ payload[:fields] = []
390
+ else
391
+ payload[:_source] = false
392
+ end
382
393
  else
383
394
  payload[:_source] = options[:select_v2]
384
395
  end
385
396
  elsif load
386
397
  # don't need any fields since we're going to load them from the DB anyways
387
- payload[:fields] = []
398
+ if below50?
399
+ payload[:fields] = []
400
+ else
401
+ payload[:_source] = false
402
+ end
388
403
  end
389
404
 
390
405
  if options[:type] || (klass != searchkick_klass && searchkick_index)
@@ -399,7 +414,7 @@ module Searchkick
399
414
  payload = payload.deep_merge(options[:body_options]) if options[:body_options]
400
415
 
401
416
  @body = payload
402
- @facet_limits = @facet_limits || {}
417
+ @facet_limits ||= {}
403
418
  @page = page
404
419
  @per_page = per_page
405
420
  @padding = padding
@@ -751,8 +766,8 @@ module Searchkick
751
766
  filters << {bool: {must_not: term_filters(field, op_value)}}
752
767
  end
753
768
  when :all
754
- op_value.each do |value|
755
- filters << term_filters(field, value)
769
+ op_value.each do |val|
770
+ filters << term_filters(field, val)
756
771
  end
757
772
  when :in
758
773
  filters << term_filters(field, op_value)