searchkick 1.3.4 → 1.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,359 @@
module Searchkick
  # Mixin that builds the index definition hash from the including object's
  # @options instance variable.
  module IndexOptions
    # Builds the settings and mappings for an index.
    #
    # When options[:mappings] is given without options[:merge_mappings], the
    # user-supplied mappings (and options[:settings], or an empty hash) are
    # returned untouched. Otherwise the default analyzers, filters and
    # per-field mappings are generated and options[:settings] /
    # options[:mappings] are deep-merged on top of them.
    #
    # @return [Hash] a hash with the keys :settings and :mappings
    def index_options
      options = @options
      language = options[:language]
      # language may be given lazily as a callable
      language = language.call if language.respond_to?(:call)

      if options[:mappings] && !options[:merge_mappings]
        settings = options[:settings] || {}
        mappings = options[:mappings]
      else
        below22 = Searchkick.server_below?("2.2.0")
        below50 = Searchkick.server_below?("5.0.0-alpha1")
        default_type = below50 ? "string" : "text"
        default_analyzer = below50 ? :default_index : :default
        keyword_mapping =
          if below50
            {
              type: default_type,
              index: "not_analyzed"
            }
          else
            {
              type: "keyword"
            }
          end

        keyword_mapping[:ignore_above] = 256 unless below22

        settings = {
          analysis: {
            analyzer: {
              searchkick_keyword: {
                type: "custom",
                tokenizer: "keyword",
                filter: ["lowercase"] + (options[:stem_conversions] == false ? [] : ["searchkick_stemmer"])
              },
              default_analyzer => {
                type: "custom",
                # character filters -> tokenizer -> token filters
                # https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html
                char_filter: ["ampersand"],
                tokenizer: "standard",
                # synonym should come last, after stemming and shingle
                # shingle must come before searchkick_stemmer
                filter: ["standard", "lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
              },
              searchkick_search: {
                type: "custom",
                char_filter: ["ampersand"],
                tokenizer: "standard",
                filter: ["standard", "lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
              },
              searchkick_search2: {
                type: "custom",
                char_filter: ["ampersand"],
                tokenizer: "standard",
                filter: ["standard", "lowercase", "asciifolding", "searchkick_stemmer"]
              },
              # https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
              searchkick_autocomplete_index: {
                type: "custom",
                tokenizer: "searchkick_autocomplete_ngram",
                filter: ["lowercase", "asciifolding"]
              },
              searchkick_autocomplete_search: {
                type: "custom",
                tokenizer: "keyword",
                filter: ["lowercase", "asciifolding"]
              },
              searchkick_word_search: {
                type: "custom",
                tokenizer: "standard",
                filter: ["lowercase", "asciifolding"]
              },
              searchkick_suggest_index: {
                type: "custom",
                tokenizer: "standard",
                filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
              },
              searchkick_text_start_index: {
                type: "custom",
                tokenizer: "keyword",
                filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
              },
              searchkick_text_middle_index: {
                type: "custom",
                tokenizer: "keyword",
                filter: ["lowercase", "asciifolding", "searchkick_ngram"]
              },
              searchkick_text_end_index: {
                type: "custom",
                tokenizer: "keyword",
                filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
              },
              searchkick_word_start_index: {
                type: "custom",
                tokenizer: "standard",
                filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
              },
              searchkick_word_middle_index: {
                type: "custom",
                tokenizer: "standard",
                filter: ["lowercase", "asciifolding", "searchkick_ngram"]
              },
              searchkick_word_end_index: {
                type: "custom",
                tokenizer: "standard",
                filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
              }
            },
            filter: {
              searchkick_index_shingle: {
                type: "shingle",
                token_separator: ""
              },
              # lucky find http://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
              searchkick_search_shingle: {
                type: "shingle",
                token_separator: "",
                output_unigrams: false,
                output_unigrams_if_no_shingles: true
              },
              searchkick_suggest_shingle: {
                type: "shingle",
                max_shingle_size: 5
              },
              searchkick_edge_ngram: {
                type: "edgeNGram",
                min_gram: 1,
                max_gram: 50
              },
              searchkick_ngram: {
                type: "nGram",
                min_gram: 1,
                max_gram: 50
              },
              searchkick_stemmer: {
                # use stemmer if language is lowercase, snowball otherwise
                # TODO deprecate language option in favor of stemmer
                type: language == language.to_s.downcase ? "stemmer" : "snowball",
                language: language || "English"
              }
            },
            char_filter: {
              # https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html
              # &_to_and
              ampersand: {
                type: "mapping",
                mappings: ["&=> and "]
              }
            },
            tokenizer: {
              searchkick_autocomplete_ngram: {
                type: "edgeNGram",
                min_gram: 1,
                max_gram: 50
              }
            }
          }
        }

        if Searchkick.env == "test"
          settings[:number_of_shards] = 1
          settings[:number_of_replicas] = 0
        end

        if options[:similarity]
          settings[:similarity] = {default: {type: options[:similarity]}}
        end

        settings.deep_merge!(options[:settings] || {})

        # Wires a synonym-style token filter into the index-time analyzers:
        # position 4 and the end of the default analyzer's filter chain, and
        # position 2 of each word_* index analyzer.
        add_index_filter = lambda do |filter_name|
          default_filters = settings[:analysis][:analyzer][default_analyzer][:filter]
          default_filters.insert(4, filter_name)
          default_filters << filter_name

          %w(word_start word_middle word_end).each do |type|
            settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, filter_name)
          end
        end

        # synonyms
        synonyms = options[:synonyms] || []

        synonyms = synonyms.call if synonyms.respond_to?(:call)

        if synonyms.any?
          settings[:analysis][:filter][:searchkick_synonym] = {
            type: "synonym",
            # groups with a single entry carry no synonym information
            synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.join(",") }
          }
          # choosing a place for the synonym filter when stemming is not easy
          # https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
          # TODO use a snowball stemmer on synonyms when creating the token filter

          # http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
          # I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
          # - Only apply the synonym expansion at index time
          # - Don't have the synonym filter applied search
          # - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
          add_index_filter.call("searchkick_synonym")
        end

        if options[:wordnet]
          settings[:analysis][:filter][:searchkick_wordnet] = {
            type: "synonym",
            format: "wordnet",
            synonyms_path: Searchkick.wordnet_path
          }

          add_index_filter.call("searchkick_wordnet")
        end

        if options[:special_characters] == false
          settings[:analysis][:analyzer].each do |_, analyzer_settings|
            # analyzers merged in from options[:settings] may not define a
            # :filter list at all; leave those untouched instead of raising
            filters = analyzer_settings[:filter]
            filters.reject! { |f| f == "asciifolding" } if filters.respond_to?(:reject!)
          end
        end

        mapping = {}

        # conversions
        Array(options[:conversions]).each do |conversions_field|
          mapping[conversions_field] = {
            type: "nested",
            properties: {
              query: {type: default_type, analyzer: "searchkick_keyword"},
              count: {type: "integer"}
            }
          }
        end

        # option name => list of field names (as strings) it was requested for
        mapping_options = Hash[
          [:autocomplete, :suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable, :only_analyzed]
            .map { |type| [type, (options[type] || []).map(&:to_s)] }
        ]

        word = options[:word] != false && (!options[:match] || options[:match] == :word)

        mapping_options.values.flatten.uniq.each do |field|
          fields = {}

          if mapping_options[:only_analyzed].include?(field) || (options.key?(:filterable) && !mapping_options[:filterable].include?(field))
            fields[field] = {type: default_type, index: "no"}
          else
            fields[field] = keyword_mapping
          end

          if !options[:searchable] || mapping_options[:searchable].include?(field)
            if word
              fields["analyzed"] = {type: default_type, index: "analyzed", analyzer: default_analyzer}

              if mapping_options[:highlight].include?(field)
                fields["analyzed"][:term_vector] = "with_positions_offsets"
              end
            end

            mapping_options.except(:highlight, :searchable, :filterable, :only_analyzed, :word).each do |type, f|
              if options[:match] == type || f.include?(field)
                fields[type] = {type: default_type, index: "analyzed", analyzer: "searchkick_#{type}_index"}
              end
            end
          end

          # servers below 5.0 get a multi_field; otherwise the base field
          # carries the remaining entries as sub-fields
          mapping[field] =
            if below50
              {
                type: "multi_field",
                fields: fields
              }
            elsif fields[field]
              fields[field].merge(fields: fields.except(field))
            end
        end

        (options[:locations] || []).map(&:to_s).each do |field|
          mapping[field] = {
            type: "geo_point"
          }
        end

        (options[:unsearchable] || []).map(&:to_s).each do |field|
          mapping[field] = {
            type: default_type,
            index: "no"
          }
        end

        routing = {}
        if options[:routing]
          routing = {required: true}
          unless options[:routing] == true
            routing[:path] = options[:routing].to_s
          end
        end

        dynamic_fields = {
          # analyzed field must be the default field for include_in_all
          # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
          # however, we can include the not_analyzed field in _all
          # and the _all index analyzer will take care of it
          "{name}" => keyword_mapping.merge(include_in_all: !options[:searchable])
        }

        if options.key?(:filterable)
          dynamic_fields["{name}"] = {type: default_type, index: "no"}
        end

        dynamic_fields["{name}"][:ignore_above] = 256 unless below22

        unless options[:searchable]
          if options[:match] && options[:match] != :word
            dynamic_fields[options[:match]] = {type: default_type, index: "analyzed", analyzer: "searchkick_#{options[:match]}_index"}
          end

          if word
            dynamic_fields["analyzed"] = {type: default_type, index: "analyzed"}
          end
        end

        # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
        multi_field =
          if below50
            {
              type: "multi_field",
              fields: dynamic_fields
            }
          else
            dynamic_fields["{name}"].merge(fields: dynamic_fields.except("{name}"))
          end

        mappings = {
          _default_: {
            _all: {type: default_type, index: "analyzed", analyzer: default_analyzer},
            properties: mapping,
            _routing: routing,
            # https://gist.github.com/kimchy/2898285
            dynamic_templates: [
              {
                string_template: {
                  match: "*",
                  match_mapping_type: "string",
                  mapping: multi_field
                }
              }
            ]
          }
        }.deep_merge(options[:mappings] || {})
      end

      {
        settings: settings,
        mappings: mappings
      }
    end
  end
end
@@ -10,8 +10,7 @@ module Searchkick
10
10
  :took, :error, :model_name, :entry_name, :total_count, :total_entries,
11
11
  :current_page, :per_page, :limit_value, :padding, :total_pages, :num_pages,
12
12
  :offset_value, :offset, :previous_page, :prev_page, :next_page, :first_page?, :last_page?,
13
- :out_of_range?, :hits
14
-
13
+ :out_of_range?, :hits, :response, :to_a, :first
15
14
 
16
15
  def initialize(klass, term, options = {})
17
16
  if term.is_a?(Hash)
@@ -30,6 +29,12 @@ module Searchkick
30
29
  @options = options
31
30
  @match_suffix = options[:match] || searchkick_options[:match] || "analyzed"
32
31
 
32
+ # prevent Ruby warnings
33
+ @type = nil
34
+ @routing = nil
35
+ @misspellings_below = nil
36
+ @highlighted_fields = nil
37
+
33
38
  prepare
34
39
  end
35
40
 
@@ -59,8 +64,9 @@ module Searchkick
59
64
  index: index,
60
65
  body: body
61
66
  }
62
- params.merge!(type: @type) if @type
63
- params.merge!(routing: @routing) if @routing
67
+ params[:type] = @type if @type
68
+ params[:routing] = @routing if @routing
69
+ params.merge!(options[:request_params]) if options[:request_params]
64
70
  params
65
71
  end
66
72
 
@@ -86,7 +92,7 @@ module Searchkick
86
92
 
87
93
  # no easy way to tell which host the client will use
88
94
  host = Searchkick.client.transport.hosts.first
89
- credentials = (host[:user] || host[:password]) ? "#{host[:user]}:#{host[:password]}@" : nil
95
+ credentials = host[:user] || host[:password] ? "#{host[:user]}:#{host[:password]}@" : nil
90
96
  "curl #{host[:protocol]}://#{credentials}#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type ? "/#{type.map { |t| CGI.escape(t) }.join(',')}" : ''}/_search?pretty -d '#{query[:body].to_json}'"
91
97
  end
92
98
 
@@ -159,10 +165,10 @@ module Searchkick
159
165
  padding = [options[:padding].to_i, 0].max
160
166
  offset = options[:offset] || (page - 1) * per_page + padding
161
167
 
162
- # model and eagar loading
168
+ # model and eager loading
163
169
  load = options[:load].nil? ? true : options[:load]
164
170
 
165
- conversions_fields = Array(options[:conversions] || searchkick_options[:conversions])
171
+ conversions_fields = Array(options[:conversions] || searchkick_options[:conversions]).map(&:to_s)
166
172
  personalize_field = searchkick_options[:personalize]
167
173
 
168
174
  all = term == "*"
@@ -257,7 +263,7 @@ module Searchkick
257
263
  f = field.split(".")[0..-2].join(".")
258
264
  queries << {match: {f => shared_options.merge(analyzer: "keyword")}}
259
265
  else
260
- analyzer = field.match(/\.word_(start|middle|end)\z/) ? "searchkick_word_search" : "searchkick_autocomplete_search"
266
+ analyzer = field =~ /\.word_(start|middle|end)\z/ ? "searchkick_word_search" : "searchkick_autocomplete_search"
261
267
  qs << shared_options.merge(analyzer: analyzer)
262
268
  end
263
269
 
@@ -286,22 +292,22 @@ module Searchkick
286
292
  {field_value_factor: {field: "#{conversions_field}.count"}}
287
293
  end
288
294
 
289
- shoulds << {
290
- nested: {
291
- path: conversions_field,
292
- score_mode: "sum",
293
- query: {
294
- function_score: {
295
- boost_mode: "replace",
296
- query: {
297
- match: {
298
- "#{conversions_field}.query" => term
299
- }
300
- }
301
- }.merge(script_score)
302
- }
303
- }
304
- }
295
+ shoulds << {
296
+ nested: {
297
+ path: conversions_field,
298
+ score_mode: "sum",
299
+ query: {
300
+ function_score: {
301
+ boost_mode: "replace",
302
+ query: {
303
+ match: {
304
+ "#{conversions_field}.query" => term
305
+ }
306
+ }
307
+ }.merge(script_score)
308
+ }
309
+ }
310
+ }
305
311
  end
306
312
  payload = {
307
313
  bool: {
@@ -378,13 +384,22 @@ module Searchkick
378
384
  payload[:fields] = options[:select] if options[:select] != true
379
385
  elsif options[:select_v2]
380
386
  if options[:select_v2] == []
381
- payload[:fields] = [] # intuitively [] makes sense to return no fields, but ES by default returns all fields
387
+ # intuitively [] makes sense to return no fields, but ES by default returns all fields
388
+ if below50?
389
+ payload[:fields] = []
390
+ else
391
+ payload[:_source] = false
392
+ end
382
393
  else
383
394
  payload[:_source] = options[:select_v2]
384
395
  end
385
396
  elsif load
386
397
  # don't need any fields since we're going to load them from the DB anyways
387
- payload[:fields] = []
398
+ if below50?
399
+ payload[:fields] = []
400
+ else
401
+ payload[:_source] = false
402
+ end
388
403
  end
389
404
 
390
405
  if options[:type] || (klass != searchkick_klass && searchkick_index)
@@ -399,7 +414,7 @@ module Searchkick
399
414
  payload = payload.deep_merge(options[:body_options]) if options[:body_options]
400
415
 
401
416
  @body = payload
402
- @facet_limits = @facet_limits || {}
417
+ @facet_limits ||= {}
403
418
  @page = page
404
419
  @per_page = per_page
405
420
  @padding = padding
@@ -751,8 +766,8 @@ module Searchkick
751
766
  filters << {bool: {must_not: term_filters(field, op_value)}}
752
767
  end
753
768
  when :all
754
- op_value.each do |value|
755
- filters << term_filters(field, value)
769
+ op_value.each do |val|
770
+ filters << term_filters(field, val)
756
771
  end
757
772
  when :in
758
773
  filters << term_filters(field, op_value)