searchkick-hooopo 2.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +22 -0
- data/.travis.yml +35 -0
- data/CHANGELOG.md +491 -0
- data/Gemfile +12 -0
- data/LICENSE.txt +22 -0
- data/README.md +1908 -0
- data/Rakefile +20 -0
- data/benchmark/Gemfile +23 -0
- data/benchmark/benchmark.rb +97 -0
- data/lib/searchkick/bulk_reindex_job.rb +17 -0
- data/lib/searchkick/index.rb +500 -0
- data/lib/searchkick/index_options.rb +333 -0
- data/lib/searchkick/indexer.rb +28 -0
- data/lib/searchkick/logging.rb +242 -0
- data/lib/searchkick/middleware.rb +12 -0
- data/lib/searchkick/model.rb +156 -0
- data/lib/searchkick/process_batch_job.rb +23 -0
- data/lib/searchkick/process_queue_job.rb +23 -0
- data/lib/searchkick/query.rb +901 -0
- data/lib/searchkick/reindex_queue.rb +38 -0
- data/lib/searchkick/reindex_v2_job.rb +39 -0
- data/lib/searchkick/results.rb +216 -0
- data/lib/searchkick/tasks.rb +33 -0
- data/lib/searchkick/version.rb +3 -0
- data/lib/searchkick.rb +215 -0
- data/searchkick.gemspec +28 -0
- data/test/aggs_test.rb +197 -0
- data/test/autocomplete_test.rb +75 -0
- data/test/boost_test.rb +175 -0
- data/test/callbacks_test.rb +59 -0
- data/test/ci/before_install.sh +17 -0
- data/test/errors_test.rb +19 -0
- data/test/gemfiles/activerecord31.gemfile +7 -0
- data/test/gemfiles/activerecord32.gemfile +7 -0
- data/test/gemfiles/activerecord40.gemfile +8 -0
- data/test/gemfiles/activerecord41.gemfile +8 -0
- data/test/gemfiles/activerecord42.gemfile +7 -0
- data/test/gemfiles/activerecord50.gemfile +7 -0
- data/test/gemfiles/apartment.gemfile +8 -0
- data/test/gemfiles/cequel.gemfile +8 -0
- data/test/gemfiles/mongoid2.gemfile +7 -0
- data/test/gemfiles/mongoid3.gemfile +6 -0
- data/test/gemfiles/mongoid4.gemfile +7 -0
- data/test/gemfiles/mongoid5.gemfile +7 -0
- data/test/gemfiles/mongoid6.gemfile +8 -0
- data/test/gemfiles/nobrainer.gemfile +8 -0
- data/test/gemfiles/parallel_tests.gemfile +8 -0
- data/test/geo_shape_test.rb +172 -0
- data/test/highlight_test.rb +78 -0
- data/test/index_test.rb +153 -0
- data/test/inheritance_test.rb +83 -0
- data/test/marshal_test.rb +8 -0
- data/test/match_test.rb +276 -0
- data/test/misspellings_test.rb +56 -0
- data/test/model_test.rb +42 -0
- data/test/multi_search_test.rb +22 -0
- data/test/multi_tenancy_test.rb +22 -0
- data/test/order_test.rb +46 -0
- data/test/pagination_test.rb +53 -0
- data/test/partial_reindex_test.rb +58 -0
- data/test/query_test.rb +35 -0
- data/test/records_test.rb +10 -0
- data/test/reindex_test.rb +52 -0
- data/test/reindex_v2_job_test.rb +32 -0
- data/test/routing_test.rb +23 -0
- data/test/should_index_test.rb +32 -0
- data/test/similar_test.rb +28 -0
- data/test/sql_test.rb +198 -0
- data/test/suggest_test.rb +85 -0
- data/test/synonyms_test.rb +67 -0
- data/test/test_helper.rb +527 -0
- data/test/where_test.rb +223 -0
- metadata +250 -0
@@ -0,0 +1,901 @@
|
|
1
|
+
module Searchkick
|
2
|
+
class Query
|
3
|
+
extend Forwardable

# Option keys that #set_aggregations treats as metric aggregations.
@@metric_aggs = [:avg, :cardinality, :max, :min, :sum]

attr_reader :klass, :term, :options
attr_accessor :body

# Every method listed here is forwarded to the return value of #execute,
# so calling any of them on a query runs the search (once) and answers
# from its results.
def_delegators(
  :execute,
  # enumeration and array-like access
  :map, :each, :any?, :empty?, :size, :length, :slice, :[], :to_ary,
  # result payloads
  :records, :results, :suggestions, :each_with_hit, :with_details, :aggregations, :aggs,
  # response metadata
  :took, :error, :model_name, :entry_name, :total_count, :total_entries,
  # pagination
  :current_page, :per_page, :limit_value, :padding, :total_pages, :num_pages,
  :offset_value, :offset, :previous_page, :prev_page, :next_page, :first_page?, :last_page?,
  :out_of_range?, :hits, :response, :to_a, :first
)
|
16
|
+
|
17
|
+
# Builds a query for +klass+ and immediately prepares the request body.
#
# klass   - model class the search is run for (may be nil).
# term    - search term; converted with #to_s. "*" is the default.
# options - search options; any key outside the supported list raises.
#
# Raises ArgumentError when an unsupported option key is given.
def initialize(klass, term = "*", **options)
  supported_keywords = [
    :aggs, :body, :body_options, :boost,
    :boost_by, :boost_by_distance, :boost_where, :conversions, :debug, :emoji, :exclude, :execute, :explain,
    :fields, :highlight, :includes, :index_name, :indices_boost, :limit, :load,
    :match, :misspellings, :offset, :operator, :order, :padding, :page, :per_page, :profile,
    :request_params, :routing, :select, :similar, :smart_aggs, :suggest, :track, :type, :where
  ]
  unsupported = options.keys - supported_keywords
  if unsupported.any?
    raise ArgumentError, "unknown keywords: #{unsupported.join(", ")}"
  end

  query_text = term.to_s
  # With :emoji set, each emoji is replaced by its name surrounded by spaces.
  query_text = EmojiParser.parse_unicode(query_text) { |emoji| " #{emoji.name} " }.strip if options[:emoji]

  @klass = klass
  @term = query_text
  @options = options
  @match_suffix = options[:match] || searchkick_options[:match] || "analyzed"

  # Pre-declare the remaining state so Ruby does not warn about
  # uninitialized instance variables.
  @type = nil
  @routing = nil
  @misspellings = false
  @misspellings_below = nil
  @highlighted_fields = nil

  prepare
end
|
45
|
+
|
46
|
+
# Index object of the searched model, or nil when no model was given.
def searchkick_index
  return nil unless klass

  klass.searchkick_index
end
|
49
|
+
|
50
|
+
# Searchkick options declared on the model, or an empty hash when no
# model was given.
def searchkick_options
  return {} unless klass

  klass.searchkick_options
end
|
53
|
+
|
54
|
+
# Class reported by the model's own +searchkick_klass+, or nil when no
# model was given.
def searchkick_klass
  return nil unless klass

  klass.searchkick_klass
end
|
57
|
+
|
58
|
+
# Request parameters handed to the Elasticsearch client.
#
# The index is taken from options[:index_name] (entries that respond to
# +searchkick_index+ are replaced by that index's name and everything is
# joined with commas), otherwise from the model's index, otherwise "_all".
# :type and :routing are added only when set, and options[:request_params]
# is merged last so it can override any of the above.
def params
  explicit = options[:index_name]
  target =
    if explicit
      names = Array(explicit).map do |entry|
        entry.respond_to?(:searchkick_index) ? entry.searchkick_index.name : entry
      end
      names.join(",")
    elsif searchkick_index
      searchkick_index.name
    else
      "_all"
    end

  request = {index: target, body: body}
  request[:type] = @type if @type
  request[:routing] = @routing if @routing

  extra = options[:request_params]
  request.merge!(extra) if extra
  request
end
|
77
|
+
|
78
|
+
# Runs the search and memoizes the outcome in @execute.
#
# When a misspellings threshold (@misspellings_below) is set and the
# first response reports fewer total hits than that threshold, the body
# is rebuilt with #prepare and the search is run a second time.
# Any StandardError raised while searching is passed to #handle_error.
def execute
  return @execute if @execute

  begin
    response = execute_search
    too_few_hits = @misspellings_below && response["hits"]["total"] < @misspellings_below
    if too_few_hits
      prepare
      response = execute_search
    end
  rescue => error # TODO rescue type
    handle_error(error)
  end

  @execute = handle_response(response)
end
|
92
|
+
|
93
|
+
# Renders the prepared request as a curl command line (used by the
# :debug output).
#
# The host is the first one configured on the client transport; there is
# no easy way to tell which host the client will actually use.
# Credentials are included when the host has a user or a password.
# Single quotes inside the JSON body are escaped for the shell so the
# printed command can be pasted as-is, and a non-array :type (for example
# one supplied through :request_params) is accepted.
def to_curl
  query = params
  type = query[:type]
  index = query[:index].is_a?(Array) ? query[:index].join(",") : query[:index]

  host = Searchkick.client.transport.hosts.first
  credentials = host[:user] || host[:password] ? "#{host[:user]}:#{host[:password]}@" : nil
  type_path = type ? "/#{Array(type).map { |t| CGI.escape(t) }.join(',')}" : ""

  # ' becomes '\'' : close the quoted string, emit an escaped quote, reopen.
  body = query[:body].to_json.gsub("'", "'\\\\''")

  "curl #{host[:protocol]}://#{credentials}#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type_path}/_search?pretty -d '#{body}'"
end
|
103
|
+
|
104
|
+
# Wraps a raw search response in Searchkick::Results and stores it in
# @execute (also used by multi search to set the result from outside).
#
# With options[:debug] set, a diagnostic report is printed first:
# versions, model and search options, sample search data, mapping,
# settings, the equivalent curl command and the raw response.
def handle_response(response)
  if options[:debug]
    require "pp"

    puts "Searchkick Version: #{Searchkick::VERSION}"
    puts "Elasticsearch Version: #{Searchkick.server_version}"
    puts

    puts "Model Searchkick Options"
    pp searchkick_options
    puts

    puts "Search Options"
    pp options
    puts

    if searchkick_index
      puts "Model Search Data"
      begin
        samples = klass.first(3).map do |record|
          {index: searchkick_index.record_data(record).merge(data: searchkick_index.send(:search_data, record))}
        end
        pp samples
      rescue => error
        # best effort: report the failure and keep printing the rest
        puts "#{error.class.name}: #{error.message}"
      end
      puts

      puts "Elasticsearch Mapping"
      puts JSON.pretty_generate(searchkick_index.mapping)
      puts

      puts "Elasticsearch Settings"
      puts JSON.pretty_generate(searchkick_index.settings)
      puts
    end

    puts "Elasticsearch Query"
    puts to_curl
    puts

    puts "Elasticsearch Results"
    puts JSON.pretty_generate(response)
  end

  result_options = {
    page: @page,
    per_page: @per_page,
    padding: @padding,
    load: @load,
    includes: options[:includes],
    json: !@json.nil?,
    match_suffix: @match_suffix,
    highlighted_fields: @highlighted_fields || [],
    misspellings: @misspellings
  }

  # set execute for multi search
  @execute = Searchkick::Results.new(searchkick_klass, response, result_options)
end
|
161
|
+
|
162
|
+
private
|
163
|
+
|
164
|
+
# Translates a client error into a Searchkick-specific exception.
#
# The status code is read from characters 1..3 of the error message.
#   404                         -> MissingIndexError
#   500 with a known message    -> UnsupportedVersionError
#   400 with a known message    -> UnsupportedVersionError
#   400 analyzer not found      -> InvalidQueryError (bad mapping)
#   400 otherwise               -> InvalidQueryError with the original message
#   anything else               -> the original error is re-raised
def handle_error(e)
  message = e.message
  status_code = message[1..3].to_i
  mentions_any = lambda do |fragments|
    fragments.any? { |fragment| message.include?(fragment) }
  end

  case status_code
  when 404
    raise MissingIndexError, "Index missing - run #{reindex_command}"
  when 500
    unsupported_server = mentions_any.call([
      "IllegalArgumentException[minimumSimilarity >= 1]",
      "No query registered for [multi_match]",
      "[match] query does not support [cutoff_frequency]",
      "No query registered for [function_score]"
    ])
    raise UnsupportedVersionError, "This version of Searchkick requires Elasticsearch 2 or greater" if unsupported_server

    raise e
  when 400
    unsupported_filter = mentions_any.call([
      "bool query does not support [filter]",
      "[bool] filter does not support [filter]"
    ])
    if unsupported_filter
      raise UnsupportedVersionError, "This version of Searchkick requires Elasticsearch 2 or greater"
    elsif message.include?("[multi_match] analyzer [searchkick_search] not found")
      raise InvalidQueryError, "Bad mapping - run #{reindex_command}"
    else
      raise InvalidQueryError, message
    end
  else
    raise e
  end
end
|
192
|
+
|
193
|
+
# Reindex hint used in error messages: "<Model>.reindex" when the
# searchkick class is known, plain "reindex" otherwise.
def reindex_command
  model = searchkick_klass
  return "reindex" unless model

  "#{model.name}.reindex"
end
|
196
|
+
|
197
|
+
# Sends the prepared request (#params) through the shared client and
# returns the raw response.
def execute_search
  client = Searchkick.client
  client.search(params)
end
|
200
|
+
|
201
|
+
# Builds the request body from the term and options and stores it,
# together with the pagination and loading settings, in instance
# variables (@body, @page, @per_page, @padding, @load).
#
# When options[:body] is given it is used as the body verbatim and only
# options[:body_options] is merged on top. Otherwise the body is built
# from: the text query (similar / match-all / per-field dis_max),
# conversions boosting, boost functions, pagination, ordering, index
# boosts, filters, aggregations, suggestions, highlighting, timeout,
# source selection, document types and routing.
#
# Also updates @json, @misspellings, @misspellings_below, @type and
# @routing as a side effect. #execute calls this a second time when the
# misspellings :below threshold is not met.
def prepare
  boost_fields, fields = set_fields

  operator = options[:operator] || "and"

  # pagination
  page = [options[:page].to_i, 1].max
  per_page = (options[:limit] || options[:per_page] || 1_000).to_i
  padding = [options[:padding].to_i, 0].max
  offset = options[:offset] || (page - 1) * per_page + padding

  # model and eager loading
  load = options[:load].nil? ? true : options[:load]

  conversions_fields = Array(options[:conversions] || searchkick_options[:conversions]).map(&:to_s)

  all = term == "*"

  @json = options[:body]
  if @json
    # caller supplied the full body
    payload = @json
  else
    if options[:similar]
      payload = {
        more_like_this: {
          like_text: term,
          min_doc_freq: 1,
          min_term_freq: 1,
          analyzer: Searchkick.searchkick_search2_analyzer
        }
      }
      if fields != ["_all"]
        payload[:more_like_this][:fields] = fields
      end
    elsif all
      payload = {
        match_all: {}
      }
    else
      queries = []

      # misspellings default to enabled unless the option is given
      misspellings =
        if options.key?(:misspellings)
          options[:misspellings]
        else
          true
        end

      # First pass with a :below threshold: remember the threshold and
      # search without misspellings. On the second pass the threshold is
      # already set, so the misspellings hash is used as-is.
      if misspellings.is_a?(Hash) && misspellings[:below] && !@misspellings_below
        @misspellings_below = misspellings[:below].to_i
        misspellings = false
      end

      if misspellings != false
        edit_distance = (misspellings.is_a?(Hash) && (misspellings[:edit_distance] || misspellings[:distance])) || 1
        transpositions =
          if misspellings.is_a?(Hash) && misspellings.key?(:transpositions)
            {fuzzy_transpositions: misspellings[:transpositions]}
          else
            {fuzzy_transpositions: true}
          end
        prefix_length = (misspellings.is_a?(Hash) && misspellings[:prefix_length]) || 0
        default_max_expansions = @misspellings_below ? 20 : 3
        max_expansions = (misspellings.is_a?(Hash) && misspellings[:max_expansions]) || default_max_expansions
        @misspellings = true
      else
        @misspellings = false
      end

      fields.each do |field|
        queries_to_add = []
        qs = []

        factor = boost_fields[field] || 1
        shared_options = {
          query: term,
          boost: 10 * factor
        }

        # ".phrase" fields are searched with match_phrase on the
        # analyzed field ("_all.phrase" maps to "_all")
        match_type =
          if field.end_with?(".phrase")
            field =
              if field == "_all.phrase"
                "_all"
              else
                field.sub(/\.phrase\z/, ".analyzed")
              end

            :match_phrase
          else
            :match
          end

        shared_options[:operator] = operator if match_type == :match

        exclude_analyzer = nil
        exclude_field = field

        if field == "_all" || field.end_with?(".analyzed")
          # compare as a string so that operator: :and is treated the
          # same as operator: "and"
          shared_options[:cutoff_frequency] = 0.001 unless operator.to_s == "and" || misspellings == false
          # NOTE(review): the first accessor is spelled "analyer" (no "z") -
          # confirm it matches the definition on the Searchkick module.
          qs.concat [
            shared_options.merge(analyzer: Searchkick.searchkick_search_analyer),
            shared_options.merge(analyzer: Searchkick.searchkick_search2_analyzer)
          ]
          exclude_analyzer = Searchkick.searchkick_search2_analyzer
        elsif field.end_with?(".exact")
          # exact matches query the base field with the keyword analyzer
          f = field.split(".")[0..-2].join(".")
          queries_to_add << {match: {f => shared_options.merge(analyzer: "keyword")}}
          exclude_field = f
          exclude_analyzer = "keyword"
        else
          analyzer = field =~ /\.word_(start|middle|end)\z/ ? "searchkick_word_search" : "searchkick_autocomplete_search"
          qs << shared_options.merge(analyzer: analyzer)
          exclude_analyzer = analyzer
        end

        # add a fuzzy variant of every query, with the lower boost
        if misspellings != false && match_type == :match
          qs.concat qs.map { |q| q.except(:cutoff_frequency).merge(fuzziness: edit_distance, prefix_length: prefix_length, max_expansions: max_expansions, boost: factor).merge(transpositions) }
        end

        q2 = qs.map { |q| {match_type => {field => q}} }

        # boost exact matches more
        if field =~ /\.word_(start|middle|end)\z/ && searchkick_options[:word] != false
          queries_to_add << {
            bool: {
              must: {
                bool: {
                  should: q2
                }
              },
              should: {match_type => {field.sub(/\.word_(start|middle|end)\z/, ".analyzed") => qs.first}}
            }
          }
        else
          queries_to_add.concat(q2)
        end

        # wrap this field's queries so excluded phrases are rejected
        if options[:exclude]
          must_not =
            Array(options[:exclude]).map do |phrase|
              {
                match_phrase: {
                  exclude_field => {
                    query: phrase,
                    analyzer: exclude_analyzer
                  }
                }
              }
            end

          queries_to_add = [{
            bool: {
              should: queries_to_add,
              must_not: must_not
            }
          }]
        end

        queries.concat(queries_to_add)
      end

      payload = {
        dis_max: {
          queries: queries
        }
      }

      if conversions_fields.present? && options[:conversions] != false
        shoulds = []
        conversions_fields.each do |conversions_field|
          # wrap payload in a bool query
          script_score = {field_value_factor: {field: "#{conversions_field}.count"}}

          shoulds << {
            nested: {
              path: conversions_field,
              score_mode: "sum",
              query: {
                function_score: {
                  boost_mode: "replace",
                  query: {
                    match: {
                      "#{conversions_field}.query" => term
                    }
                  }
                }.merge(script_score)
              }
            }
          }
        end
        payload = {
          bool: {
            must: payload,
            should: shoulds
          }
        }
      end
    end

    custom_filters = []
    multiply_filters = []

    set_boost_by(multiply_filters, custom_filters)
    set_boost_where(custom_filters)
    set_boost_by_distance(custom_filters) if options[:boost_by_distance]

    # additive boosts wrap the query first, multiplicative boosts wrap
    # the result of that
    if custom_filters.any?
      payload = {
        function_score: {
          functions: custom_filters,
          query: payload,
          score_mode: "sum"
        }
      }
    end

    if multiply_filters.any?
      payload = {
        function_score: {
          functions: multiply_filters,
          query: payload,
          score_mode: "multiply"
        }
      }
    end

    payload = {
      query: payload,
      size: per_page,
      from: offset
    }
    payload[:explain] = options[:explain] if options[:explain]
    payload[:profile] = options[:profile] if options[:profile]

    # order
    set_order(payload) if options[:order]

    # indices_boost
    set_boost_by_indices(payload)

    # filters
    filters = where_filters(options[:where])
    set_filters(payload, filters) if filters.any?

    # aggregations
    set_aggregations(payload) if options[:aggs]

    # suggestions
    set_suggestions(payload) if options[:suggest]

    # highlight
    set_highlights(payload, fields) if options[:highlight]

    # timeout shortly after client times out
    payload[:timeout] ||= "#{Searchkick.search_timeout + 1}s"

    # An empty array will cause only the _id and _type for each hit to be returned
    # doc for :select - http://www.elasticsearch.org/guide/reference/api/search/fields/
    # doc for :select_v2 - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-source-filtering.html
    if options[:select]
      if options[:select] == []
        # intuitively [] makes sense to return no fields, but ES by default returns all fields
        payload[:_source] = false
      else
        payload[:_source] = options[:select]
      end
    elsif load
      # records are loaded from the model, so the source is not needed
      payload[:_source] = false
    end

    if options[:type] || (klass != searchkick_klass && searchkick_index)
      @type = [options[:type] || klass].flatten.map { |v| searchkick_index.klass_document_type(v) }
    end

    # routing
    @routing = options[:routing] if options[:routing]
  end

  # merge more body options
  payload = payload.deep_merge(options[:body_options]) if options[:body_options]

  @body = payload
  @page = page
  @per_page = per_page
  @padding = padding
  @load = load
end
|
489
|
+
|
490
|
+
# Resolves the fields to search and their boosts.
#
# Fields come from options[:fields], falling back to the model's
# :searchable option. Each entry is either a name (using the default
# match type) or a one-pair hash of name => match type; a "^n" suffix on
# the name sets a boost. The :word match type maps to the ".analyzed"
# sub-field. Without any fields, "_all" (or "_all.phrase") is used for
# the :word (or :phrase) default match; other defaults raise.
#
# Returns [boosts_by_field, field_names].
# Raises ArgumentError ("Must specify fields") when nothing can be resolved.
def set_fields
  boosts = {}
  requested = options[:fields] || searchkick_options[:searchable]
  fallback_match = options[:match] || searchkick_options[:match] || :word

  unless requested
    return [boosts, ["_all"]] if fallback_match == :word
    return [boosts, ["_all.phrase"]] if fallback_match == :phrase

    raise ArgumentError, "Must specify fields"
  end

  resolved = requested.map do |entry|
    name, match = entry.is_a?(Hash) ? entry.to_a.first : [entry, fallback_match]
    base, boost = name.to_s.split("^", 2)
    suffix = match == :word ? 'analyzed' : match
    qualified = "#{base}.#{suffix}"
    boosts[qualified] = boost.to_f if boost
    qualified
  end

  [boosts, resolved]
end
|
512
|
+
|
513
|
+
# Appends one decay function per entry of options[:boost_by_distance]
# to +custom_filters+.
#
# Accepts {field => attributes} or the legacy {field: name, ...} format.
# Attributes default to function :gauss and scale "5mi"; only :origin,
# :scale, :offset and :decay are passed on, and :origin is converted
# with #location_value.
#
# Raises ArgumentError when an entry has no :origin.
def set_boost_by_distance(custom_filters)
  defaults = {function: :gauss, scale: "5mi"}
  passed_on = [:origin, :scale, :offset, :decay]

  specs = options[:boost_by_distance] || {}
  # legacy format
  specs = {specs[:field] => specs.except(:field)} if specs[:field]

  specs.each do |field, attributes|
    settings = defaults.merge(attributes)
    raise ArgumentError, "boost_by_distance requires :origin" unless settings[:origin]

    decay_params = settings.select { |key, _| passed_on.include?(key) }
    decay_params[:origin] = location_value(decay_params[:origin])
    custom_filters << {settings[:function] => {field => decay_params}}
  end
end
|
535
|
+
|
536
|
+
# Turns options[:boost_by] (and options[:boost]) into boost functions.
#
# An array of fields becomes {field => {factor: 1}}. In a hash, entries
# with boost_mode "multiply" go to +multiply_filters+; all others, plus
# the single options[:boost] field, go to +custom_filters+ with the
# logarithmic modifier enabled by default.
def set_boost_by(multiply_filters, custom_filters)
  additive = options[:boost_by] || {}
  multiplicative = nil

  case additive
  when Array
    additive = Hash[additive.map { |field| [field, {factor: 1}] }]
  when Hash
    multiplicative, additive =
      additive.partition { |_, settings| settings[:boost_mode] == "multiply" }.map { |pairs| Hash[pairs] }
  end

  single = options[:boost]
  additive[single] = {factor: 1} if single

  custom_filters.concat boost_filters(additive, log: true)
  multiply_filters.concat boost_filters(multiplicative || {})
end
|
548
|
+
|
549
|
+
# Appends a weighted filter to +custom_filters+ for every condition in
# options[:boost_where].
#
# A condition may be a list of {value:, factor:} hashes, a single such
# hash, or a bare value (which gets a factor of 1000).
def set_boost_where(custom_filters)
  (options[:boost_where] || {}).each do |field, condition|
    weighted =
      if condition.is_a?(Array) && condition.first.is_a?(Hash)
        condition.map { |entry| [entry[:value], entry[:factor]] }
      elsif condition.is_a?(Hash)
        [[condition[:value], condition[:factor]]]
      else
        [[condition, 1000]]
      end

    weighted.each do |value, factor|
      custom_filters << custom_filter(field, value, factor)
    end
  end
end
|
564
|
+
|
565
|
+
# Adds payload[:indices_boost] from options[:indices_boost]; does
# nothing when the option is absent.
#
# Keys may be index names or objects responding to +searchkick_index+.
# Each name is looked up with the client's get_alias call and the first
# key of that response is used when present, so that the index is
# addressed explicitly instead of through its alias:
# https://github.com/elasticsearch/elasticsearch/issues/4756
def set_boost_by_indices(payload)
  requested = options[:indices_boost]
  return unless requested

  resolved = {}
  requested.each do |target, weight|
    name = target.respond_to?(:searchkick_index) ? target.searchkick_index.name : target
    concrete = Searchkick.client.indices.get_alias(index: name).keys.first
    resolved[concrete || name] = weight
  end

  payload[:indices_boost] = resolved
end
|
577
|
+
|
578
|
+
# Adds phrase suggesters to +payload+ for the model's :suggest fields.
#
# When options[:fields] is given, only suggest fields that are also
# searched are used (names are compared without any "^boost" suffix).
# Nothing is added when no field remains.
def set_suggestions(payload)
  candidates = (searchkick_options[:suggest] || []).map(&:to_s)

  searched = options[:fields]
  if searched
    searched_names = searched.map do |entry|
      name = entry.is_a?(Hash) ? entry.keys.first : entry
      name.to_s.split("^", 2).first
    end
    # intersection
    candidates &= searched_names
  end

  return unless candidates.any?

  payload[:suggest] = {text: term}
  candidates.each do |name|
    payload[:suggest][name] = {phrase: {field: "#{name}.suggest"}}
  end
end
|
597
|
+
|
598
|
+
# Adds highlighting to +payload+ and records the highlighted field names
# in @highlighted_fields.
#
# By default every searched field is highlighted. When
# options[:highlight] is a hash it may set :tag (the closing tag is
# derived from the opening one), :fragment_size, :encoder and :fields;
# explicit :fields replace the defaults and are suffixed with the match
# suffix.
def set_highlights(payload, fields)
  highlight = {fields: fields.each_with_object({}) { |name, memo| memo[name] = {} }}
  payload[:highlight] = highlight

  settings = options[:highlight]
  if settings.is_a?(Hash)
    tag = settings[:tag]
    if tag
      highlight[:pre_tags] = [tag]
      highlight[:post_tags] = [tag.to_s.gsub(/\A<(\w+).+/, "</\\1>")]
    end

    [:fragment_size, :encoder].each do |key|
      highlight[key] = settings[key] if settings[key]
    end

    custom_fields = settings[:fields]
    if custom_fields
      highlight[:fields] = {}
      custom_fields.each do |name, field_options|
        highlight[:fields]["#{name}.#{@match_suffix}"] = field_options || {}
      end
    end
  end

  @highlighted_fields = highlight[:fields].keys
end
|
628
|
+
|
629
|
+
# Builds payload[:aggs] from options[:aggs].
#
# An array of field names is treated as {name => {}}. Per aggregation the
# first matching form is used: :ranges, :date_ranges, :date_histogram,
# one of the metric keys, or a terms aggregation (size from :limit,
# default 1000). :order and :min_doc_count are passed through for range,
# date range and terms aggregations.
#
# Unless options[:smart_aggs] is false, the search's :where conditions
# (minus the aggregated field itself) plus the aggregation's own :where
# are applied by wrapping the aggregation in a filter aggregation.
def set_aggregations(payload)
  requested = options[:aggs]
  payload[:aggs] = {}

  # convert to more advanced syntax
  requested = Hash[requested.map { |name| [name, {}] }] if requested.is_a?(Array)

  requested.each do |field, agg_options|
    target_field = agg_options[:field] || field
    passthrough = agg_options.slice(:order, :min_doc_count)

    payload[:aggs][field] =
      if agg_options[:ranges]
        {range: {field: target_field, ranges: agg_options[:ranges]}.merge(passthrough)}
      elsif agg_options[:date_ranges]
        {date_range: {field: target_field, ranges: agg_options[:date_ranges]}.merge(passthrough)}
      elsif (histogram = agg_options[:date_histogram])
        {date_histogram: {field: histogram[:field], interval: histogram[:interval]}}
      elsif (metric = @@metric_aggs.find { |name| agg_options.has_key?(name) })
        {metric => {field: agg_options[metric][:field] || field}}
      else
        {terms: {field: target_field, size: agg_options[:limit] || 1_000}.merge(passthrough)}
      end

    inherited =
      if options[:smart_aggs] == false
        {}
      else
        (options[:where] || {}).reject { |key| key == field }
      end
    conditions = where_filters(inherited.merge(agg_options[:where] || {}))
    next unless conditions.any?

    payload[:aggs][field] = {
      filter: {bool: {must: conditions}},
      aggs: {field => payload[:aggs][field]}
    }
  end
end
|
693
|
+
|
694
|
+
# Applies +filters+ to +payload+.
#
# With aggregations requested the filters go into :post_filter;
# otherwise they are folded into the query itself as a bool filter,
# which is more efficient when there are no aggs.
def set_filters(payload, filters)
  unless options[:aggs]
    scored_query = payload[:query]
    payload[:query] = {bool: {must: scored_query, filter: filters}}
    return payload[:query]
  end

  payload[:post_filter] = {bool: {filter: filters}}
end
|
711
|
+
|
712
|
+
# TODO id transformation for arrays
|
713
|
+
# Sets payload[:sort] from options[:order].
#
# A single non-enumerable value is sorted ascending. Arrays are passed
# through untouched; in hashes an "id" key is replaced by :_id on
# servers below 5.0 and by :_uid otherwise.
def set_order(payload)
  requested = options[:order]
  requested = {requested => :asc} unless requested.is_a?(Enumerable)
  id_field = below50? ? :_id : :_uid

  payload[:sort] =
    if requested.is_a?(Array)
      requested
    else
      requested.each_with_object({}) do |(key, direction), sort|
        sort[key.to_s == "id" ? id_field : key] = direction
      end
    end
end
|
718
|
+
|
719
|
+
# Converts a :where style hash into a list of filter clauses.
#
# where - hash of field => condition (nil is treated as empty).
#
# Special keys: :or (list of OR groups), :_or, :_and (lists of condition
# hashes) and :_not (a condition hash) recurse into this method.
# Field conditions may be a plain value, an Array (treated as :in), a
# Range, or a hash of operators: :near/:within, :top_left/:bottom_right,
# :geo_polygon, :geo_shape, :regexp, :not, :all, :in, :gt, :gte, :lt, :lte.
# Several range operators on the same field are merged into one range
# clause. Ranges without a lower or upper bound (endless / beginless)
# produce a clause for the bound that is present only.
#
# Returns an Array of filter hashes.
# Raises RuntimeError for an unknown operator.
def where_filters(where)
  filters = []
  (where || {}).each do |field, value|
    field = :_id if field.to_s == "id"

    if field == :or
      value.each do |or_clause|
        filters << {bool: {should: or_clause.map { |or_statement| {bool: {filter: where_filters(or_statement)}} }}}
      end
    elsif field == :_or
      filters << {bool: {should: value.map { |or_statement| {bool: {filter: where_filters(or_statement)}} }}}
    elsif field == :_not
      filters << {bool: {must_not: where_filters(value)}}
    elsif field == :_and
      filters << {bool: {must: value.map { |or_statement| {bool: {filter: where_filters(or_statement)}} }}}
    else
      # expand ranges; begin/end are used instead of first/last so that
      # open-ended ranges do not raise, and a missing bound is left out
      if value.is_a?(Range)
        bounds = {}
        bounds[:gte] = value.begin unless value.begin.nil?
        bounds[value.exclude_end? ? :lt : :lte] = value.end unless value.end.nil?
        value = bounds
      end

      value = {in: value} if value.is_a?(Array)

      if value.is_a?(Hash)
        value.each do |op, op_value|
          case op
          when :within, :bottom_right
            # do nothing: consumed by :near and :top_left respectively
          when :near
            filters << {
              geo_distance: {
                field => location_value(op_value),
                distance: value[:within] || "50mi"
              }
            }
          when :geo_polygon
            filters << {
              geo_polygon: {
                field => op_value
              }
            }
          when :geo_shape
            shape = op_value.except(:relation)
            shape[:coordinates] = coordinate_array(shape[:coordinates]) if shape[:coordinates]
            filters << {
              geo_shape: {
                field => {
                  relation: op_value[:relation] || "intersects",
                  shape: shape
                }
              }
            }
          when :top_left
            filters << {
              geo_bounding_box: {
                field => {
                  top_left: location_value(op_value),
                  bottom_right: location_value(value[:bottom_right])
                }
              }
            }
          when :regexp # support for regexp queries without using a regexp ruby object
            filters << {regexp: {field => {value: op_value}}}
          when :not # not equal
            filters << {bool: {must_not: term_filters(field, op_value)}}
          when :all
            op_value.each do |val|
              filters << term_filters(field, val)
            end
          when :in
            filters << term_filters(field, op_value)
          else
            range_query =
              case op
              when :gt
                {from: op_value, include_lower: false}
              when :gte
                {from: op_value, include_lower: true}
              when :lt
                {to: op_value, include_upper: false}
              when :lte
                {to: op_value, include_upper: true}
              else
                raise "Unknown where operator: #{op.inspect}"
              end
            # issue 132: merge into an existing range clause for this field
            if (existing = filters.find { |f| f[:range] && f[:range][field] })
              existing[:range][field].merge!(range_query)
            else
              filters << {range: {field => range_query}}
            end
          end
        end
      else
        filters << term_filters(field, value)
      end
    end
  end
  filters
end
|
819
|
+
|
820
|
+
# Builds the filter clause matching +field+ against a single value.
#
#   Array   -> "in" clause; when the array contains nil, a should of
#              "field missing" and an "in" clause of the non-nil values
#   nil     -> must_not exists (field missing)
#   Regexp  -> regexp clause using the pattern source
#   other   -> term clause
def term_filters(field, value)
  case value
  when Array # in query
    if value.none?(&:nil?)
      {in: {field => value}}
    else
      missing = term_filters(field, nil)
      listed = term_filters(field, value.compact)
      {bool: {should: [missing, listed]}}
    end
  when NilClass
    {bool: {must_not: {exists: {field: field}}}}
  when Regexp
    {regexp: {field => {value: value.source}}}
  else
    {term: {field => value}}
  end
end
|
835
|
+
|
836
|
+
# Builds one weighted scoring function for a boost_where condition.
#
# Servers below 5.0 get the filters wrapped in a bool/must together
# with :boost_factor; newer servers get the bare filter list with :weight.
def custom_filter(field, value, factor)
  legacy_server = below50?
  conditions = where_filters(field => value)

  return {filter: conditions, weight: factor} unless legacy_server

  {filter: {bool: {must: conditions}}, boost_factor: factor}
end
|
853
|
+
|
854
|
+
# Builds field_value_factor scoring functions for boost_by style options.
#
# boost_by - hash of field => settings (:factor, default 1; :log).
# options  - :log supplies the default for the logarithmic modifier when
#            a field's settings do not contain a :log key.
#
# Each function only applies to documents where the field exists.
# The settings hashes are read but never modified, so caller-owned (or
# frozen) option hashes are safe to pass.
#
# Returns an Array of function hashes, one per field.
def boost_filters(boost_by, options = {})
  boost_by.map do |field, value|
    log = value.key?(:log) ? value[:log] : options[:log]
    factor = value[:factor] || 1

    script_score = {
      field_value_factor: {
        field: field,
        factor: factor.to_f,
        modifier: log ? "ln2p" : nil
      }
    }

    {
      filter: {
        exists: {
          field: field
        }
      }
    }.merge(script_score)
  end
end
|
875
|
+
|
876
|
+
# Normalizes geo shape coordinates to nested [lon, lat] arrays.
#
# A hash is read as {lat:, lon:} and becomes [lon, lat]. An array whose
# first element is not a number is processed element by element
# (recursively); an array of numbers, or any other value, is returned
# unchanged.
def coordinate_array(value)
  case value
  when Hash
    [value[:lon], value[:lat]]
  when Array
    value[0].is_a?(Numeric) ? value : value.map { |nested| coordinate_array(nested) }
  else
    value
  end
end
|
888
|
+
|
889
|
+
# Converts a location given as an array into floats in reversed order;
# any other value (for example a lat/lon hash) is returned unchanged.
def location_value(value)
  return value unless value.is_a?(Array)

  value.map(&:to_f).reverse
end
|
896
|
+
|
897
|
+
# True when Searchkick reports the server to be older than 5.0.0-alpha1.
def below50?
  first_5x_release = "5.0.0-alpha1"
  Searchkick.server_below?(first_5x_release)
end
|
900
|
+
end
|
901
|
+
end
|