searchkick-hooopo 2.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (74) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +22 -0
  3. data/.travis.yml +35 -0
  4. data/CHANGELOG.md +491 -0
  5. data/Gemfile +12 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +1908 -0
  8. data/Rakefile +20 -0
  9. data/benchmark/Gemfile +23 -0
  10. data/benchmark/benchmark.rb +97 -0
  11. data/lib/searchkick/bulk_reindex_job.rb +17 -0
  12. data/lib/searchkick/index.rb +500 -0
  13. data/lib/searchkick/index_options.rb +333 -0
  14. data/lib/searchkick/indexer.rb +28 -0
  15. data/lib/searchkick/logging.rb +242 -0
  16. data/lib/searchkick/middleware.rb +12 -0
  17. data/lib/searchkick/model.rb +156 -0
  18. data/lib/searchkick/process_batch_job.rb +23 -0
  19. data/lib/searchkick/process_queue_job.rb +23 -0
  20. data/lib/searchkick/query.rb +901 -0
  21. data/lib/searchkick/reindex_queue.rb +38 -0
  22. data/lib/searchkick/reindex_v2_job.rb +39 -0
  23. data/lib/searchkick/results.rb +216 -0
  24. data/lib/searchkick/tasks.rb +33 -0
  25. data/lib/searchkick/version.rb +3 -0
  26. data/lib/searchkick.rb +215 -0
  27. data/searchkick.gemspec +28 -0
  28. data/test/aggs_test.rb +197 -0
  29. data/test/autocomplete_test.rb +75 -0
  30. data/test/boost_test.rb +175 -0
  31. data/test/callbacks_test.rb +59 -0
  32. data/test/ci/before_install.sh +17 -0
  33. data/test/errors_test.rb +19 -0
  34. data/test/gemfiles/activerecord31.gemfile +7 -0
  35. data/test/gemfiles/activerecord32.gemfile +7 -0
  36. data/test/gemfiles/activerecord40.gemfile +8 -0
  37. data/test/gemfiles/activerecord41.gemfile +8 -0
  38. data/test/gemfiles/activerecord42.gemfile +7 -0
  39. data/test/gemfiles/activerecord50.gemfile +7 -0
  40. data/test/gemfiles/apartment.gemfile +8 -0
  41. data/test/gemfiles/cequel.gemfile +8 -0
  42. data/test/gemfiles/mongoid2.gemfile +7 -0
  43. data/test/gemfiles/mongoid3.gemfile +6 -0
  44. data/test/gemfiles/mongoid4.gemfile +7 -0
  45. data/test/gemfiles/mongoid5.gemfile +7 -0
  46. data/test/gemfiles/mongoid6.gemfile +8 -0
  47. data/test/gemfiles/nobrainer.gemfile +8 -0
  48. data/test/gemfiles/parallel_tests.gemfile +8 -0
  49. data/test/geo_shape_test.rb +172 -0
  50. data/test/highlight_test.rb +78 -0
  51. data/test/index_test.rb +153 -0
  52. data/test/inheritance_test.rb +83 -0
  53. data/test/marshal_test.rb +8 -0
  54. data/test/match_test.rb +276 -0
  55. data/test/misspellings_test.rb +56 -0
  56. data/test/model_test.rb +42 -0
  57. data/test/multi_search_test.rb +22 -0
  58. data/test/multi_tenancy_test.rb +22 -0
  59. data/test/order_test.rb +46 -0
  60. data/test/pagination_test.rb +53 -0
  61. data/test/partial_reindex_test.rb +58 -0
  62. data/test/query_test.rb +35 -0
  63. data/test/records_test.rb +10 -0
  64. data/test/reindex_test.rb +52 -0
  65. data/test/reindex_v2_job_test.rb +32 -0
  66. data/test/routing_test.rb +23 -0
  67. data/test/should_index_test.rb +32 -0
  68. data/test/similar_test.rb +28 -0
  69. data/test/sql_test.rb +198 -0
  70. data/test/suggest_test.rb +85 -0
  71. data/test/synonyms_test.rb +67 -0
  72. data/test/test_helper.rb +527 -0
  73. data/test/where_test.rb +223 -0
  74. metadata +250 -0
@@ -0,0 +1,901 @@
1
+ module Searchkick
2
+ class Query
3
+ extend Forwardable
4
+
5
+ @@metric_aggs = [:avg, :cardinality, :max, :min, :sum]
6
+
7
+ attr_reader :klass, :term, :options
8
+ attr_accessor :body
9
+
10
+ def_delegators :execute, :map, :each, :any?, :empty?, :size, :length, :slice, :[], :to_ary,
11
+ :records, :results, :suggestions, :each_with_hit, :with_details, :aggregations, :aggs,
12
+ :took, :error, :model_name, :entry_name, :total_count, :total_entries,
13
+ :current_page, :per_page, :limit_value, :padding, :total_pages, :num_pages,
14
+ :offset_value, :offset, :previous_page, :prev_page, :next_page, :first_page?, :last_page?,
15
+ :out_of_range?, :hits, :response, :to_a, :first
16
+
17
# Builds a query for +klass+ and prepares the request body right away.
#
# @param klass [Object, nil] searchable model the query is scoped to
# @param term [#to_s] search term; defaults to "*"
# @param options [Hash] search options; only the keys listed below are accepted
# @raise [ArgumentError] when an option key outside the supported list is given
def initialize(klass, term = "*", **options)
  supported_keywords = [
    :aggs, :body, :body_options, :boost,
    :boost_by, :boost_by_distance, :boost_where, :conversions, :debug, :emoji, :exclude, :execute, :explain,
    :fields, :highlight, :includes, :index_name, :indices_boost, :limit, :load,
    :match, :misspellings, :offset, :operator, :order, :padding, :page, :per_page, :profile,
    :request_params, :routing, :select, :similar, :smart_aggs, :suggest, :track, :type, :where
  ]
  unknown_keywords = options.keys - supported_keywords
  unless unknown_keywords.empty?
    raise ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}"
  end

  term = term.to_s
  # swap every emoji for its name so it is searched as plain text
  term = EmojiParser.parse_unicode(term) { |e| " #{e.name} " }.strip if options[:emoji]

  @klass = klass
  @term = term
  @options = options
  @match_suffix = options[:match] || searchkick_options[:match] || "analyzed"

  # prevent Ruby warnings
  @type = @routing = @misspellings_below = @highlighted_fields = nil
  @misspellings = false

  prepare
end
45
+
46
# @return [Object, nil] the model's searchkick index, or nil when the query has no model
def searchkick_index
  klass.searchkick_index if klass
end
49
+
50
# @return [Hash] the model's searchkick options, or an empty hash when the query has no model
def searchkick_options
  return {} unless klass

  klass.searchkick_options
end
53
+
54
# @return [Object, nil] the model's +searchkick_klass+, or nil when the query has no model
def searchkick_klass
  klass.searchkick_klass if klass
end
57
+
58
# Assembles the argument hash passed to the client's search call.
#
# The index is taken from the :index_name option (entries responding to
# +searchkick_index+ are resolved to that index's name), then from the model's
# index, and falls back to "_all".
#
# @return [Hash] :index and :body, plus :type / :routing when set, merged with :request_params
def params
  index_names = options[:index_name]
  index =
    if index_names
      resolved = Array(index_names).map do |entry|
        entry.respond_to?(:searchkick_index) ? entry.searchkick_index.name : entry
      end
      resolved.join(",")
    elsif searchkick_index
      searchkick_index.name
    else
      "_all"
    end

  request = {index: index, body: body}
  request[:type] = @type if @type
  request[:routing] = @routing if @routing

  extra = options[:request_params]
  request.merge!(extra) if extra
  request
end
77
+
78
# Runs the search once and memoizes the wrapped result in @execute.
#
# When a misspellings threshold is set (@misspellings_below) and the first
# response has fewer total hits than that threshold, the body is prepared
# again and the search is re-run. Any error raised while searching is handed
# to +handle_error+.
#
# @return [Object] whatever +handle_response+ returns for the final response
def execute
  return @execute if @execute

  begin
    response = execute_search
    too_few_hits = @misspellings_below && response["hits"]["total"] < @misspellings_below
    if too_few_hits
      prepare
      response = execute_search
    end
  rescue => e # TODO rescue type
    handle_error(e)
  end

  @execute = handle_response(response)
end
92
+
93
# Renders the prepared request as a curl command line for debugging.
#
# Single quotes inside the JSON body are escaped for the shell (close the
# quoted string, emit a literal quote, reopen it), so bodies containing
# terms such as "it's" still yield a command that can be pasted into a shell.
#
# @return [String] a curl invocation targeting the client's first configured host
def to_curl
  query = params
  type = query[:type]
  index = query[:index].is_a?(Array) ? query[:index].join(",") : query[:index]

  # no easy way to tell which host the client will use
  host = Searchkick.client.transport.hosts.first
  credentials = host[:user] || host[:password] ? "#{host[:user]}:#{host[:password]}@" : nil
  type_path = type ? "/#{type.map { |t| CGI.escape(t) }.join(',')}" : ''
  shell_safe_body = query[:body].to_json.gsub("'", "'\"'\"'")

  "curl #{host[:protocol]}://#{credentials}#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type_path}/_search?pretty -d '#{shell_safe_body}'"
end
103
+
104
# Wraps a raw search response in Searchkick::Results and stores it in @execute.
#
# With the :debug option set, it first prints version info, the model and
# search options, sample model data, the index mapping and settings, the
# equivalent curl command and the raw response to stdout.
#
# @param response [Hash] raw response from the search client
# @return [Searchkick::Results]
def handle_response(response)
  result_options = {
    page: @page,
    per_page: @per_page,
    padding: @padding,
    load: @load,
    includes: options[:includes],
    json: !@json.nil?,
    match_suffix: @match_suffix,
    highlighted_fields: @highlighted_fields || [],
    misspellings: @misspellings
  }

  if options[:debug]
    require "pp"

    puts "Searchkick Version: #{Searchkick::VERSION}"
    puts "Elasticsearch Version: #{Searchkick.server_version}"
    puts

    puts "Model Searchkick Options"
    pp searchkick_options
    puts

    puts "Search Options"
    pp options
    puts

    if searchkick_index
      puts "Model Search Data"
      begin
        sample = klass.first(3).map do |record|
          indexed = searchkick_index.record_data(record)
          {index: indexed.merge(data: searchkick_index.send(:search_data, record))}
        end
        pp sample
      rescue => error
        # sample data is best-effort; report the failure and carry on
        puts "#{error.class.name}: #{error.message}"
      end
      puts

      puts "Elasticsearch Mapping"
      puts JSON.pretty_generate(searchkick_index.mapping)
      puts

      puts "Elasticsearch Settings"
      puts JSON.pretty_generate(searchkick_index.settings)
      puts
    end

    puts "Elasticsearch Query"
    puts to_curl
    puts

    puts "Elasticsearch Results"
    puts JSON.pretty_generate(response)
  end

  # set execute for multi search
  @execute = Searchkick::Results.new(searchkick_klass, response, result_options)
end
161
+
162
+ private
163
+
164
# Translates a client error into a Searchkick-specific exception.
#
# The status code is read from characters 1..3 of the error message
# (i.e. a message that starts with something like "[404]").
#
# @param e [StandardError] error raised while searching
# @raise [MissingIndexError] for status 404
# @raise [UnsupportedVersionError] for known "unsupported query" messages (status 500 or 400)
# @raise [InvalidQueryError] for any other status 400
# @raise [StandardError] re-raises +e+ for everything else
def handle_error(e)
  message = e.message
  status_code = message[1..3].to_i
  mentions = ->(*fragments) { fragments.any? { |fragment| message.include?(fragment) } }

  case status_code
  when 404
    raise MissingIndexError, "Index missing - run #{reindex_command}"
  when 500
    unsupported = mentions.call(
      "IllegalArgumentException[minimumSimilarity >= 1]",
      "No query registered for [multi_match]",
      "[match] query does not support [cutoff_frequency]",
      "No query registered for [function_score]"
    )
    raise e unless unsupported

    raise UnsupportedVersionError, "This version of Searchkick requires Elasticsearch 2 or greater"
  when 400
    if mentions.call("bool query does not support [filter]", "[bool] filter does not support [filter]")
      raise UnsupportedVersionError, "This version of Searchkick requires Elasticsearch 2 or greater"
    elsif mentions.call("[multi_match] analyzer [searchkick_search] not found")
      raise InvalidQueryError, "Bad mapping - run #{reindex_command}"
    else
      raise InvalidQueryError, message
    end
  else
    raise e
  end
end
192
+
193
# @return [String] the reindex call to suggest in error messages ("Model.reindex", or "reindex" without a model)
def reindex_command
  model = searchkick_klass
  return "reindex" unless model

  "#{model.name}.reindex"
end
196
+
197
# Sends the prepared request (see +params+) through the shared Searchkick client.
#
# @return [Object] the client's raw search response
def execute_search
  client = Searchkick.client
  client.search(params)
end
200
+
201
# Builds the request body from the term and options and stores it, together
# with the pagination and loading settings, in instance variables
# (@body, @page, @per_page, @padding, @load, @type, @routing).
#
# When a raw :body option is given it is used as the payload as-is; otherwise
# the payload is generated (similar / match-all / per-field match queries,
# boosts, ordering, filters, aggregations, suggestions, highlights, source
# selection). The :type and :routing options are applied in both cases, so
# they are no longer silently dropped when :body is supplied.
# :body_options is deep-merged into the payload last.
def prepare
  boost_fields, fields = set_fields

  operator = options[:operator] || "and"

  # pagination
  page = [options[:page].to_i, 1].max
  per_page = (options[:limit] || options[:per_page] || 1_000).to_i
  padding = [options[:padding].to_i, 0].max
  offset = options[:offset] || (page - 1) * per_page + padding

  # model and eager loading
  load = options[:load].nil? ? true : options[:load]

  conversions_fields = Array(options[:conversions] || searchkick_options[:conversions]).map(&:to_s)

  all = term == "*"

  @json = options[:body]
  if @json
    payload = @json
  else
    if options[:similar]
      payload = {
        more_like_this: {
          like_text: term,
          min_doc_freq: 1,
          min_term_freq: 1,
          analyzer: Searchkick.searchkick_search2_analyzer
        }
      }
      if fields != ["_all"]
        payload[:more_like_this][:fields] = fields
      end
    elsif all
      payload = {
        match_all: {}
      }
    else
      queries = []

      misspellings =
        if options.key?(:misspellings)
          options[:misspellings]
        else
          true
        end

      # first pass of a `below` search runs without misspellings; `execute`
      # calls prepare again (with @misspellings_below set) when too few hits return
      if misspellings.is_a?(Hash) && misspellings[:below] && !@misspellings_below
        @misspellings_below = misspellings[:below].to_i
        misspellings = false
      end

      if misspellings != false
        edit_distance = (misspellings.is_a?(Hash) && (misspellings[:edit_distance] || misspellings[:distance])) || 1
        transpositions =
          if misspellings.is_a?(Hash) && misspellings.key?(:transpositions)
            {fuzzy_transpositions: misspellings[:transpositions]}
          else
            {fuzzy_transpositions: true}
          end
        prefix_length = (misspellings.is_a?(Hash) && misspellings[:prefix_length]) || 0
        default_max_expansions = @misspellings_below ? 20 : 3
        max_expansions = (misspellings.is_a?(Hash) && misspellings[:max_expansions]) || default_max_expansions
        @misspellings = true
      else
        @misspellings = false
      end

      fields.each do |field|
        queries_to_add = []
        qs = []

        # boost is looked up under the field name as returned by set_fields
        factor = boost_fields[field] || 1
        shared_options = {
          query: term,
          boost: 10 * factor
        }

        match_type =
          if field.end_with?(".phrase")
            field =
              if field == "_all.phrase"
                "_all"
              else
                field.sub(/\.phrase\z/, ".analyzed")
              end

            :match_phrase
          else
            :match
          end

        shared_options[:operator] = operator if match_type == :match

        exclude_analyzer = nil
        exclude_field = field

        if field == "_all" || field.end_with?(".analyzed")
          shared_options[:cutoff_frequency] = 0.001 unless operator == "and" || misspellings == false
          # NOTE(review): `searchkick_search_analyer` is spelled differently from
          # `searchkick_search2_analyzer`; confirm it matches the accessor defined on Searchkick
          qs.concat [
            shared_options.merge(analyzer: Searchkick.searchkick_search_analyer),
            shared_options.merge(analyzer: Searchkick.searchkick_search2_analyzer)
          ]
          exclude_analyzer = Searchkick.searchkick_search2_analyzer
        elsif field.end_with?(".exact")
          f = field.split(".")[0..-2].join(".")
          queries_to_add << {match: {f => shared_options.merge(analyzer: "keyword")}}
          exclude_field = f
          exclude_analyzer = "keyword"
        else
          analyzer = field =~ /\.word_(start|middle|end)\z/ ? "searchkick_word_search" : "searchkick_autocomplete_search"
          qs << shared_options.merge(analyzer: analyzer)
          exclude_analyzer = analyzer
        end

        # add a fuzzy variant of every query, boosted lower than the exact one
        if misspellings != false && match_type == :match
          qs.concat qs.map { |q| q.except(:cutoff_frequency).merge(fuzziness: edit_distance, prefix_length: prefix_length, max_expansions: max_expansions, boost: factor).merge(transpositions) }
        end

        q2 = qs.map { |q| {match_type => {field => q}} }

        # boost exact matches more
        if field =~ /\.word_(start|middle|end)\z/ && searchkick_options[:word] != false
          queries_to_add << {
            bool: {
              must: {
                bool: {
                  should: q2
                }
              },
              should: {match_type => {field.sub(/\.word_(start|middle|end)\z/, ".analyzed") => qs.first}}
            }
          }
        else
          queries_to_add.concat(q2)
        end

        if options[:exclude]
          must_not =
            Array(options[:exclude]).map do |phrase|
              {
                match_phrase: {
                  exclude_field => {
                    query: phrase,
                    analyzer: exclude_analyzer
                  }
                }
              }
            end

          queries_to_add = [{
            bool: {
              should: queries_to_add,
              must_not: must_not
            }
          }]
        end

        queries.concat(queries_to_add)
      end

      payload = {
        dis_max: {
          queries: queries
        }
      }

      if conversions_fields.present? && options[:conversions] != false
        shoulds = []
        conversions_fields.each do |conversions_field|
          # wrap payload in a bool query
          script_score = {field_value_factor: {field: "#{conversions_field}.count"}}

          shoulds << {
            nested: {
              path: conversions_field,
              score_mode: "sum",
              query: {
                function_score: {
                  boost_mode: "replace",
                  query: {
                    match: {
                      "#{conversions_field}.query" => term
                    }
                  }
                }.merge(script_score)
              }
            }
          }
        end
        payload = {
          bool: {
            must: payload,
            should: shoulds
          }
        }
      end
    end

    custom_filters = []
    multiply_filters = []

    set_boost_by(multiply_filters, custom_filters)
    set_boost_where(custom_filters)
    set_boost_by_distance(custom_filters) if options[:boost_by_distance]

    if custom_filters.any?
      payload = {
        function_score: {
          functions: custom_filters,
          query: payload,
          score_mode: "sum"
        }
      }
    end

    if multiply_filters.any?
      payload = {
        function_score: {
          functions: multiply_filters,
          query: payload,
          score_mode: "multiply"
        }
      }
    end

    payload = {
      query: payload,
      size: per_page,
      from: offset
    }
    payload[:explain] = options[:explain] if options[:explain]
    payload[:profile] = options[:profile] if options[:profile]

    # order
    set_order(payload) if options[:order]

    # indices_boost
    set_boost_by_indices(payload)

    # filters
    filters = where_filters(options[:where])
    set_filters(payload, filters) if filters.any?

    # aggregations
    set_aggregations(payload) if options[:aggs]

    # suggestions
    set_suggestions(payload) if options[:suggest]

    # highlight
    set_highlights(payload, fields) if options[:highlight]

    # timeout shortly after client times out
    payload[:timeout] ||= "#{Searchkick.search_timeout + 1}s"

    # An empty array will cause only the _id and _type for each hit to be returned
    # doc for :select - http://www.elasticsearch.org/guide/reference/api/search/fields/
    # doc for :select_v2 - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-source-filtering.html
    if options[:select]
      if options[:select] == []
        # intuitively [] makes sense to return no fields, but ES by default returns all fields
        payload[:_source] = false
      else
        payload[:_source] = options[:select]
      end
    elsif load
      payload[:_source] = false
    end
  end

  # type and routing are request parameters rather than part of the body,
  # so they are applied whether the body was generated or passed in via :body
  if options[:type] || (klass != searchkick_klass && searchkick_index)
    @type = [options[:type] || klass].flatten.map { |v| searchkick_index.klass_document_type(v) }
  end

  # routing
  @routing = options[:routing] if options[:routing]

  # merge more body options
  payload = payload.deep_merge(options[:body_options]) if options[:body_options]

  @body = payload
  @page = page
  @per_page = per_page
  @padding = padding
  @load = load
end
489
+
490
# Resolves the fields to search and their boosts.
#
# Fields come from the :fields option or the model's :searchable setting.
# Each entry is either a name (matched with the default match type) or a
# one-pair hash {name => match_type}; a "^boost" suffix on the name sets its
# boost. The :word match type maps to the "analyzed" sub-field. Without any
# fields, "_all" (or "_all.phrase" for :phrase) is used.
#
# @return [Array(Hash, Array<String>)] boosts keyed by resolved field name, and the resolved field names
# @raise [ArgumentError] when no fields are given and the default match is neither :word nor :phrase
def set_fields
  boost_fields = {}
  requested = options[:fields] || searchkick_options[:searchable]
  default_match = options[:match] || searchkick_options[:match] || :word

  unless requested
    return [boost_fields, ["_all"]] if default_match == :word
    return [boost_fields, ["_all.phrase"]] if default_match == :phrase

    raise ArgumentError, "Must specify fields"
  end

  resolved = requested.map do |entry|
    name, match = entry.is_a?(Hash) ? entry.to_a.first : [entry, default_match]
    base, boost = name.to_s.split("^", 2)
    suffix = match == :word ? 'analyzed' : match
    field = "#{base}.#{suffix}"
    boost_fields[field] = boost.to_f if boost
    field
  end

  [boost_fields, resolved]
end
512
+
513
# Appends one decay function per :boost_by_distance entry to +custom_filters+.
#
# Accepts {field => attributes} or the legacy {field: name, ...attributes}
# form. Attributes default to function :gauss and scale "5mi"; only :origin,
# :scale, :offset and :decay are forwarded to the function.
#
# @param custom_filters [Array<Hash>] accumulator that receives the functions
# @raise [ArgumentError] when an entry has no :origin
def set_boost_by_distance(custom_filters)
  settings = options[:boost_by_distance] || {}

  # legacy format
  legacy_field = settings[:field]
  settings = {legacy_field => settings.except(:field)} if legacy_field

  forwarded_keys = [:origin, :scale, :offset, :decay]
  settings.each do |field, attributes|
    attributes = {function: :gauss, scale: "5mi"}.merge(attributes)
    raise ArgumentError, "boost_by_distance requires :origin" unless attributes[:origin]

    function_params = attributes.select { |key, _| forwarded_keys.include?(key) }
    function_params[:origin] = location_value(function_params[:origin])
    custom_filters << {attributes[:function] => {field => function_params}}
  end
end
535
+
536
# Turns the :boost_by and :boost options into score functions.
#
# An Array of fields gets factor 1 each. In a Hash, entries whose
# :boost_mode is "multiply" go to +multiply_filters+; all others, plus the
# single :boost field, go to +custom_filters+ with the log modifier enabled
# by default.
#
# @param multiply_filters [Array<Hash>] accumulator for multiplied functions
# @param custom_filters [Array<Hash>] accumulator for summed functions
def set_boost_by(multiply_filters, custom_filters)
  summed = options[:boost_by] || {}
  multiplied = {}

  case summed
  when Array
    summed = summed.each_with_object({}) { |field, memo| memo[field] = {factor: 1} }
  when Hash
    multiplied, summed = summed.partition { |_, v| v[:boost_mode] == "multiply" }.map { |pairs| Hash[pairs] }
  end

  single_field = options[:boost]
  summed[single_field] = {factor: 1} if single_field

  custom_filters.concat(boost_filters(summed, log: true))
  multiply_filters.concat(boost_filters(multiplied))
end
548
+
549
# Appends a weighted filter per :boost_where condition to +custom_filters+.
#
# A value may be a list of {value:, factor:} hashes, a single such hash, or a
# plain value (which is boosted by a factor of 1000).
#
# @param custom_filters [Array<Hash>] accumulator that receives the functions
def set_boost_where(custom_filters)
  (options[:boost_where] || {}).each do |field, value|
    weighted_values =
      if value.is_a?(Array) && value.first.is_a?(Hash)
        value.map { |value_factor| [value_factor[:value], value_factor[:factor]] }
      elsif value.is_a?(Hash)
        [[value[:value], value[:factor]]]
      else
        [[value, 1000]]
      end

    weighted_values.each do |(match_value, factor)|
      custom_filters << custom_filter(field, match_value, factor)
    end
  end
end
564
+
565
# Adds :indices_boost to the payload when the :indices_boost option is set.
#
# Keys responding to +searchkick_index+ are resolved to that index's name;
# each name is then replaced by the first index the alias lookup returns,
# falling back to the name itself.
#
# @param payload [Hash] request body being built (mutated)
def set_boost_by_indices(payload)
  requested = options[:indices_boost]
  return unless requested

  boosts = {}
  requested.each do |key, boost|
    index = key.respond_to?(:searchkick_index) ? key.searchkick_index.name : key
    # try to use index explicitly instead of alias: https://github.com/elasticsearch/elasticsearch/issues/4756
    index_by_alias = Searchkick.client.indices.get_alias(index: index).keys.first
    boosts[index_by_alias || index] = boost
  end

  payload[:indices_boost] = boosts
end
577
+
578
# Adds phrase suggesters to the payload for the model's :suggest fields.
#
# When the :fields option is given, only suggest fields that are also being
# searched are used (field names are compared without any "^boost" suffix).
#
# @param payload [Hash] request body being built (mutated)
def set_suggestions(payload)
  suggest_fields = (searchkick_options[:suggest] || []).map(&:to_s)

  # intersection
  searched = options[:fields]
  if searched
    searched_names = searched.map do |entry|
      name = entry.is_a?(Hash) ? entry.keys.first : entry
      name.to_s.split("^", 2).first
    end
    suggest_fields &= searched_names
  end

  return unless suggest_fields.any?

  payload[:suggest] = {text: term}
  suggest_fields.each do |field|
    payload[:suggest][field] = {phrase: {field: "#{field}.suggest"}}
  end
end
597
+
598
# Adds the :highlight section to the payload and records the highlighted
# field names in @highlighted_fields.
#
# By default every searched field is highlighted. A Hash :highlight option
# may set :tag (the closing tag is derived from the opening tag's name),
# :fragment_size, :encoder and :fields (names are suffixed with the match suffix).
#
# @param payload [Hash] request body being built (mutated)
# @param fields [Array<String>] resolved field names being searched
def set_highlights(payload, fields)
  highlight = {fields: Hash[fields.map { |f| [f, {}] }]}
  payload[:highlight] = highlight

  settings = options[:highlight]
  if settings.is_a?(Hash)
    tag = settings[:tag]
    if tag
      highlight[:pre_tags] = [tag]
      highlight[:post_tags] = [tag.to_s.gsub(/\A<(\w+).+/, "</\\1>")]
    end

    [:fragment_size, :encoder].each do |key|
      setting = settings[key]
      highlight[key] = setting if setting
    end

    highlight_fields = settings[:fields]
    if highlight_fields
      highlight[:fields] = {}
      highlight_fields.each do |name, opts|
        highlight[:fields]["#{name}.#{@match_suffix}"] = opts || {}
      end
    end
  end

  @highlighted_fields = highlight[:fields].keys
end
628
+
629
# Builds the :aggs section of the payload from the :aggs option.
#
# Each aggregation becomes a range, date_range, date_histogram, metric
# (one of @@metric_aggs) or terms aggregation depending on its options.
# Unless :smart_aggs is false, the search's :where conditions on other
# fields are applied to the aggregation as a filter, together with the
# aggregation's own :where.
#
# @param payload [Hash] request body being built (mutated)
def set_aggregations(payload)
  aggs = options[:aggs]
  payload[:aggs] = {}

  aggs = Hash[aggs.map { |f| [f, {}] }] if aggs.is_a?(Array) # convert to more advanced syntax

  aggs.each do |field, agg_options|
    size = agg_options[:limit] || 1_000
    shared_agg_options = agg_options.slice(:order, :min_doc_count)
    target_field = agg_options[:field] || field

    definition =
      if agg_options[:ranges]
        {range: {field: target_field, ranges: agg_options[:ranges]}.merge(shared_agg_options)}
      elsif agg_options[:date_ranges]
        {date_range: {field: target_field, ranges: agg_options[:date_ranges]}.merge(shared_agg_options)}
      elsif (histogram = agg_options[:date_histogram])
        {date_histogram: {field: histogram[:field], interval: histogram[:interval]}}
      elsif (metric = @@metric_aggs.find { |k| agg_options.has_key?(k) })
        {metric => {field: agg_options[metric][:field] || field}}
      else
        {terms: {field: target_field, size: size}.merge(shared_agg_options)}
      end

    inherited_where =
      if options[:smart_aggs] == false
        {}
      else
        # conditions on the aggregated field itself are left out
        (options[:where] || {}).reject { |key, _| key == field }
      end
    agg_filters = where_filters(inherited_where.merge(agg_options[:where] || {}))

    if agg_filters.any?
      definition = {
        filter: {bool: {must: agg_filters}},
        aggs: {field => definition}
      }
    end

    payload[:aggs][field] = definition
  end
end
693
+
694
# Applies the where-filters to the payload.
#
# With aggregations requested the filters go into :post_filter; otherwise
# they are folded into the query itself via a bool query.
#
# @param payload [Hash] request body being built (mutated)
# @param filters [Array<Hash>] filter clauses from +where_filters+
def set_filters(payload, filters)
  filter_clause = {filter: filters}

  if options[:aggs]
    payload[:post_filter] = {bool: filter_clause}
  else
    # more efficient query if no aggs
    payload[:query] = {bool: {must: payload[:query]}.merge(filter_clause)}
  end
end
711
+
712
+ # TODO id transformation for arrays
713
# Sets :sort on the payload from the :order option.
#
# A non-enumerable value is treated as a single ascending field. Arrays are
# passed through untouched; in hashes the "id" key is mapped to :_id or
# :_uid depending on +below50?+.
#
# @param payload [Hash] request body being built (mutated)
def set_order(payload)
  order = options[:order]
  order = {order => :asc} unless order.is_a?(Enumerable)
  id_field = below50? ? :_id : :_uid

  payload[:sort] =
    if order.is_a?(Array)
      order
    else
      order.each_with_object({}) do |(key, direction), sort|
        sort[key.to_s == "id" ? id_field : key] = direction
      end
    end
end
718
+
719
# Converts a :where hash into a list of filter clauses.
#
# Supports the :or / :_or / :_and / :_not combinators, ranges, arrays
# (treated as :in), and the operator hashes handled below (:near/:within,
# :geo_polygon, :geo_shape, :top_left/:bottom_right, :regexp, :not, :all,
# :in, :gt, :gte, :lt, :lte). A field named "id" is filtered as :_id.
#
# @param where [Hash, nil] conditions keyed by field name
# @return [Array<Hash>] filter clauses
# @raise [RuntimeError] for an unknown operator
def where_filters(where)
  # operator => [bound key, inclusiveness key, inclusive?]
  range_operators = {
    gt: [:from, :include_lower, false],
    gte: [:from, :include_lower, true],
    lt: [:to, :include_upper, false],
    lte: [:to, :include_upper, true]
  }
  nested = ->(statements) { statements.map { |statement| {bool: {filter: where_filters(statement)}} } }

  (where || {}).each_with_object([]) do |(field, value), filters|
    field = :_id if field.to_s == "id"

    case field
    when :or
      value.each { |or_clause| filters << {bool: {should: nested.call(or_clause)}} }
    when :_or
      filters << {bool: {should: nested.call(value)}}
    when :_not
      filters << {bool: {must_not: where_filters(value)}}
    when :_and
      filters << {bool: {must: nested.call(value)}}
    else
      # expand ranges
      value = {gte: value.first, (value.exclude_end? ? :lt : :lte) => value.last} if value.is_a?(Range)
      value = {in: value} if value.is_a?(Array)

      unless value.is_a?(Hash)
        filters << term_filters(field, value)
        next
      end

      value.each do |op, op_value|
        case op
        when :within, :bottom_right
          # do nothing - consumed by :near and :top_left respectively
        when :near
          filters << {geo_distance: {field => location_value(op_value), distance: value[:within] || "50mi"}}
        when :geo_polygon
          filters << {geo_polygon: {field => op_value}}
        when :geo_shape
          shape = op_value.except(:relation)
          shape[:coordinates] = coordinate_array(shape[:coordinates]) if shape[:coordinates]
          filters << {geo_shape: {field => {relation: op_value[:relation] || "intersects", shape: shape}}}
        when :top_left
          corners = {top_left: location_value(op_value), bottom_right: location_value(value[:bottom_right])}
          filters << {geo_bounding_box: {field => corners}}
        when :regexp # support for regexp queries without using a regexp ruby object
          filters << {regexp: {field => {value: op_value}}}
        when :not # not equal
          filters << {bool: {must_not: term_filters(field, op_value)}}
        when :all
          op_value.each { |val| filters << term_filters(field, val) }
        when :in
          filters << term_filters(field, op_value)
        else
          bound, inclusive_key, inclusive = range_operators.fetch(op) do
            raise "Unknown where operator: #{op.inspect}"
          end
          range_query = {bound => op_value, inclusive_key => inclusive}

          # issue 132 - merge both bounds of one field into a single range filter
          existing = filters.find { |f| f[:range] && f[:range][field] }
          if existing
            existing[:range][field].merge!(range_query)
          else
            filters << {range: {field => range_query}}
          end
        end
      end
    end
  end
end
819
+
820
# Builds the filter clause that matches +field+ against a single value.
#
# Arrays produce a `terms` clause (the canonical name of the query that the
# deprecated `in` alias pointed to); an array containing nil additionally
# matches documents where the field is missing. nil matches documents
# without the field, and a Regexp becomes a regexp clause on its source.
#
# @param field [Symbol, String] field name
# @param value [Array, nil, Regexp, Object] value(s) to match
# @return [Hash] a single filter clause
def term_filters(field, value)
  if value.is_a?(Array) # in query
    if value.any?(&:nil?)
      {bool: {should: [term_filters(field, nil), term_filters(field, value.compact)]}}
    else
      {terms: {field => value}}
    end
  elsif value.nil?
    {bool: {must_not: {exists: {field: field}}}}
  elsif value.is_a?(Regexp)
    {regexp: {field => {value: value.source}}}
  else
    {term: {field => value}}
  end
end
835
+
836
# Builds a score function that applies +factor+ to documents matching
# +field+ => +value+.
#
# When +below50?+ is true the filter is wrapped in a bool query and weighted
# with :boost_factor; otherwise the filters are used directly with :weight.
#
# @param field [Symbol, String] field to match
# @param value [Object] value passed to +where_filters+
# @param factor [Numeric] boost applied to matching documents
# @return [Hash] a function_score function
def custom_filter(field, value, factor)
  legacy_format = below50?
  conditions = where_filters(field => value)

  return {filter: conditions, weight: factor} unless legacy_format

  {
    filter: {bool: {must: conditions}},
    boost_factor: factor
  }
end
853
+
854
# Builds one field_value_factor function per boosted field.
#
# The factor defaults to 1 and the "ln2p" modifier is used when logging is
# enabled, either per field (:log) or through +options+. Each function is
# restricted to documents where the field exists. The caller's hashes are
# read only, never modified, so frozen or shared option hashes are safe.
#
# @param boost_by [Hash{Object => Hash}] per-field settings (:factor, :log)
# @param options [Hash] defaults; only :log is read
# @return [Array<Hash>] function_score functions
def boost_filters(boost_by, options = {})
  boost_by.map do |field, value|
    log = value.key?(:log) ? value[:log] : options[:log]
    factor = value[:factor] || 1

    {
      filter: {
        exists: {
          field: field
        }
      },
      field_value_factor: {
        field: field,
        factor: factor.to_f,
        modifier: log ? "ln2p" : nil
      }
    }
  end
end
875
+
876
# Walks nested arrays until it reaches either a lat/lon hash or an array that
# starts with a number, and returns the same structure with every lat/lon
# hash converted to [lon, lat]. Anything else is returned unchanged.
#
# @param value [Hash, Array, Object] coordinates in any supported nesting
# @return [Array, Object] the converted structure
def coordinate_array(value)
  return [value[:lon], value[:lat]] if value.is_a?(Hash)
  return value unless value.is_a?(Array)
  return value if value[0].is_a?(Numeric)

  value.map { |nested| coordinate_array(nested) }
end
888
+
889
# Normalizes a location: an array has its elements converted to floats and
# its order reversed; any other value is returned as given.
#
# @param value [Array, Object] location
# @return [Array<Float>, Object]
def location_value(value)
  return value unless value.is_a?(Array)

  value.map(&:to_f).reverse
end
896
+
897
# @return [Boolean] whether Searchkick reports the server version as below 5.0.0-alpha1
def below50?
  threshold = "5.0.0-alpha1"
  Searchkick.server_below?(threshold)
end
900
+ end
901
+ end