searchkick 5.5.2 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,10 +17,18 @@ module Searchkick
17
17
  :with_score, :misspellings?, :scroll_id, :clear_scroll, :missing_records, :with_hit
18
18
 
19
19
  def initialize(klass, term = "*", **options)
20
+ if options[:conversions]
21
+ Searchkick.warn("The `conversions` option is deprecated in favor of `conversions_v2`, which provides much better search performance. Upgrade to `conversions_v2` or rename `conversions` to `conversions_v1`")
22
+ end
23
+
24
+ if options.key?(:conversions_v1)
25
+ options[:conversions] = options.delete(:conversions_v1)
26
+ end
27
+
20
28
  unknown_keywords = options.keys - [:aggs, :block, :body, :body_options, :boost,
21
- :boost_by, :boost_by_distance, :boost_by_recency, :boost_where, :conversions, :conversions_term, :debug, :emoji, :exclude, :explain,
29
+ :boost_by, :boost_by_distance, :boost_by_recency, :boost_where, :conversions, :conversions_v2, :conversions_term, :debug, :emoji, :exclude, :explain,
22
30
  :fields, :highlight, :includes, :index_name, :indices_boost, :knn, :limit, :load,
23
- :match, :misspellings, :models, :model_includes, :offset, :operator, :order, :padding, :page, :per_page, :profile,
31
+ :match, :misspellings, :models, :model_includes, :offset, :opaque_id, :operator, :order, :padding, :page, :per_page, :profile,
24
32
  :request_params, :routing, :scope_results, :scroll, :select, :similar, :smart_aggs, :suggest, :total_entries, :track, :type, :where]
25
33
  raise ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}" if unknown_keywords.any?
26
34
 
@@ -86,6 +94,7 @@ module Searchkick
86
94
  params[:type] = @type if @type
87
95
  params[:routing] = @routing if @routing
88
96
  params[:scroll] = @scroll if @scroll
97
+ params[:opaque_id] = @opaque_id if @opaque_id
89
98
  params.merge!(options[:request_params]) if options[:request_params]
90
99
  params
91
100
  end
@@ -98,34 +107,13 @@ module Searchkick
98
107
  prepare
99
108
  response = execute_search
100
109
  end
101
- rescue => e # TODO rescue type
110
+ rescue => e
102
111
  handle_error(e)
103
112
  end
104
113
  handle_response(response)
105
114
  end
106
115
  end
107
116
 
108
- def to_curl
109
- query = params
110
- type = query[:type]
111
- index = query[:index].is_a?(Array) ? query[:index].join(",") : query[:index]
112
- request_params = query.except(:index, :type, :body)
113
-
114
- # no easy way to tell which host the client will use
115
- host =
116
- if Searchkick.client.transport.respond_to?(:transport)
117
- Searchkick.client.transport.transport.hosts.first
118
- else
119
- Searchkick.client.transport.hosts.first
120
- end
121
- credentials = host[:user] || host[:password] ? "#{host[:user]}:#{host[:password]}@" : nil
122
- params = ["pretty"]
123
- request_params.each do |k, v|
124
- params << "#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}"
125
- end
126
- "curl #{host[:protocol]}://#{credentials}#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type ? "/#{type.map { |t| CGI.escape(t) }.join(',')}" : ''}/_search?#{params.join('&')} -H 'Content-Type: application/json' -d '#{query[:body].to_json}'"
127
- end
128
-
129
117
  def handle_response(response)
130
118
  opts = {
131
119
  page: @page,
@@ -144,15 +132,17 @@ module Searchkick
144
132
  total_entries: options[:total_entries],
145
133
  index_mapping: @index_mapping,
146
134
  suggest: options[:suggest],
147
- scroll: options[:scroll]
135
+ scroll: options[:scroll],
136
+ opaque_id: options[:opaque_id]
148
137
  }
149
138
 
150
139
  if options[:debug]
151
- puts "Searchkick Version: #{Searchkick::VERSION}"
152
- puts "Elasticsearch Version: #{Searchkick.server_version}"
140
+ server = Searchkick.opensearch? ? "OpenSearch" : "Elasticsearch"
141
+ puts "Searchkick #{Searchkick::VERSION}"
142
+ puts "#{server} #{Searchkick.server_version}"
153
143
  puts
154
144
 
155
- puts "Model Searchkick Options"
145
+ puts "Model Options"
156
146
  pp searchkick_options
157
147
  puts
158
148
 
@@ -161,7 +151,7 @@ module Searchkick
161
151
  puts
162
152
 
163
153
  if searchkick_index
164
- puts "Model Search Data"
154
+ puts "Record Data"
165
155
  begin
166
156
  pp klass.limit(3).map { |r| RecordData.new(searchkick_index, r).index_data }
167
157
  rescue => e
@@ -169,21 +159,21 @@ module Searchkick
169
159
  end
170
160
  puts
171
161
 
172
- puts "Elasticsearch Mapping"
162
+ puts "Mapping"
173
163
  puts JSON.pretty_generate(searchkick_index.mapping)
174
164
  puts
175
165
 
176
- puts "Elasticsearch Settings"
166
+ puts "Settings"
177
167
  puts JSON.pretty_generate(searchkick_index.settings)
178
168
  puts
179
169
  end
180
170
 
181
- puts "Elasticsearch Query"
182
- puts to_curl
171
+ puts "Query"
172
+ puts JSON.pretty_generate(params[:body])
183
173
  puts
184
174
 
185
- puts "Elasticsearch Results"
186
- puts JSON.pretty_generate(response)
175
+ puts "Results"
176
+ puts JSON.pretty_generate(response.to_h)
187
177
  end
188
178
 
189
179
  # set execute for multi search
@@ -257,6 +247,7 @@ module Searchkick
257
247
  padding = [options[:padding].to_i, 0].max
258
248
  offset = (options[:offset] || (page - 1) * per_page + padding).to_i
259
249
  scroll = options[:scroll]
250
+ opaque_id = options[:opaque_id]
260
251
 
261
252
  max_result_window = searchkick_options[:max_result_window]
262
253
  original_per_page = per_page
@@ -374,7 +365,6 @@ module Searchkick
374
365
  field_misspellings = misspellings && (!misspellings_fields || misspellings_fields.include?(base_field(field)))
375
366
 
376
367
  if field == "_all" || field.end_with?(".analyzed")
377
- shared_options[:cutoff_frequency] = 0.001 unless operator.to_s == "and" || field_misspellings == false || (!below73? && !track_total_hits?) || match_type == :match_phrase || !below80? || Searchkick.opensearch?
378
368
  qs << shared_options.merge(analyzer: "searchkick_search")
379
369
 
380
370
  # searchkick_search and searchkick_search2 are the same for some languages
@@ -442,6 +432,7 @@ module Searchkick
442
432
  }
443
433
 
444
434
  should.concat(set_conversions)
435
+ should.concat(set_conversions_v2)
445
436
  end
446
437
 
447
438
  query = payload
@@ -457,20 +448,14 @@ module Searchkick
457
448
 
458
449
  models = Array(options[:models])
459
450
  if models.any? { |m| m != m.searchkick_klass }
460
- # aliases are not supported with _index in ES below 7.5
461
- # see https://github.com/elastic/elasticsearch/pull/46640
462
- if below75?
463
- Searchkick.warn("Passing child models to models option throws off hits and pagination - use type option instead")
464
- else
465
- index_type_or =
466
- models.map do |m|
467
- v = {_index: m.searchkick_index.name}
468
- v[:type] = m.searchkick_index.klass_document_type(m, true) if m != m.searchkick_klass
469
- v
470
- end
451
+ index_type_or =
452
+ models.map do |m|
453
+ v = {_index: m.searchkick_index.name}
454
+ v[:type] = m.searchkick_index.klass_document_type(m, true) if m != m.searchkick_klass
455
+ v
456
+ end
471
457
 
472
- where[:or] = Array(where[:or]) + [index_type_or]
473
- end
458
+ where[:or] = Array(where[:or]) + [index_type_or]
474
459
  end
475
460
 
476
461
  # start everything as efficient filters
@@ -554,7 +539,7 @@ module Searchkick
554
539
  # run block
555
540
  options[:block].call(payload) if options[:block]
556
541
 
557
- # scroll optimization when interating over all docs
542
+ # scroll optimization when iterating over all docs
558
543
  # https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-scroll.html
559
544
  if options[:scroll] && payload[:query] == {match_all: {}}
560
545
  payload[:sort] ||= ["_doc"]
@@ -566,6 +551,7 @@ module Searchkick
566
551
  @padding = padding
567
552
  @load = load
568
553
  @scroll = scroll
554
+ @opaque_id = opaque_id
569
555
  end
570
556
 
571
557
  def set_fields
@@ -656,6 +642,43 @@ module Searchkick
656
642
  end
657
643
  end
658
644
 
645
+ def set_conversions_v2
646
+ conversions_v2 = options[:conversions_v2]
647
+ return [] if conversions_v2.nil? && !searchkick_options[:conversions_v2]
648
+ return [] if conversions_v2 == false
649
+
650
+ # disable if searchkick_options[:conversions] to make it easy to upgrade without downtime
651
+ return [] if conversions_v2.nil? && searchkick_options[:conversions]
652
+
653
+ unless conversions_v2.is_a?(Hash)
654
+ conversions_v2 = {field: conversions_v2}
655
+ end
656
+
657
+ conversions_fields =
658
+ case conversions_v2[:field]
659
+ when true, nil
660
+ Array(searchkick_options[:conversions_v2]).map(&:to_s)
661
+ else
662
+ [conversions_v2[:field].to_s]
663
+ end
664
+
665
+ conversions_term = (conversions_v2[:term] || options[:conversions_term] || term).to_s
666
+ unless searchkick_options[:case_sensitive]
667
+ conversions_term = conversions_term.downcase
668
+ end
669
+ conversions_term = conversions_term.gsub(".", "*")
670
+
671
+ conversions_fields.map do |conversions_field|
672
+ {
673
+ rank_feature: {
674
+ field: "#{conversions_field}.#{conversions_term}",
675
+ linear: {},
676
+ boost: conversions_v2[:factor] || 1
677
+ }
678
+ }
679
+ end
680
+ end
681
+
659
682
  def set_exclude(field, analyzer)
660
683
  Array(options[:exclude]).map do |phrase|
661
684
  {
@@ -712,7 +735,7 @@ module Searchkick
712
735
  if boost_by.is_a?(Array)
713
736
  boost_by = boost_by.to_h { |f| [f, {factor: 1}] }
714
737
  elsif boost_by.is_a?(Hash)
715
- multiply_by, boost_by = boost_by.partition { |_, v| v.delete(:boost_mode) == "multiply" }.map(&:to_h)
738
+ multiply_by, boost_by = boost_by.transform_values(&:dup).partition { |_, v| v.delete(:boost_mode) == "multiply" }.map(&:to_h)
716
739
  end
717
740
  boost_by[options[:boost]] = {factor: 1} if options[:boost]
718
741
 
@@ -935,11 +958,11 @@ module Searchkick
935
958
  space_type: space_type
936
959
  }
937
960
  },
938
- boost: distance == "cosine" && Searchkick.server_below?("2.19.0", true) ? 0.5 : 1.0
961
+ boost: distance == "cosine" && Searchkick.server_below?("2.19.0") ? 0.5 : 1.0
939
962
  }
940
963
  }
941
964
  else
942
- if ef_search && Searchkick.server_below?("2.16.0", true)
965
+ if ef_search && Searchkick.server_below?("2.16.0")
943
966
  raise Error, "ef_search requires OpenSearch 2.16+"
944
967
  end
945
968
 
@@ -956,7 +979,7 @@ module Searchkick
956
979
  else
957
980
  if exact
958
981
  # prevent incorrect distances/results with Elasticsearch 9.0.0-rc1
959
- if !below90? && field_options[:distance] == "cosine" && distance != "cosine"
982
+ if !Searchkick.server_below?("9.0.0") && field_options[:distance] == "cosine" && distance != "cosine"
960
983
  raise ArgumentError, "distance must match searchkick options"
961
984
  end
962
985
 
@@ -1113,11 +1136,7 @@ module Searchkick
1113
1136
  regex = regex.gsub(/(?<!\\)%/, ".*").gsub(/(?<!\\)_/, ".").gsub("\\%", "%").gsub("\\_", "_")
1114
1137
 
1115
1138
  if op == :ilike
1116
- if below710?
1117
- raise ArgumentError, "ilike requires Elasticsearch 7.10+"
1118
- else
1119
- filters << {regexp: {field => {value: regex, flags: "NONE", case_insensitive: true}}}
1120
- end
1139
+ filters << {regexp: {field => {value: regex, flags: "NONE", case_insensitive: true}}}
1121
1140
  else
1122
1141
  filters << {regexp: {field => {value: regex, flags: "NONE"}}}
1123
1142
  end
@@ -1134,27 +1153,25 @@ module Searchkick
1134
1153
  when :in
1135
1154
  filters << term_filters(field, op_value)
1136
1155
  when :exists
1137
- # TODO add support for false in Searchkick 6
1138
- if op_value != true
1139
- # TODO raise error in Searchkick 6
1140
- Searchkick.warn("Passing a value other than true to exists is not supported")
1156
+ case op_value
1157
+ when true
1158
+ filters << {exists: {field: field}}
1159
+ when false
1160
+ filters << {bool: {must_not: {exists: {field: field}}}}
1161
+ else
1162
+ raise ArgumentError, "Passing a value other than true or false to exists is not supported"
1141
1163
  end
1142
- filters << {exists: {field: field}}
1143
1164
  else
1144
1165
  range_query =
1145
1166
  case op
1146
1167
  when :gt
1147
- # TODO always use gt in Searchkick 6
1148
- below90? ? {from: op_value, include_lower: false} : {gt: op_value}
1168
+ {gt: op_value}
1149
1169
  when :gte
1150
- # TODO always use gte in Searchkick 6
1151
- below90? ? {from: op_value, include_lower: true} : {gte: op_value}
1170
+ {gte: op_value}
1152
1171
  when :lt
1153
- # TODO always use lt in Searchkick 6
1154
- below90? ? {to: op_value, include_upper: false} : {lt: op_value}
1172
+ {lt: op_value}
1155
1173
  when :lte
1156
- # TODO always use lte in Searchkick 6
1157
- below90? ? {to: op_value, include_upper: true} : {lte: op_value}
1174
+ {lte: op_value}
1158
1175
  else
1159
1176
  raise ArgumentError, "Unknown where operator: #{op.inspect}"
1160
1177
  end
@@ -1201,14 +1218,7 @@ module Searchkick
1201
1218
  source = "#{source}.*"
1202
1219
  end
1203
1220
 
1204
- if below710?
1205
- if value.casefold?
1206
- raise ArgumentError, "Case-insensitive flag does not work with Elasticsearch < 7.10"
1207
- end
1208
- {regexp: {field => {value: source, flags: "NONE"}}}
1209
- else
1210
- {regexp: {field => {value: source, flags: "NONE", case_insensitive: value.casefold?}}}
1211
- end
1221
+ {regexp: {field => {value: source, flags: "NONE", case_insensitive: value.casefold?}}}
1212
1222
  else
1213
1223
  # TODO add this for other values
1214
1224
  if value.as_json.is_a?(Enumerable)
@@ -1299,25 +1309,5 @@ module Searchkick
1299
1309
  def body_options
1300
1310
  options[:body_options] || {}
1301
1311
  end
1302
-
1303
- def below73?
1304
- Searchkick.server_below?("7.3.0")
1305
- end
1306
-
1307
- def below75?
1308
- Searchkick.server_below?("7.5.0")
1309
- end
1310
-
1311
- def below710?
1312
- Searchkick.server_below?("7.10.0")
1313
- end
1314
-
1315
- def below80?
1316
- Searchkick.server_below?("8.0.0")
1317
- end
1318
-
1319
- def below90?
1320
- Searchkick.server_below?("9.0.0")
1321
- end
1322
1312
  end
1323
1313
  end
@@ -58,6 +58,25 @@ module Searchkick
58
58
  end
59
59
  end
60
60
 
61
+ index.conversions_v2_fields.each do |conversions_field|
62
+ key = source.key?(conversions_field) ? conversions_field : conversions_field.to_sym
63
+ if !partial_reindex || source[key]
64
+ if index.options[:case_sensitive]
65
+ source[key] =
66
+ (source[key] || {}).reduce(Hash.new(0)) do |memo, (k, v)|
67
+ memo[k.to_s.gsub(".", "*")] += v
68
+ memo
69
+ end
70
+ else
71
+ source[key] =
72
+ (source[key] || {}).reduce(Hash.new(0)) do |memo, (k, v)|
73
+ memo[k.to_s.downcase.gsub(".", "*")] += v
74
+ memo
75
+ end
76
+ end
77
+ end
78
+ end
79
+
61
80
  # hack to prevent generator field doesn't exist error
62
81
  if !partial_reindex
63
82
  index.suggest_fields.each do |field|
@@ -6,7 +6,7 @@ module Searchkick
6
6
  @index = index
7
7
  end
8
8
 
9
- def reindex(records, mode:, method_name:, full: false, single: false)
9
+ def reindex(records, mode:, method_name:, ignore_missing:, full: false, single: false, job_options: nil)
10
10
  # prevents exists? check if records is a relation
11
11
  records = records.to_a
12
12
  return if records.empty?
@@ -17,6 +17,14 @@ module Searchkick
17
17
  raise Error, "Active Job not found"
18
18
  end
19
19
 
20
+ job_options ||= {}
21
+
22
+ # only add if set for backwards compatibility
23
+ extra_options = {}
24
+ if ignore_missing
25
+ extra_options[:ignore_missing] = ignore_missing
26
+ end
27
+
20
28
  # we could likely combine ReindexV2Job, BulkReindexJob, and ProcessBatchJob
21
29
  # but keep them separate for now
22
30
  if single
@@ -28,19 +36,21 @@ module Searchkick
28
36
  routing = record.search_routing
29
37
  end
30
38
 
31
- Searchkick::ReindexV2Job.perform_later(
39
+ Searchkick::ReindexV2Job.set(**job_options).perform_later(
32
40
  record.class.name,
33
41
  record.id.to_s,
34
42
  method_name ? method_name.to_s : nil,
35
43
  routing: routing,
36
- index_name: index.name
44
+ index_name: index.name,
45
+ **extra_options
37
46
  )
38
47
  else
39
- Searchkick::BulkReindexJob.perform_later(
48
+ Searchkick::BulkReindexJob.set(**job_options).perform_later(
40
49
  class_name: records.first.class.searchkick_options[:class_name],
41
50
  record_ids: records.map { |r| r.id.to_s },
42
51
  index_name: index.name,
43
- method_name: method_name ? method_name.to_s : nil
52
+ method_name: method_name ? method_name.to_s : nil,
53
+ **extra_options
44
54
  )
45
55
  end
46
56
  when :queue
@@ -51,7 +61,7 @@ module Searchkick
51
61
  index.reindex_queue.push_records(records)
52
62
  when true, :inline
53
63
  index_records, other_records = records.partition { |r| index_record?(r) }
54
- import_inline(index_records, !full ? other_records : [], method_name: method_name, single: single)
64
+ import_inline(index_records, !full ? other_records : [], method_name: method_name, ignore_missing: ignore_missing, single: single)
55
65
  else
56
66
  raise ArgumentError, "Invalid value for mode"
57
67
  end
@@ -60,7 +70,7 @@ module Searchkick
60
70
  true
61
71
  end
62
72
 
63
- def reindex_items(klass, items, method_name:, single: false)
73
+ def reindex_items(klass, items, method_name:, ignore_missing:, single: false)
64
74
  routing = items.to_h { |r| [r[:id], r[:routing]] }
65
75
  record_ids = routing.keys
66
76
 
@@ -76,7 +86,7 @@ module Searchkick
76
86
  construct_record(klass, id, routing[id])
77
87
  end
78
88
 
79
- import_inline(records, delete_records, method_name: method_name, single: single)
89
+ import_inline(records, delete_records, method_name: method_name, ignore_missing: ignore_missing, single: single)
80
90
  end
81
91
 
82
92
  private
@@ -86,13 +96,13 @@ module Searchkick
86
96
  end
87
97
 
88
98
  # import in single request with retries
89
- def import_inline(index_records, delete_records, method_name:, single:)
99
+ def import_inline(index_records, delete_records, method_name:, ignore_missing:, single:)
90
100
  return if index_records.empty? && delete_records.empty?
91
101
 
92
102
  maybe_bulk(index_records, delete_records, method_name, single) do
93
103
  if index_records.any?
94
104
  if method_name
95
- index.bulk_update(index_records, method_name)
105
+ index.bulk_update(index_records, method_name, ignore_missing: ignore_missing)
96
106
  else
97
107
  index.bulk_index(index_records)
98
108
  end
@@ -33,17 +33,7 @@ module Searchkick
33
33
 
34
34
  # TODO use reliable queuing
35
35
  def reserve(limit: 1000)
36
- if supports_rpop_with_count?
37
- Searchkick.with_redis { |r| r.call("RPOP", redis_key, limit) }.to_a
38
- else
39
- record_ids = []
40
- Searchkick.with_redis do |r|
41
- while record_ids.size < limit && (record_id = r.call("RPOP", redis_key))
42
- record_ids << record_id
43
- end
44
- end
45
- record_ids
46
- end
36
+ Searchkick.with_redis { |r| r.call("RPOP", redis_key, limit) }.to_a
47
37
  end
48
38
 
49
39
  def clear
@@ -60,19 +50,6 @@ module Searchkick
60
50
  "searchkick:reindex_queue:#{name}"
61
51
  end
62
52
 
63
- def supports_rpop_with_count?
64
- redis_version >= Gem::Version.new("6.2")
65
- end
66
-
67
- def redis_version
68
- @redis_version ||=
69
- Searchkick.with_redis do |r|
70
- info = r.call("INFO")
71
- matches = /redis_version:(\S+)/.match(info)
72
- Gem::Version.new(matches[1])
73
- end
74
- end
75
-
76
53
  def escape(value)
77
54
  value.to_s.gsub("|", "||")
78
55
  end
@@ -1,8 +1,8 @@
1
1
  module Searchkick
2
- class ReindexV2Job < ActiveJob::Base
2
+ class ReindexV2Job < Searchkick.parent_job.constantize
3
3
  queue_as { Searchkick.queue_name }
4
4
 
5
- def perform(class_name, id, method_name = nil, routing: nil, index_name: nil)
5
+ def perform(class_name, id, method_name = nil, routing: nil, index_name: nil, ignore_missing: nil)
6
6
  model = Searchkick.load_model(class_name, allow_child: true)
7
7
  index = model.searchkick_index(name: index_name)
8
8
  # use should_index? to decide whether to index (not default scope)
@@ -11,7 +11,7 @@ module Searchkick
11
11
  # but keep for now for backwards compatibility
12
12
  model = model.unscoped if model.respond_to?(:unscoped)
13
13
  items = [{id: id, routing: routing}]
14
- RecordIndexer.new(index).reindex_items(model, items, method_name: method_name, single: true)
14
+ RecordIndexer.new(index).reindex_items(model, items, method_name: method_name, ignore_missing: ignore_missing, single: true)
15
15
  end
16
16
  end
17
17
  end