chewy 8.1.0 → 8.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 702dbe73f6b523156348491049ed540490d755d66c3242a2422c4da92090346c
4
- data.tar.gz: 730beaf500e9d4feeafc5033bd028abc67662f9ae3299a95aa5d3f323f6ee011
3
+ metadata.gz: 4797e925ff275cb6b742ef6dd7be3a03d6033f73c93453305d08383c11252c00
4
+ data.tar.gz: 46b165cfeda3df1558221bc668a7749b1c98ec8a937e4956f710dfbe93934cb1
5
5
  SHA512:
6
- metadata.gz: 87105fad14e8959fabbad411e3b22af813b6261c0cff1a45f14205819e5564f03d936acf6fcd909ac152d98325c2619628c3732fa1eb628d6edb42a5b9670a95
7
- data.tar.gz: b9f693b6e4180e9c2023456439e1de74c86dff2a453f50be369c50f30f7b419eab75173f2071707cf922457dc69fb9fc26cf0e82ad79034e3eea79212091a791
6
+ metadata.gz: 6451084c18ae23ee908f0ba6c00cffa2cc5abcec29ecc4c911f7e8340081f4b0b89c66d91f2c9dda988c412bcd5a995705ff219d00147c4d52209749f375ff84
7
+ data.tar.gz: b2dd7dda7bae223e450165ff35be42ea814b57d8f06cd0546970a0aed3bed113f290ccc233724248a0cdce3af7b9ef8ac60d2e0e155b1dc6f19376375ccb1990
data/CHANGELOG.md CHANGED
@@ -8,6 +8,23 @@
8
8
 
9
9
  ### Changes
10
10
 
11
+ ## 8.2.0 (2026-05-29)
12
+
13
+ ### New Features
14
+
15
+ * Add `progressbar:` option to `import`/`import!` and a `PROGRESS=1` rake env toggle for `chewy:reset` / `chewy:update`. The bar is opt-in (default `false`), supports a `:unbounded` spinner mode that skips the `import_count` query, and is safe in parallel mode — workers stay process-based, the bar is incremented in the parent via `Parallel`'s `finish:` callback. Reintroduces the feature originally added in [#787](https://github.com/toptal/chewy/pull/787) and reverted in [#800](https://github.com/toptal/chewy/pull/800) without the GVL regression.
16
+
17
+ ### Bug Fixes
18
+
19
+ * Fix race condition during `reset!` where the unsuffixed concrete index could be recreated by a concurrent process between the delete and the alias creation, causing the alias creation to fail with an index/alias name collision.
20
+ The delete + alias-add are now performed in a single atomic `_aliases` cluster state update via the `remove_index` action.
21
+ No public API or layout change.
22
+ * [#992](https://github.com/toptal/chewy/issues/992): `import(update_fields: [])` is now a no-op (zero fields to update) instead of triggering a full document reindex. The default of `update_fields` is now `nil` (still performs a full reindex). Behavior change: any caller passing an explicit empty array previously got a silent full reimport; they will now skip the update.
23
+
24
+ ### Changes
25
+
26
+ * [#1024](https://github.com/toptal/chewy/pull/1024): Replace deprecated `ZRANGEBYSCORE` with `ZRANGE ... BYSCORE` in the `delayed_sidekiq` worker Lua script. `ZRANGEBYSCORE` has been deprecated in Redis since 6.2.0.
27
+
11
28
  ## 8.1.0 (2026-05-28)
12
29
 
13
30
  ### New Features
@@ -7,12 +7,13 @@ module Chewy
7
7
  extend ActiveSupport::Concern
8
8
 
9
9
  module ClassMethods
10
- # Checks index existance. Returns true or false
10
+ # Checks index existence. Supports suffixes. Returns true or false
11
11
  #
12
12
  # UsersIndex.exists? #=> true
13
+ # UsersIndex.exists?('11-2024') #=> false
13
14
  #
14
- def exists?
15
- client.indices.exists(index: index_name)
15
+ def exists?(suffix = nil)
16
+ client.indices.exists(index: index_name(suffix: suffix))
16
17
  end
17
18
 
18
19
  # Creates index and applies mappings and settings.
@@ -163,16 +164,17 @@ module Chewy
163
164
  ))
164
165
  original_index_settings suffixed_name
165
166
 
166
- delete if indexes.blank?
167
- client.indices.update_aliases body: {actions: [
168
- *indexes.map do |index|
169
- {remove: {index: index, alias: general_name}}
170
- end,
171
- {add: {index: suffixed_name, alias: general_name}}
172
- ]}
167
+ actions = indexes.map { |index| {remove: {index: index, alias: general_name}} }
168
+ actions << {add: {index: suffixed_name, alias: general_name}}
169
+ if indexes.blank? && exists?
170
+ index_names = client.indices.get_alias(index: general_name).keys
171
+ actions << {remove_index: {index: index_names.join(',')}}
172
+ end
173
+
174
+ client.indices.update_aliases body: {actions: actions}
173
175
  client.indices.delete index: indexes if indexes.present?
174
176
 
175
- self.journal.apply(start_time, **import_options) if apply_journal
177
+ self.journal.apply(start_time, **import_options.except(:progressbar)) if apply_journal
176
178
  result
177
179
  else
178
180
  purge!
@@ -146,6 +146,23 @@ module Chewy
146
146
  collection.each_slice(options[:batch_size], &block)
147
147
  end
148
148
 
149
+ # Returns the count of objects that would be imported. Used by the
150
+ # progressbar feature to set the total. Mirrors {#import_args} input
151
+ # handling but does not enumerate batches.
152
+ #
153
+ # @return [Integer]
154
+ def import_count(*args)
155
+ args = args.dup
156
+ args.extract_options!
157
+ collection = if args.empty? && @target.respond_to?(import_all_method)
158
+ @target.send(import_all_method)
159
+ else
160
+ args.flatten(1).compact
161
+ end
162
+
163
+ collection.count
164
+ end
165
+
149
166
  # This method is used internally by the request DSL when the
150
167
  # collection of ORM/ODM objects is requested.
151
168
  #
@@ -96,6 +96,26 @@ module Chewy
96
96
  end
97
97
  alias_method :import_references, :import_fields
98
98
 
99
+ # Returns the count of records that would be imported. Used by the
100
+ # progressbar feature to set the total. Accepts the same shapes as
101
+ # {#import}: nothing (uses default scope), a relation, or an array
102
+ # of ids/objects.
103
+ #
104
+ # @return [Integer]
105
+ def import_count(*args)
106
+ args = args.dup
107
+ args.extract_options!
108
+ collection = if args.empty?
109
+ default_scope
110
+ elsif args.first.is_a?(relation_class)
111
+ args.first
112
+ else
113
+ args.flatten.compact
114
+ end
115
+
116
+ collection.count
117
+ end
118
+
99
119
  def load(ids, **options)
100
120
  scope = all_scope_where_ids_in(ids)
101
121
  additional_scope = options[options[:_index].to_sym].try(:[], :scope) || options[:scope]
@@ -13,11 +13,11 @@ module Chewy
13
13
  # @param to_index [Array<Object>] objects to index
14
14
  # @param delete [Array<Object>] objects or ids to delete
15
15
  # @param fields [Array<Symbol, String>] an array of fields for documents update
16
- def initialize(index, to_index: [], delete: [], fields: [], context: {})
16
+ def initialize(index, to_index: [], delete: [], fields: nil, context: {})
17
17
  @index = index
18
18
  @to_index = to_index
19
19
  @delete = delete
20
- @fields = fields.map!(&:to_sym)
20
+ @fields = fields&.map(&:to_sym)
21
21
  @context = context
22
22
  end
23
23
 
@@ -47,6 +47,8 @@ module Chewy
47
47
  end
48
48
 
49
49
  def index_entry(object)
50
+ return [] if @fields&.empty?
51
+
50
52
  entry = {}
51
53
  entry[:_id] = index_object_ids[object] if index_object_ids[object]
52
54
  entry[:routing] = routing(object) if join_field?
@@ -264,7 +266,7 @@ module Chewy
264
266
  def parent_changed?(data, old_parent)
265
267
  return false unless old_parent
266
268
  return false unless join_field?
267
- return false unless @fields.include?(join_field.to_sym)
269
+ return false unless @fields&.include?(join_field.to_sym)
268
270
  return false unless data.key?(join_field)
269
271
 
270
272
  # The join field value can be a hash, e.g.:
@@ -0,0 +1,79 @@
1
+ module Chewy
2
+ class Index
3
+ module Import
4
+ # Thin wrapper around `ruby-progressbar` for import feedback.
5
+ #
6
+ # Unlike the original PR #787 implementation, this wrapper is only
7
+ # touched from the parent process: serial imports increment it directly,
8
+ # and parallel imports increment it via `Parallel`'s `finish:` callback
9
+ # (which runs in the parent under an internal mutex). The workers stay
10
+ # process-based, so there is no GVL contention as in PR #787 / #800.
11
+ #
12
+ # `Progressbar.build` returns a NULL object when the feature is disabled,
13
+ # so call sites do not need feature guards.
14
+ class Progressbar
15
+ NULL = Object.new
16
+ class << NULL
17
+ def increment(_); end
18
+ def total=(_); end
19
+ def finish; end
20
+ end
21
+
22
+ BOUNDED_FORMAT = '%t |%B| %p%% %c/%C %e'.freeze
23
+ UNBOUNDED_FORMAT = '%t %c (%a)'.freeze
24
+ TITLE = 'Importing'.freeze
25
+
26
+ # @param enabled [Boolean, :unbounded] feature flag. `:unbounded` shows
27
+ # a spinner with no total (skip `import_count`).
28
+ # @param total [Integer, nil] expected total; ignored when `:unbounded`.
29
+ # @return [Progressbar, NULL]
30
+ def self.build(enabled, total)
31
+ return NULL unless enabled
32
+
33
+ unless '::ProgressBar'.safe_constantize
34
+ raise 'The `ruby-progressbar` gem is required for import progress, ' \
35
+ "please add `gem 'ruby-progressbar'` to your Gemfile"
36
+ end
37
+
38
+ return new if enabled == :unbounded
39
+
40
+ new(normalize_total(total))
41
+ end
42
+
43
+ # Some ActiveRecord scopes (e.g., `.group(...)`) make `.count` return a
44
+ # Hash rather than an Integer. Coerce so we still get a usable total.
45
+ def self.normalize_total(total)
46
+ case total
47
+ when Hash then total.values.sum
48
+ when Integer then total
49
+ end
50
+ end
51
+
52
+ attr_reader :bar
53
+
54
+ def initialize(total = nil)
55
+ format = total ? BOUNDED_FORMAT : UNBOUNDED_FORMAT
56
+ @bar = ::ProgressBar.create(title: TITLE, total: total, format: format)
57
+ end
58
+
59
+ # Clamps to total when bounded — action_objects may include :delete
60
+ # entries (parent-child re-indexing, delete_if scope) that aren't
61
+ # counted by `adapter.import_count`, which would otherwise raise
62
+ # ProgressBar::InvalidProgressError.
63
+ def increment(by)
64
+ target = bar.progress + by
65
+ target = [bar.total, target].min if bar.total
66
+ bar.progress = target
67
+ end
68
+
69
+ def total=(value)
70
+ bar.total = value
71
+ end
72
+
73
+ def finish
74
+ bar.finish unless bar.finished?
75
+ end
76
+ end
77
+ end
78
+ end
79
+ end
@@ -31,7 +31,7 @@ module Chewy
31
31
 
32
32
  DEFAULT_OPTIONS = {
33
33
  refresh: true,
34
- update_fields: [],
34
+ update_fields: nil,
35
35
  update_failover: true,
36
36
  batch_size: Chewy::Index::Adapter::Base::BATCH_SIZE
37
37
  }.freeze
@@ -2,6 +2,7 @@ require 'chewy/index/import/journal_builder'
2
2
  require 'chewy/index/import/bulk_builder'
3
3
  require 'chewy/index/import/bulk_request'
4
4
  require 'chewy/index/import/routine'
5
+ require 'chewy/index/import/progressbar'
5
6
 
6
7
  module Chewy
7
8
  class Index
@@ -11,20 +12,22 @@ module Chewy
11
12
  IMPORT_WORKER = lambda do |index, options, total, ids, iteration|
12
13
  ::Process.setproctitle("chewy [#{index}]: import data (#{iteration + 1}/#{total})")
13
14
  routine = Routine.new(index, **options)
15
+ processed = 0
14
16
  index.adapter.import(*ids, routine.options) do |action_objects|
15
17
  routine.process(**action_objects)
18
+ processed += action_objects.sum { |_, v| v.size }
16
19
  end
17
- {errors: routine.errors, import: routine.stats, leftovers: routine.leftovers}
20
+ {errors: routine.errors, import: routine.stats, leftovers: routine.leftovers, processed: processed}
18
21
  end
19
22
 
20
23
  LEFTOVERS_WORKER = lambda do |index, options, total, body, iteration|
21
24
  ::Process.setproctitle("chewy [#{index}]: import leftovers (#{iteration + 1}/#{total})")
22
25
  routine = Routine.new(index, **options)
23
26
  routine.perform_bulk(body)
24
- routine.errors
27
+ {errors: routine.errors}
25
28
  end
26
29
 
27
- module ClassMethods
30
+ module ClassMethods # rubocop:disable Metrics/ModuleLength
28
31
  # @!method import(*collection, **options)
29
32
  # Basically, one of the main methods for an index. Performs any objects import
30
33
  # to the index. Does all the objects handling routines.
@@ -68,9 +71,15 @@ module Chewy
68
71
  # @option options [Integer] batch_size passed to the adapter import method, used to split imported objects in chunks, 1000 by default
69
72
  # @option options [Boolean] direct_import skips object reloading in ORM adapter, `false` by default
70
73
  # @option options [true, false] journal enables imported objects journaling, false by default
71
- # @option options [Array<Symbol, String>] update_fields list of fields for the partial import, empty by default
74
+ # @option options [Array<Symbol, String>] update_fields list of fields for partial import. `nil` (default) triggers full document reindex;
75
+ # an empty array (`[]`) is an explicit no-op (no fields updated).
72
76
  # @option options [true, false] update_failover enables full objects reimport in cases of partial update errors, `true` by default
73
77
  # @option options [true, Integer, Hash] parallel enables parallel import processing with the Parallel gem, accepts the number of workers or any Parallel gem acceptable options
78
+ # @option options [true, false, :unbounded] progressbar shows an import progressbar
79
+ # on stderr. `true` precomputes the total via `adapter.import_count` (one extra
80
+ # count query); `:unbounded` shows a spinner without computing the total. Default
81
+ # `false`. Safe in parallel mode: the bar is incremented in the parent process via
82
+ # `Parallel`'s `finish:` callback, workers stay process-based.
74
83
  # @return [true, false] false in case of errors
75
84
  def import(*args)
76
85
  intercept_import_using_strategy(*args).blank?
@@ -175,46 +184,91 @@ module Chewy
175
184
  end
176
185
 
177
186
  def import_linear(objects, routine)
187
+ bar = build_progressbar(routine, objects)
178
188
  ActiveSupport::Notifications.instrument 'import_objects.chewy', index: self do |payload|
179
189
  adapter.import(*objects, routine.options) do |action_objects|
180
190
  routine.process(**action_objects)
191
+ bar.increment(action_objects.sum { |_, v| v.size })
181
192
  end
182
193
  routine.perform_bulk(routine.leftovers)
183
194
  payload[:import] = routine.stats
184
195
  payload[:errors] = payload_errors(routine.errors) if routine.errors.present?
185
196
  payload[:errors]
186
197
  end
198
+ ensure
199
+ bar&.finish
187
200
  end
188
201
 
189
202
  def import_parallel(objects, routine)
190
203
  raise "The `parallel` gem is required for parallel import, please add `gem 'parallel'` to your Gemfile" unless '::Parallel'.safe_constantize
191
204
 
205
+ bar = build_progressbar(routine, objects)
192
206
  ActiveSupport::Notifications.instrument 'import_objects.chewy', index: self do |payload|
193
207
  batches = adapter.import_references(*objects, routine.options.slice(:batch_size)).to_a
194
208
 
195
209
  ::ActiveRecord::Base.connection.close if defined?(::ActiveRecord::Base)
196
210
  results = ::Parallel.map_with_index(
197
211
  batches,
198
- routine.parallel_options,
212
+ parallel_options_with_progress(routine.parallel_options, bar),
199
213
  &IMPORT_WORKER.curry[self, routine.options, batches.size]
200
214
  )
201
215
  ::ActiveRecord::Base.connection.reconnect! if defined?(::ActiveRecord::Base)
202
216
  errors, import, leftovers = process_parallel_import_results(results)
203
-
204
- if leftovers.present?
205
- batches = leftovers.each_slice(routine.options[:batch_size])
206
- results = ::Parallel.map_with_index(
207
- batches,
208
- routine.parallel_options,
209
- &LEFTOVERS_WORKER.curry[self, routine.options, batches.size]
210
- )
211
- errors.concat(results.flatten(1))
212
- end
217
+ errors.concat(process_parallel_leftovers(leftovers, routine)) if leftovers.present?
213
218
 
214
219
  payload[:import] = import
215
220
  payload[:errors] = payload_errors(errors) if errors.present?
216
221
  payload[:errors]
217
222
  end
223
+ ensure
224
+ bar&.finish
225
+ end
226
+
227
+ def process_parallel_leftovers(leftovers, routine)
228
+ batches = leftovers.each_slice(routine.options[:batch_size]).to_a
229
+ results = ::Parallel.map_with_index(
230
+ batches,
231
+ routine.parallel_options,
232
+ &LEFTOVERS_WORKER.curry[self, routine.options, batches.size]
233
+ )
234
+ results.flat_map { |r| r[:errors] }
235
+ end
236
+
237
+ # Builds Parallel options with a `finish:` callback that increments the
238
+ # progressbar after each worker batch returns. The callback runs in the
239
+ # parent (main) thread under Parallel's internal mutex (parallel-1.x),
240
+ # so workers stay process-based and there is no worker-side
241
+ # synchronization — the regression that triggered the PR #800 revert.
242
+ #
243
+ # If the caller already supplied a `finish:` callback in
244
+ # `parallel_options`, both run; user callback first, then the bar.
245
+ def parallel_options_with_progress(parallel_options, bar)
246
+ user_finish = parallel_options[:finish]
247
+ progress = lambda do |item, i, result|
248
+ user_finish&.call(item, i, result)
249
+ bar.increment(result[:processed]) if result.is_a?(Hash) && result[:processed]
250
+ end
251
+ parallel_options.merge(finish: progress)
252
+ end
253
+
254
+ def build_progressbar(routine, objects)
255
+ enabled = routine.options[:progressbar]
256
+ total = enabled == true ? safe_import_count(objects) : nil
257
+ Progressbar.build(enabled, total)
258
+ end
259
+
260
+ # Returns nil when the adapter cannot or should not be counted:
261
+ # missing `import_count` on a custom adapter, a grouped scope that
262
+ # raises, or any unexpected count failure. A nil total makes
263
+ # `Progressbar.new` render a spinner instead of a bounded bar —
264
+ # avoids aborting the import just because the progressbar can't
265
+ # size itself.
266
+ def safe_import_count(objects)
267
+ return nil unless adapter.respond_to?(:import_count)
268
+
269
+ adapter.import_count(*objects)
270
+ rescue StandardError
271
+ nil
218
272
  end
219
273
 
220
274
  def process_parallel_import_results(results)
@@ -21,6 +21,8 @@ module Chewy
21
21
 
22
22
  DELETE_BY_QUERY_OPTIONS = %w[WAIT_FOR_COMPLETION REQUESTS_PER_SECOND SCROLL_SIZE].freeze
23
23
  FALSE_VALUES = %w[0 f false off].freeze
24
+ TRUE_VALUES = %w[1 t true on yes].freeze
25
+ UNBOUNDED_VALUES = %w[unbounded].freeze
24
26
 
25
27
  class << self
26
28
  # Performs zero-downtime reindexing of all documents for the specified indexes
@@ -105,7 +107,7 @@ module Chewy
105
107
  indexes_from(only: only, except: except).each_with_object([]) do |index, updated_indexes|
106
108
  if index.exists?
107
109
  output.puts "Updating #{index}"
108
- index.import(parallel: parallel)
110
+ index.import(parallel: parallel, progressbar: progressbar_option)
109
111
  updated_indexes.push(index)
110
112
  else
111
113
  output.puts "Skipping #{index}, it does not exists (use rake chewy:reset[#{index.derivable_name}] to create and update it)"
@@ -336,7 +338,22 @@ module Chewy
336
338
 
337
339
  def reset_one(index, output, parallel: false)
338
340
  output.puts "Resetting #{index}"
339
- index.reset!((Time.now.to_f * 1000).round, parallel: parallel, apply_journal: journal_exists?)
341
+ index.reset!((Time.now.to_f * 1000).round, parallel: parallel, apply_journal: journal_exists?, progressbar: progressbar_option)
342
+ end
343
+
344
+ def progressbar_option
345
+ value = ENV.fetch('PROGRESS', nil)
346
+ return false if value.nil? || value.empty?
347
+
348
+ case value.downcase
349
+ when *FALSE_VALUES then false
350
+ when *UNBOUNDED_VALUES then :unbounded
351
+ when *TRUE_VALUES then true
352
+ else
353
+ warn "PROGRESS=#{value.inspect} not recognized; treating as enabled. " \
354
+ "Use #{TRUE_VALUES.join('/')}, #{UNBOUNDED_VALUES.join('/')}, or #{FALSE_VALUES.join('/')}."
355
+ true
356
+ end
340
357
  end
341
358
 
342
359
  def warn_missing_index(output)
@@ -13,7 +13,7 @@ module Chewy
13
13
  local timechunks_key = prefix .. ":" .. type .. ":timechunks"
14
14
 
15
15
  -- Get timechunk_keys with scores less than or equal to the specified score
16
- local timechunk_keys = redis.call('zrangebyscore', timechunks_key, '-inf', score)
16
+ local timechunk_keys = redis.call('zrange', timechunks_key, '-inf', score, 'byscore')
17
17
 
18
18
  -- Get all members from the sets associated with the timechunk_keys
19
19
  local members = {}
data/lib/chewy/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Chewy
2
- VERSION = '8.1.0'.freeze
2
+ VERSION = '8.2.0'.freeze
3
3
  end
data/lib/chewy.rb CHANGED
@@ -31,6 +31,7 @@ end
31
31
  try_require 'kaminari'
32
32
  try_require 'kaminari/core'
33
33
  try_require 'parallel'
34
+ try_require 'ruby-progressbar'
34
35
 
35
36
  ActiveSupport.on_load(:active_record) do
36
37
  try_require 'kaminari/activerecord'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chewy
3
3
  version: !ruby/object:Gem::Version
4
- version: 8.1.0
4
+ version: 8.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Toptal, LLC
@@ -88,6 +88,7 @@ files:
88
88
  - lib/chewy/index/import/bulk_builder.rb
89
89
  - lib/chewy/index/import/bulk_request.rb
90
90
  - lib/chewy/index/import/journal_builder.rb
91
+ - lib/chewy/index/import/progressbar.rb
91
92
  - lib/chewy/index/import/routine.rb
92
93
  - lib/chewy/index/mapping.rb
93
94
  - lib/chewy/index/observe.rb