chewy 7.2.7 → 7.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/CODEOWNERS +1 -1
- data/.github/workflows/ruby.yml +3 -2
- data/CHANGELOG.md +20 -0
- data/README.md +85 -9
- data/chewy.gemspec +1 -0
- data/lib/chewy/index/crutch.rb +15 -7
- data/lib/chewy/index/import.rb +29 -2
- data/lib/chewy/index.rb +25 -0
- data/lib/chewy/rake_helper.rb +1 -0
- data/lib/chewy/strategy/delayed_sidekiq/scheduler.rb +148 -0
- data/lib/chewy/strategy/delayed_sidekiq/worker.rb +52 -0
- data/lib/chewy/strategy/delayed_sidekiq.rb +17 -0
- data/lib/chewy/strategy.rb +1 -0
- data/lib/chewy/version.rb +1 -1
- data/lib/tasks/chewy.rake +1 -1
- data/spec/chewy/rake_helper_spec.rb +19 -0
- data/spec/chewy/strategy/delayed_sidekiq_spec.rb +190 -0
- metadata +22 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 79428eb9af436a7a4c80a2db3c034583768eacc45d00074525f8e6559341ae20
|
4
|
+
data.tar.gz: 23377b8abb6ebfa81f8140990916022f8bbca769b5ad4474a076c8d890ece979
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 89b6aa83fa54a6b5056623f8255eb34675ad9c3e1494915aa324af752d1a72f9c010fcbcbb3c929a232b32811bbc36956624848efabc09cb382c59fbcee54940
|
7
|
+
data.tar.gz: 90e1da06cf5c38903ca52c6b5379a9271cf277e3c70604b80d77d9ed13496b15ac3874249f2bf67fca7aec3eb9ac9db5e9a20de95eb1da7bb1bfa84307c9c846
|
data/.github/CODEOWNERS
CHANGED
@@ -1 +1 @@
|
|
1
|
-
.github/workflows @toptal/
|
1
|
+
.github/workflows @toptal/platform-sre
|
data/.github/workflows/ruby.yml
CHANGED
@@ -6,7 +6,8 @@ on:
|
|
6
6
|
pull_request:
|
7
7
|
types: [
|
8
8
|
synchronize, # PR was updated
|
9
|
-
opened # PR was open
|
9
|
+
opened, # PR was opened
|
10
|
+
reopened # PR was reopened
|
10
11
|
]
|
11
12
|
|
12
13
|
jobs:
|
@@ -41,7 +42,7 @@ jobs:
|
|
41
42
|
strategy:
|
42
43
|
fail-fast: false
|
43
44
|
matrix:
|
44
|
-
ruby: [ '3.0', 3.1 ]
|
45
|
+
ruby: [ '3.0', '3.1', '3.2' ]
|
45
46
|
gemfile: [ rails.6.1.activerecord, rails.7.0.activerecord ]
|
46
47
|
name: ${{ matrix.ruby }}-${{ matrix.gemfile }}
|
47
48
|
|
data/CHANGELOG.md
CHANGED
@@ -8,6 +8,26 @@
|
|
8
8
|
|
9
9
|
### Bugs Fixed
|
10
10
|
|
11
|
+
## 7.3.1 (2023-04-20)
|
12
|
+
|
13
|
+
### Bugs Fixed
|
14
|
+
|
15
|
+
* [#874](https://github.com/toptal/chewy/pull/874): Fix `chewy:journal:clean` task for ruby 3.x. ([@muk-ai](https://github.com/muk-ai))
|
16
|
+
* [#882](https://github.com/toptal/chewy/pull/882): Fix memory leak during `chewy:reset` for ruby 3.2 ([@konalegi](https://github.com/konalegi))
|
17
|
+
|
18
|
+
## 7.3.0 (2023-04-03)
|
19
|
+
|
20
|
+
### New Features
|
21
|
+
|
22
|
+
* [#869](https://github.com/toptal/chewy/pull/869): New strategy - `delayed_sidekiq`. Allow passing `strategy: :delayed_sidekiq` option to `SomeIndex.import([1, ...], strategy: :delayed_sidekiq)`. The strategy is compatible with `update_fields` option as well. ([@skcc321][])
|
23
|
+
* [#879](https://github.com/toptal/chewy/pull/879): Configure CI to check for ruby 3.2 compatibility. ([@konalegi][])
|
24
|
+
|
25
|
+
### Changes
|
26
|
+
|
27
|
+
### Bugs Fixed
|
28
|
+
|
29
|
+
* [#856](https://github.com/toptal/chewy/pull/856): Fix return value of subscribed_task_stats used in rake tasks. ([@fabiormoura][])
|
30
|
+
|
11
31
|
## 7.2.7 (2022-11-15)
|
12
32
|
|
13
33
|
### New Features
|
data/README.md
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
|
6
6
|
# Chewy
|
7
7
|
|
8
|
-
Chewy is an ODM (Object Document Mapper), built on top of
|
8
|
+
Chewy is an ODM (Object Document Mapper), built on top of [the official Elasticsearch client](https://github.com/elastic/elasticsearch-ruby).
|
9
9
|
|
10
10
|
## Why Chewy?
|
11
11
|
|
@@ -458,7 +458,7 @@ field :hierarchy_link, type: :join, relations: {question: %i[answer comment], an
|
|
458
458
|
```
|
459
459
|
assuming you have `comment_type` and `commented_id` fields in your model.
|
460
460
|
|
461
|
-
Note that when you reindex a parent,
|
461
|
+
Note that when you reindex a parent, its children and grandchildren will be reindexed as well.
|
462
462
|
This may require additional queries to the primary database and to Elasticsearch.
|
463
463
|
|
464
464
|
Also note that the join field doesn't support crutches (it should be a field directly defined on the model).
|
@@ -503,7 +503,7 @@ class ProductsIndex < Chewy::Index
|
|
503
503
|
|
504
504
|
field :name
|
505
505
|
# simply use crutch-fetched data as a value:
|
506
|
-
field :category_names, value: ->(product, crutches) { crutches
|
506
|
+
field :category_names, value: ->(product, crutches) { crutches[:categories][product.id] }
|
507
507
|
end
|
508
508
|
```
|
509
509
|
|
@@ -525,7 +525,7 @@ So Chewy Crutches™ technology is able to increase your indexing performance in
|
|
525
525
|
|
526
526
|
### Witchcraft™ technology
|
527
527
|
|
528
|
-
One more experimental technology to increase import performance. As far as you know, chewy defines value proc for every imported field in mapping, so at the import time each of
|
528
|
+
One more experimental technology to increase import performance. As you know, chewy defines a value proc for every imported field in the mapping, so at import time each of these procs is executed on the imported object to extract the result document to import. It would be great for performance to use one huge whole-document-returning proc instead. So basically the idea of Witchcraft™ technology is to compile a single document-returning proc from the index definition.
|
529
529
|
|
530
530
|
```ruby
|
531
531
|
index_scope Product
|
@@ -569,7 +569,7 @@ Obviously not every type of definition might be compiled. There are some restric
|
|
569
569
|
end
|
570
570
|
```
|
571
571
|
|
572
|
-
However, it is quite possible that your index definition will be supported by Witchcraft™ technology out of the box in
|
572
|
+
However, it is quite possible that your index definition will be supported by Witchcraft™ technology out of the box in most of the cases.
|
573
573
|
|
574
574
|
### Raw Import
|
575
575
|
|
@@ -675,7 +675,7 @@ end
|
|
675
675
|
|
676
676
|
You may be wondering why do you need it? The answer is simple: not to lose the data.
|
677
677
|
|
678
|
-
Imagine that you reset your index in a zero-downtime manner (to separate index), and
|
678
|
+
Imagine that you reset your index in a zero-downtime manner (to separate index), and in the meantime somebody keeps updating the data frequently (to old index). So all these actions will be written to the journal index and you'll be able to apply them after index reset using the `Chewy::Journal` interface.
|
679
679
|
|
680
680
|
When enabled, journal can grow to enormous size, consider setting up cron job that would clean it occasionally using [`chewy:journal:clean` rake task](#chewyjournal).
|
681
681
|
|
@@ -774,6 +774,80 @@ The default queue name is `chewy`, you can customize it in settings: `sidekiq.qu
|
|
774
774
|
Chewy.settings[:sidekiq] = {queue: :low}
|
775
775
|
```
|
776
776
|
|
777
|
+
#### `:delayed_sidekiq`
|
778
|
+
|
779
|
+
It accumulates ids of records to be reindexed during the latency window in redis and then does the reindexing of all accumulated records at once.
|
780
|
+
The strategy is very useful in case of frequently mutated records.
|
781
|
+
It supports the `update_fields` option, so it will try to select just enough data from the DB.
|
782
|
+
|
783
|
+
There are four options that can be defined in the index:
|
784
|
+
```ruby
|
785
|
+
class CitiesIndex...
|
786
|
+
strategy_config delayed_sidekiq: {
|
787
|
+
latency: 3,
|
788
|
+
margin: 2,
|
789
|
+
ttl: 60 * 60 * 24,
|
790
|
+
reindex_wrapper: ->(&reindex) {
|
791
|
+
ActiveRecord::Base.connected_to(role: :reading) { reindex.call }
|
792
|
+
}
|
793
|
+
# latency - will prevent scheduling identical jobs
|
794
|
+
# margin - main purpose is to cover db replication lag by the margin
|
795
|
+
# ttl - a chunk expiration time (in seconds)
|
796
|
+
# reindex_wrapper - lambda that accepts a block and wraps the reindex process (e.g. in an AR connection block).
|
797
|
+
}
|
798
|
+
|
799
|
+
...
|
800
|
+
end
|
801
|
+
```
|
802
|
+
|
803
|
+
Also you can define defaults in the `initializers/chewy.rb`
|
804
|
+
```ruby
|
805
|
+
Chewy.settings = {
|
806
|
+
strategy_config: {
|
807
|
+
delayed_sidekiq: {
|
808
|
+
latency: 3,
|
809
|
+
margin: 2,
|
810
|
+
ttl: 60 * 60 * 24,
|
811
|
+
reindex_wrapper: ->(&reindex) {
|
812
|
+
ActiveRecord::Base.connected_to(role: :reading) { reindex.call }
|
813
|
+
}
|
814
|
+
}
|
815
|
+
}
|
816
|
+
}
|
817
|
+
|
818
|
+
```
|
819
|
+
or in `config/chewy.yml`
|
820
|
+
```yaml
|
821
|
+
strategy_config:
|
822
|
+
delayed_sidekiq:
|
823
|
+
latency: 3
|
824
|
+
margin: 2
|
825
|
+
ttl: <%= 60 * 60 * 24 %>
|
826
|
+
# reindex_wrapper setting is not possible here!!! use the initializer instead
|
827
|
+
```
|
828
|
+
|
829
|
+
You can use the strategy identically to other strategies
|
830
|
+
```ruby
|
831
|
+
Chewy.strategy(:delayed_sidekiq) do
|
832
|
+
City.popular.map(&:do_some_update_action!)
|
833
|
+
end
|
834
|
+
```
|
835
|
+
|
836
|
+
The default queue name is `chewy`; you can customize it in settings via `sidekiq.queue`:
|
837
|
+
```
|
838
|
+
Chewy.settings[:sidekiq] = {queue: :low}
|
839
|
+
```
|
840
|
+
|
841
|
+
Explicit call of the reindex using the `:delayed_sidekiq` strategy
|
842
|
+
```ruby
|
843
|
+
CitiesIndex.import([1, 2, 3], strategy: :delayed_sidekiq)
|
844
|
+
```
|
845
|
+
|
846
|
+
Explicit call of the reindex using `:delayed_sidekiq` strategy with `:update_fields` support
|
847
|
+
```ruby
|
848
|
+
CitiesIndex.import([1, 2, 3], update_fields: [:name], strategy: :delayed_sidekiq)
|
849
|
+
```
|
850
|
+
|
777
851
|
#### `:active_job`
|
778
852
|
|
779
853
|
This does the same thing as `:atomic`, but using ActiveJob. This will inherit the ActiveJob configuration settings including the `active_job.queue_adapter` setting for the environment. Patch `Chewy::Strategy::ActiveJob::Worker` for index updates improving.
|
@@ -810,7 +884,9 @@ It is convenient for use in e.g. the Rails console with non-block notation:
|
|
810
884
|
|
811
885
|
#### `:bypass`
|
812
886
|
|
813
|
-
|
887
|
+
When the bypass strategy is active the index will not be automatically updated on object save.
|
888
|
+
|
889
|
+
For example, on `City.first.save!` the cities index would not be updated.
|
814
890
|
|
815
891
|
#### Nesting
|
816
892
|
|
@@ -888,7 +964,7 @@ Chewy has notifying the following events:
|
|
888
964
|
{index: 30, delete: 5}
|
889
965
|
```
|
890
966
|
|
891
|
-
* `payload[:errors]`: might not
|
967
|
+
* `payload[:errors]`: might not exist. Contains grouped errors with objects ids list:
|
892
968
|
|
893
969
|
```ruby
|
894
970
|
{index: {
|
@@ -1020,7 +1096,7 @@ Request DSL also provides additional scope actions, like `delete_all`, `exists?`
|
|
1020
1096
|
|
1021
1097
|
#### Pagination
|
1022
1098
|
|
1023
|
-
The request DSL supports pagination with `Kaminari`. An extension is enabled on
|
1099
|
+
The request DSL supports pagination with `Kaminari`. An extension is enabled on initialization if `Kaminari` is available. See [Chewy::Search](lib/chewy/search.rb) and [Chewy::Search::Pagination::Kaminari](lib/chewy/search/pagination/kaminari.rb) for details.
|
1024
1100
|
|
1025
1101
|
#### Named scopes
|
1026
1102
|
|
data/chewy.gemspec
CHANGED
@@ -19,6 +19,7 @@ Gem::Specification.new do |spec| # rubocop:disable Metrics/BlockLength
|
|
19
19
|
|
20
20
|
spec.add_development_dependency 'database_cleaner'
|
21
21
|
spec.add_development_dependency 'elasticsearch-extensions'
|
22
|
+
spec.add_development_dependency 'mock_redis'
|
22
23
|
spec.add_development_dependency 'rake'
|
23
24
|
spec.add_development_dependency 'rspec', '>= 3.7.0'
|
24
25
|
spec.add_development_dependency 'rspec-collection_matchers'
|
data/lib/chewy/index/crutch.rb
CHANGED
@@ -12,13 +12,21 @@ module Chewy
|
|
12
12
|
def initialize(index, collection)
|
13
13
|
@index = index
|
14
14
|
@collection = collection
|
15
|
-
@
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
15
|
+
@crutches_instances = {}
|
16
|
+
end
|
17
|
+
|
18
|
+
def method_missing(name, *, **)
|
19
|
+
return self[name] if @index._crutches.key?(name)
|
20
|
+
|
21
|
+
super
|
22
|
+
end
|
23
|
+
|
24
|
+
def respond_to_missing?(name, include_private = false)
|
25
|
+
@index._crutches.key?(name) || super
|
26
|
+
end
|
27
|
+
|
28
|
+
def [](name)
|
29
|
+
@crutches_instances[name] ||= @index._crutches[:"#{name}"].call(@collection)
|
22
30
|
end
|
23
31
|
end
|
24
32
|
|
data/lib/chewy/index/import.rb
CHANGED
@@ -73,7 +73,7 @@ module Chewy
|
|
73
73
|
# @option options [true, Integer, Hash] parallel enables parallel import processing with the Parallel gem, accepts the number of workers or any Parallel gem acceptable options
|
74
74
|
# @return [true, false] false in case of errors
|
75
75
|
ruby2_keywords def import(*args)
|
76
|
-
|
76
|
+
intercept_import_using_strategy(*args).blank?
|
77
77
|
end
|
78
78
|
|
79
79
|
# @!method import!(*collection, **options)
|
@@ -84,7 +84,8 @@ module Chewy
|
|
84
84
|
#
|
85
85
|
# @raise [Chewy::ImportFailed] in case of errors
|
86
86
|
ruby2_keywords def import!(*args)
|
87
|
-
errors =
|
87
|
+
errors = intercept_import_using_strategy(*args)
|
88
|
+
|
88
89
|
raise Chewy::ImportFailed.new(self, errors) if errors.present?
|
89
90
|
|
90
91
|
true
|
@@ -126,6 +127,32 @@ module Chewy
|
|
126
127
|
|
127
128
|
private
|
128
129
|
|
130
|
+
def intercept_import_using_strategy(*args)
|
131
|
+
args_clone = args.deep_dup
|
132
|
+
options = args_clone.extract_options!
|
133
|
+
strategy = options.delete(:strategy)
|
134
|
+
|
135
|
+
return import_routine(*args) if strategy.blank?
|
136
|
+
|
137
|
+
ids = args_clone.flatten
|
138
|
+
return {} if ids.blank?
|
139
|
+
return {argument: {"#{strategy} supports ids only!" => ids}} unless ids.all? do |id|
|
140
|
+
id.respond_to?(:to_i)
|
141
|
+
end
|
142
|
+
|
143
|
+
case strategy
|
144
|
+
when :delayed_sidekiq
|
145
|
+
begin
|
146
|
+
Chewy::Strategy::DelayedSidekiq::Scheduler.new(self, ids, options).postpone
|
147
|
+
{} # success. errors handling convention
|
148
|
+
rescue StandardError => e
|
149
|
+
{scheduler: {e.message => ids}}
|
150
|
+
end
|
151
|
+
else
|
152
|
+
{argument: {"unsupported strategy: '#{strategy}'" => ids}}
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
129
156
|
def import_routine(*args)
|
130
157
|
return if !args.first.nil? && empty_objects_or_scope?(args.first)
|
131
158
|
|
data/lib/chewy/index.rb
CHANGED
@@ -20,6 +20,10 @@ module Chewy
|
|
20
20
|
pipeline raw_import refresh replication
|
21
21
|
].freeze
|
22
22
|
|
23
|
+
STRATEGY_OPTIONS = {
|
24
|
+
delayed_sidekiq: %i[latency margin ttl reindex_wrapper]
|
25
|
+
}.freeze
|
26
|
+
|
23
27
|
include Search
|
24
28
|
include Actions
|
25
29
|
include Aliases
|
@@ -221,6 +225,27 @@ module Chewy
|
|
221
225
|
params.assert_valid_keys(IMPORT_OPTIONS_KEYS)
|
222
226
|
self._default_import_options = _default_import_options.merge(params)
|
223
227
|
end
|
228
|
+
|
229
|
+
def strategy_config(params = {})
|
230
|
+
@strategy_config ||= begin
|
231
|
+
config_struct = Struct.new(*STRATEGY_OPTIONS.keys).new
|
232
|
+
|
233
|
+
STRATEGY_OPTIONS.each_with_object(config_struct) do |(strategy, options), res|
|
234
|
+
res[strategy] = case strategy
|
235
|
+
when :delayed_sidekiq
|
236
|
+
Struct.new(*STRATEGY_OPTIONS[strategy]).new.tap do |config|
|
237
|
+
options.each do |option|
|
238
|
+
config[option] = params.dig(strategy, option) || Chewy.configuration.dig(:strategy_config, strategy, option)
|
239
|
+
end
|
240
|
+
|
241
|
+
config[:reindex_wrapper] ||= ->(&reindex) { reindex.call } # default wrapper
|
242
|
+
end
|
243
|
+
else
|
244
|
+
raise NotImplementedError, "Unsupported strategy: '#{strategy}'"
|
245
|
+
end
|
246
|
+
end
|
247
|
+
end
|
248
|
+
end
|
224
249
|
end
|
225
250
|
end
|
226
251
|
end
|
data/lib/chewy/rake_helper.rb
CHANGED
@@ -270,6 +270,7 @@ module Chewy
|
|
270
270
|
ActiveSupport::Notifications.subscribed(JOURNAL_CALLBACK.curry[output], 'apply_journal.chewy') do
|
271
271
|
ActiveSupport::Notifications.subscribed(IMPORT_CALLBACK.curry[output], 'import_objects.chewy', &block)
|
272
272
|
end
|
273
|
+
ensure
|
273
274
|
output.puts "Total: #{human_duration(Time.now - start)}"
|
274
275
|
end
|
275
276
|
|
@@ -0,0 +1,148 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '../../index'
|
4
|
+
|
5
|
+
# The class is responsible for accumulating in redis [type, ids]
|
6
|
+
# that were requested to be reindexed during `latency` seconds.
|
7
|
+
# The reindex job is going to be scheduled after `latency` seconds.
|
8
|
+
# that job is going to read accumulated [type, ids] from the redis
|
9
|
+
# and reindex all them at once.
|
10
|
+
module Chewy
|
11
|
+
class Strategy
|
12
|
+
class DelayedSidekiq
|
13
|
+
require_relative 'worker'
|
14
|
+
|
15
|
+
class Scheduler
|
16
|
+
DEFAULT_TTL = 60 * 60 * 24 # in seconds
|
17
|
+
DEFAULT_LATENCY = 10
|
18
|
+
DEFAULT_MARGIN = 2
|
19
|
+
DEFAULT_QUEUE = 'chewy'
|
20
|
+
KEY_PREFIX = 'chewy:delayed_sidekiq'
|
21
|
+
FALLBACK_FIELDS = 'all'
|
22
|
+
FIELDS_IDS_SEPARATOR = ';'
|
23
|
+
IDS_SEPARATOR = ','
|
24
|
+
|
25
|
+
def initialize(type, ids, options = {})
|
26
|
+
@type = type
|
27
|
+
@ids = ids
|
28
|
+
@options = options
|
29
|
+
end
|
30
|
+
|
31
|
+
# the diagram:
|
32
|
+
#
|
33
|
+
# inputs:
|
34
|
+
# latency == 2
|
35
|
+
# reindex_time = Time.current
|
36
|
+
#
|
37
|
+
# Parallel OR Sequential triggers of reindex: | What is going on in reindex store (Redis):
|
38
|
+
# --------------------------------------------------------------------------------------------------
|
39
|
+
# |
|
40
|
+
# process 1 (reindex_time): | chewy:delayed_sidekiq:CitiesIndex:1679347866 = [1]
|
41
|
+
# Scheduler.new(CitiesIndex, [1]).postpone | chewy:delayed_sidekiq:timechunks = [{ score: 1679347866, "chewy:delayed_sidekiq:CitiesIndex:1679347866"}]
|
42
|
+
# | & schedule a DelayedSidekiq::Worker at 1679347869 (at + 3)
|
43
|
+
# | it will zpop chewy:delayed_sidekiq:timechunks up to 1679347866 score and reindex all ids with zpoped keys
|
44
|
+
# | chewy:delayed_sidekiq:CitiesIndex:1679347866
|
45
|
+
# |
|
46
|
+
# |
|
47
|
+
# process 2 (reindex_time): | chewy:delayed_sidekiq:CitiesIndex:1679347866 = [1, 2]
|
48
|
+
# Scheduler.new(CitiesIndex, [2]).postpone | chewy:delayed_sidekiq:timechunks = [{ score: 1679347866, "chewy:delayed_sidekiq:CitiesIndex:1679347866"}]
|
49
|
+
# | & do not schedule a new worker
|
50
|
+
# |
|
51
|
+
# |
|
52
|
+
# process 1 (reindex_time + (latency - 1).seconds): | chewy:delayed_sidekiq:CitiesIndex:1679347866 = [1, 2, 3]
|
53
|
+
# Scheduler.new(CitiesIndex, [3]).postpone | chewy:delayed_sidekiq:timechunks = [{ score: 1679347866, "chewy:delayed_sidekiq:CitiesIndex:1679347866"}]
|
54
|
+
# | & do not schedule a new worker
|
55
|
+
# |
|
56
|
+
# |
|
57
|
+
# process 2 (reindex_time + (latency + 1).seconds): | chewy:delayed_sidekiq:CitiesIndex:1679347866 = [1, 2, 3]
|
58
|
+
# Scheduler.new(CitiesIndex, [4]).postpone | chewy:delayed_sidekiq:CitiesIndex:1679347868 = [4]
|
59
|
+
# | chewy:delayed_sidekiq:timechunks = [
|
60
|
+
# | { score: 1679347866, "chewy:delayed_sidekiq:CitiesIndex:1679347866"}
|
61
|
+
# | { score: 1679347868, "chewy:delayed_sidekiq:CitiesIndex:1679347868"}
|
62
|
+
# | ]
|
63
|
+
# | & schedule a DelayedSidekiq::Worker at 1679347871 (at + 3)
|
64
|
+
# | it will zpop chewy:delayed_sidekiq:timechunks up to 1679347868 score and reindex all ids with zpoped keys
|
65
|
+
# | chewy:delayed_sidekiq:CitiesIndex:1679347866 (in case of failed previous reindex),
|
66
|
+
# | chewy:delayed_sidekiq:CitiesIndex:1679347868
|
67
|
+
def postpone
|
68
|
+
::Sidekiq.redis do |redis|
|
69
|
+
# warning: Redis#sadd will always return an Integer in Redis 5.0.0. Use Redis#sadd? instead
|
70
|
+
if redis.respond_to?(:sadd?)
|
71
|
+
redis.sadd?(timechunk_key, serialize_data)
|
72
|
+
else
|
73
|
+
redis.sadd(timechunk_key, serialize_data)
|
74
|
+
end
|
75
|
+
|
76
|
+
redis.expire(timechunk_key, ttl)
|
77
|
+
|
78
|
+
unless redis.zrank(timechunks_key, timechunk_key)
|
79
|
+
redis.zadd(timechunks_key, at, timechunk_key)
|
80
|
+
redis.expire(timechunks_key, ttl)
|
81
|
+
|
82
|
+
::Sidekiq::Client.push(
|
83
|
+
'queue' => sidekiq_queue,
|
84
|
+
'at' => at + margin,
|
85
|
+
'class' => Chewy::Strategy::DelayedSidekiq::Worker,
|
86
|
+
'args' => [type_name, at]
|
87
|
+
)
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
private
|
93
|
+
|
94
|
+
attr_reader :type, :ids, :options
|
95
|
+
|
96
|
+
# this method returns predictable value that jumps by latency value
|
97
|
+
# in other words, it returns the same value for every call made within the same latency-second window
|
98
|
+
def at
|
99
|
+
@at ||= begin
|
100
|
+
schedule_at = latency.seconds.from_now.to_f
|
101
|
+
|
102
|
+
(schedule_at - (schedule_at % latency)).to_i
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
def fields
|
107
|
+
options[:update_fields].presence || [FALLBACK_FIELDS]
|
108
|
+
end
|
109
|
+
|
110
|
+
def timechunks_key
|
111
|
+
"#{KEY_PREFIX}:#{type_name}:timechunks"
|
112
|
+
end
|
113
|
+
|
114
|
+
def timechunk_key
|
115
|
+
"#{KEY_PREFIX}:#{type_name}:#{at}"
|
116
|
+
end
|
117
|
+
|
118
|
+
def serialize_data
|
119
|
+
[ids.join(IDS_SEPARATOR), fields.join(IDS_SEPARATOR)].join(FIELDS_IDS_SEPARATOR)
|
120
|
+
end
|
121
|
+
|
122
|
+
def type_name
|
123
|
+
type.name
|
124
|
+
end
|
125
|
+
|
126
|
+
def latency
|
127
|
+
strategy_config.latency || DEFAULT_LATENCY
|
128
|
+
end
|
129
|
+
|
130
|
+
def margin
|
131
|
+
strategy_config.margin || DEFAULT_MARGIN
|
132
|
+
end
|
133
|
+
|
134
|
+
def ttl
|
135
|
+
strategy_config.ttl || DEFAULT_TTL
|
136
|
+
end
|
137
|
+
|
138
|
+
def sidekiq_queue
|
139
|
+
Chewy.settings.dig(:sidekiq, :queue) || DEFAULT_QUEUE
|
140
|
+
end
|
141
|
+
|
142
|
+
def strategy_config
|
143
|
+
type.strategy_config.delayed_sidekiq
|
144
|
+
end
|
145
|
+
end
|
146
|
+
end
|
147
|
+
end
|
148
|
+
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Chewy
|
4
|
+
class Strategy
|
5
|
+
class DelayedSidekiq
|
6
|
+
class Worker
|
7
|
+
include ::Sidekiq::Worker
|
8
|
+
|
9
|
+
def perform(type, score, options = {})
|
10
|
+
options[:refresh] = !Chewy.disable_refresh_async if Chewy.disable_refresh_async
|
11
|
+
|
12
|
+
::Sidekiq.redis do |redis|
|
13
|
+
timechunks_key = "#{Scheduler::KEY_PREFIX}:#{type}:timechunks"
|
14
|
+
timechunk_keys = redis.zrangebyscore(timechunks_key, -1, score)
|
15
|
+
members = timechunk_keys.flat_map { |timechunk_key| redis.smembers(timechunk_key) }.compact
|
16
|
+
|
17
|
+
# extract ids and fields & do the reindex of records
|
18
|
+
ids, fields = extract_ids_and_fields(members)
|
19
|
+
options[:update_fields] = fields if fields
|
20
|
+
|
21
|
+
index = type.constantize
|
22
|
+
index.strategy_config.delayed_sidekiq.reindex_wrapper.call do
|
23
|
+
options.any? ? index.import!(ids, **options) : index.import!(ids)
|
24
|
+
end
|
25
|
+
|
26
|
+
redis.del(timechunk_keys)
|
27
|
+
redis.zremrangebyscore(timechunks_key, -1, score)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
private
|
32
|
+
|
33
|
+
def extract_ids_and_fields(members)
|
34
|
+
ids = []
|
35
|
+
fields = []
|
36
|
+
|
37
|
+
members.each do |member|
|
38
|
+
member_ids, member_fields = member.split(Scheduler::FIELDS_IDS_SEPARATOR).map do |v|
|
39
|
+
v.split(Scheduler::IDS_SEPARATOR)
|
40
|
+
end
|
41
|
+
ids |= member_ids
|
42
|
+
fields |= member_fields
|
43
|
+
end
|
44
|
+
|
45
|
+
fields = nil if fields.include?(Scheduler::FALLBACK_FIELDS)
|
46
|
+
|
47
|
+
[ids.map(&:to_i), fields]
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Chewy
|
4
|
+
class Strategy
|
5
|
+
class DelayedSidekiq < Sidekiq
|
6
|
+
require_relative 'delayed_sidekiq/scheduler'
|
7
|
+
|
8
|
+
def leave
|
9
|
+
@stash.each do |type, ids|
|
10
|
+
next if ids.empty?
|
11
|
+
|
12
|
+
DelayedSidekiq::Scheduler.new(type, ids).postpone
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
data/lib/chewy/strategy.rb
CHANGED
data/lib/chewy/version.rb
CHANGED
data/lib/tasks/chewy.rake
CHANGED
@@ -96,7 +96,7 @@ namespace :chewy do
|
|
96
96
|
task clean: :environment do |_task, args|
|
97
97
|
delete_options = Chewy::RakeHelper.delete_by_query_options_from_env(ENV)
|
98
98
|
Chewy::RakeHelper.journal_clean(
|
99
|
-
[
|
99
|
+
**[
|
100
100
|
parse_journal_args(args.extras),
|
101
101
|
{delete_by_query_options: delete_options}
|
102
102
|
].reduce({}, :merge)
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'spec_helper'
|
2
|
+
require 'rake'
|
2
3
|
|
3
4
|
describe Chewy::RakeHelper, :orm do
|
4
5
|
before { Chewy.massacre }
|
@@ -456,6 +457,17 @@ Total: \\d+s\\Z
|
|
456
457
|
Total: \\d+s\\Z
|
457
458
|
OUTPUT
|
458
459
|
end
|
460
|
+
|
461
|
+
context 'execute "chewy:journal:clean" rake task' do
|
462
|
+
subject(:task) { Rake.application['chewy:journal:clean'] }
|
463
|
+
before do
|
464
|
+
Rake::DefaultLoader.new.load('lib/tasks/chewy.rake')
|
465
|
+
Rake::Task.define_task(:environment)
|
466
|
+
end
|
467
|
+
it 'does not raise error' do
|
468
|
+
expect { task.invoke }.to_not raise_error
|
469
|
+
end
|
470
|
+
end
|
459
471
|
end
|
460
472
|
|
461
473
|
describe '.reindex' do
|
@@ -570,4 +582,11 @@ Total: \\d+s\\Z
|
|
570
582
|
end
|
571
583
|
end
|
572
584
|
end
|
585
|
+
|
586
|
+
describe '.subscribed_task_stats' do
|
587
|
+
specify do
|
588
|
+
block_output = described_class.subscribed_task_stats(StringIO.new) { 'expected output' }
|
589
|
+
expect(block_output).to eq('expected output')
|
590
|
+
end
|
591
|
+
end
|
573
592
|
end
|
@@ -0,0 +1,190 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
if defined?(Sidekiq)
|
4
|
+
require 'sidekiq/testing'
|
5
|
+
require 'mock_redis'
|
6
|
+
|
7
|
+
describe Chewy::Strategy::DelayedSidekiq do
|
8
|
+
around do |example|
|
9
|
+
Chewy.strategy(:bypass) { example.run }
|
10
|
+
end
|
11
|
+
|
12
|
+
before do
|
13
|
+
redis = MockRedis.new
|
14
|
+
allow(Sidekiq).to receive(:redis).and_yield(redis)
|
15
|
+
Sidekiq::Worker.clear_all
|
16
|
+
end
|
17
|
+
|
18
|
+
before do
|
19
|
+
stub_model(:city) do
|
20
|
+
update_index('cities') { self }
|
21
|
+
end
|
22
|
+
|
23
|
+
stub_index(:cities) do
|
24
|
+
index_scope City
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
let(:city) { City.create!(name: 'hello') }
|
29
|
+
let(:other_city) { City.create!(name: 'world') }
|
30
|
+
|
31
|
+
it 'does not trigger immediate reindex due to it`s async nature' do
|
32
|
+
expect { [city, other_city].map(&:save!) }
|
33
|
+
.not_to update_index(CitiesIndex, strategy: :delayed_sidekiq)
|
34
|
+
end
|
35
|
+
|
36
|
+
it "respects 'refresh: false' options" do
|
37
|
+
allow(Chewy).to receive(:disable_refresh_async).and_return(true)
|
38
|
+
expect(CitiesIndex).to receive(:import!).with([city.id, other_city.id], refresh: false)
|
39
|
+
scheduler = Chewy::Strategy::DelayedSidekiq::Scheduler.new(CitiesIndex, [city.id, other_city.id])
|
40
|
+
scheduler.postpone
|
41
|
+
Chewy::Strategy::DelayedSidekiq::Worker.drain
|
42
|
+
end
|
43
|
+
|
44
|
+
context 'with default config' do
|
45
|
+
it 'does schedule a job that triggers reindex with default options' do
|
46
|
+
Timecop.freeze do
|
47
|
+
expect(Sidekiq::Client).to receive(:push).with(
|
48
|
+
hash_including(
|
49
|
+
'queue' => 'chewy',
|
50
|
+
'at' => (Time.current.to_i.ceil(-1) + 2.seconds).to_i,
|
51
|
+
'class' => Chewy::Strategy::DelayedSidekiq::Worker,
|
52
|
+
'args' => ['CitiesIndex', an_instance_of(Integer)]
|
53
|
+
)
|
54
|
+
).and_call_original
|
55
|
+
|
56
|
+
expect($stdout).not_to receive(:puts)
|
57
|
+
|
58
|
+
Sidekiq::Testing.inline! do
|
59
|
+
expect { [city, other_city].map(&:save!) }
|
60
|
+
.to update_index(CitiesIndex, strategy: :delayed_sidekiq)
|
61
|
+
.and_reindex(city, other_city).only
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
context 'with custom config' do
|
68
|
+
before do
|
69
|
+
CitiesIndex.strategy_config(
|
70
|
+
delayed_sidekiq: {
|
71
|
+
reindex_wrapper: lambda { |&reindex|
|
72
|
+
puts 'hello'
|
73
|
+
reindex.call
|
74
|
+
},
|
75
|
+
margin: 5,
|
76
|
+
latency: 60
|
77
|
+
}
|
78
|
+
)
|
79
|
+
end
|
80
|
+
|
81
|
+
it 'respects :strategy_config options' do
|
82
|
+
Timecop.freeze do
|
83
|
+
expect(Sidekiq::Client).to receive(:push).with(
|
84
|
+
hash_including(
|
85
|
+
'queue' => 'chewy',
|
86
|
+
'at' => (60.seconds.from_now.change(sec: 0) + 5.seconds).to_i,
|
87
|
+
'class' => Chewy::Strategy::DelayedSidekiq::Worker,
|
88
|
+
'args' => ['CitiesIndex', an_instance_of(Integer)]
|
89
|
+
)
|
90
|
+
).and_call_original
|
91
|
+
|
92
|
+
expect($stdout).to receive(:puts).with('hello') # check that reindex_wrapper works
|
93
|
+
|
94
|
+
Sidekiq::Testing.inline! do
|
95
|
+
expect { [city, other_city].map(&:save!) }
|
96
|
+
.to update_index(CitiesIndex, strategy: :delayed_sidekiq)
|
97
|
+
.and_reindex(city, other_city).only
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
context 'two reindex call within the timewindow' do
|
104
|
+
it 'accumulates all ids does the reindex one time' do
|
105
|
+
Timecop.freeze do
|
106
|
+
expect(CitiesIndex).to receive(:import!).with([other_city.id, city.id]).once
|
107
|
+
scheduler = Chewy::Strategy::DelayedSidekiq::Scheduler.new(CitiesIndex, [city.id])
|
108
|
+
scheduler.postpone
|
109
|
+
scheduler = Chewy::Strategy::DelayedSidekiq::Scheduler.new(CitiesIndex, [other_city.id])
|
110
|
+
scheduler.postpone
|
111
|
+
Chewy::Strategy::DelayedSidekiq::Worker.drain
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
context 'one call with update_fields another one without update_fields' do
|
116
|
+
it 'does reindex of all fields' do
|
117
|
+
Timecop.freeze do
|
118
|
+
expect(CitiesIndex).to receive(:import!).with([other_city.id, city.id]).once
|
119
|
+
scheduler = Chewy::Strategy::DelayedSidekiq::Scheduler.new(CitiesIndex, [city.id], update_fields: ['name'])
|
120
|
+
scheduler.postpone
|
121
|
+
scheduler = Chewy::Strategy::DelayedSidekiq::Scheduler.new(CitiesIndex, [other_city.id])
|
122
|
+
scheduler.postpone
|
123
|
+
Chewy::Strategy::DelayedSidekiq::Worker.drain
|
124
|
+
end
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
context 'both calls with different update fields' do
|
129
|
+
it 'deos reindex with union of fields' do
|
130
|
+
Timecop.freeze do
|
131
|
+
expect(CitiesIndex).to receive(:import!).with([other_city.id, city.id], update_fields: %w[description name]).once
|
132
|
+
scheduler = Chewy::Strategy::DelayedSidekiq::Scheduler.new(CitiesIndex, [city.id], update_fields: ['name'])
|
133
|
+
scheduler.postpone
|
134
|
+
scheduler = Chewy::Strategy::DelayedSidekiq::Scheduler.new(CitiesIndex, [other_city.id], update_fields: ['description'])
|
135
|
+
scheduler.postpone
|
136
|
+
Chewy::Strategy::DelayedSidekiq::Worker.drain
|
137
|
+
end
|
138
|
+
end
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
142
|
+
context 'two calls within different timewindows' do
|
143
|
+
it 'does two separate reindexes' do
|
144
|
+
Timecop.freeze do
|
145
|
+
expect(CitiesIndex).to receive(:import!).with([city.id]).once
|
146
|
+
expect(CitiesIndex).to receive(:import!).with([other_city.id]).once
|
147
|
+
Timecop.travel(20.seconds.ago) do
|
148
|
+
scheduler = Chewy::Strategy::DelayedSidekiq::Scheduler.new(CitiesIndex, [city.id])
|
149
|
+
scheduler.postpone
|
150
|
+
end
|
151
|
+
scheduler = Chewy::Strategy::DelayedSidekiq::Scheduler.new(CitiesIndex, [other_city.id])
|
152
|
+
scheduler.postpone
|
153
|
+
Chewy::Strategy::DelayedSidekiq::Worker.drain
|
154
|
+
end
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
158
|
+
context 'first call has update_fields' do
|
159
|
+
it 'does first reindex with the expected update_fields and second without update_fields' do
|
160
|
+
Timecop.freeze do
|
161
|
+
expect(CitiesIndex).to receive(:import!).with([city.id], update_fields: ['name']).once
|
162
|
+
expect(CitiesIndex).to receive(:import!).with([other_city.id]).once
|
163
|
+
Timecop.travel(20.seconds.ago) do
|
164
|
+
scheduler = Chewy::Strategy::DelayedSidekiq::Scheduler.new(CitiesIndex, [city.id], update_fields: ['name'])
|
165
|
+
scheduler.postpone
|
166
|
+
end
|
167
|
+
scheduler = Chewy::Strategy::DelayedSidekiq::Scheduler.new(CitiesIndex, [other_city.id])
|
168
|
+
scheduler.postpone
|
169
|
+
Chewy::Strategy::DelayedSidekiq::Worker.drain
|
170
|
+
end
|
171
|
+
end
|
172
|
+
end
|
173
|
+
|
174
|
+
context 'both calls have update_fields option' do
|
175
|
+
it 'does both reindexes with their expected update_fields option' do
|
176
|
+
Timecop.freeze do
|
177
|
+
expect(CitiesIndex).to receive(:import!).with([city.id], update_fields: ['name']).once
|
178
|
+
expect(CitiesIndex).to receive(:import!).with([other_city.id], update_fields: ['description']).once
|
179
|
+
Timecop.travel(20.seconds.ago) do
|
180
|
+
scheduler = Chewy::Strategy::DelayedSidekiq::Scheduler.new(CitiesIndex, [city.id], update_fields: ['name'])
|
181
|
+
scheduler.postpone
|
182
|
+
end
|
183
|
+
scheduler = Chewy::Strategy::DelayedSidekiq::Scheduler.new(CitiesIndex, [other_city.id], update_fields: ['description'])
|
184
|
+
scheduler.postpone
|
185
|
+
Chewy::Strategy::DelayedSidekiq::Worker.drain
|
186
|
+
end
|
187
|
+
end
|
188
|
+
end
|
189
|
+
end
|
190
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chewy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 7.2.7
|
4
|
+
version: 7.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Toptal, LLC
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2023-04-20 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: database_cleaner
|
@@ -39,6 +39,20 @@ dependencies:
|
|
39
39
|
- - ">="
|
40
40
|
- !ruby/object:Gem::Version
|
41
41
|
version: '0'
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: mock_redis
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: '0'
|
49
|
+
type: :development
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
42
56
|
- !ruby/object:Gem::Dependency
|
43
57
|
name: rake
|
44
58
|
requirement: !ruby/object:Gem::Requirement
|
@@ -344,6 +358,9 @@ files:
|
|
344
358
|
- lib/chewy/strategy/atomic_no_refresh.rb
|
345
359
|
- lib/chewy/strategy/base.rb
|
346
360
|
- lib/chewy/strategy/bypass.rb
|
361
|
+
- lib/chewy/strategy/delayed_sidekiq.rb
|
362
|
+
- lib/chewy/strategy/delayed_sidekiq/scheduler.rb
|
363
|
+
- lib/chewy/strategy/delayed_sidekiq/worker.rb
|
347
364
|
- lib/chewy/strategy/lazy_sidekiq.rb
|
348
365
|
- lib/chewy/strategy/sidekiq.rb
|
349
366
|
- lib/chewy/strategy/urgent.rb
|
@@ -437,6 +454,7 @@ files:
|
|
437
454
|
- spec/chewy/strategy/active_job_spec.rb
|
438
455
|
- spec/chewy/strategy/atomic_no_refresh_spec.rb
|
439
456
|
- spec/chewy/strategy/atomic_spec.rb
|
457
|
+
- spec/chewy/strategy/delayed_sidekiq_spec.rb
|
440
458
|
- spec/chewy/strategy/lazy_sidekiq_spec.rb
|
441
459
|
- spec/chewy/strategy/sidekiq_spec.rb
|
442
460
|
- spec/chewy/strategy_spec.rb
|
@@ -464,7 +482,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
464
482
|
- !ruby/object:Gem::Version
|
465
483
|
version: '0'
|
466
484
|
requirements: []
|
467
|
-
rubygems_version: 3.
|
485
|
+
rubygems_version: 3.3.26
|
468
486
|
signing_key:
|
469
487
|
specification_version: 4
|
470
488
|
summary: Elasticsearch ODM client wrapper
|
@@ -554,6 +572,7 @@ test_files:
|
|
554
572
|
- spec/chewy/strategy/active_job_spec.rb
|
555
573
|
- spec/chewy/strategy/atomic_no_refresh_spec.rb
|
556
574
|
- spec/chewy/strategy/atomic_spec.rb
|
575
|
+
- spec/chewy/strategy/delayed_sidekiq_spec.rb
|
557
576
|
- spec/chewy/strategy/lazy_sidekiq_spec.rb
|
558
577
|
- spec/chewy/strategy/sidekiq_spec.rb
|
559
578
|
- spec/chewy/strategy_spec.rb
|