chewy 7.2.4 → 7.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/.github/CODEOWNERS +1 -0
  3. data/.github/dependabot.yml +42 -0
  4. data/.github/workflows/ruby.yml +26 -32
  5. data/.rubocop.yml +4 -1
  6. data/CHANGELOG.md +144 -0
  7. data/Gemfile +4 -4
  8. data/README.md +165 -10
  9. data/chewy.gemspec +4 -17
  10. data/gemfiles/base.gemfile +12 -0
  11. data/gemfiles/rails.6.1.activerecord.gemfile +2 -1
  12. data/gemfiles/rails.7.0.activerecord.gemfile +2 -1
  13. data/gemfiles/{rails.5.2.activerecord.gemfile → rails.7.1.activerecord.gemfile} +6 -3
  14. data/lib/chewy/config.rb +22 -14
  15. data/lib/chewy/elastic_client.rb +31 -0
  16. data/lib/chewy/errors.rb +5 -2
  17. data/lib/chewy/fields/base.rb +1 -1
  18. data/lib/chewy/fields/root.rb +1 -1
  19. data/lib/chewy/index/adapter/active_record.rb +13 -3
  20. data/lib/chewy/index/adapter/object.rb +3 -3
  21. data/lib/chewy/index/adapter/orm.rb +2 -2
  22. data/lib/chewy/index/crutch.rb +15 -7
  23. data/lib/chewy/index/import/bulk_builder.rb +6 -7
  24. data/lib/chewy/index/import/routine.rb +1 -1
  25. data/lib/chewy/index/import.rb +31 -4
  26. data/lib/chewy/index/observe/active_record_methods.rb +87 -0
  27. data/lib/chewy/index/observe/callback.rb +34 -0
  28. data/lib/chewy/index/observe.rb +3 -58
  29. data/lib/chewy/index/syncer.rb +1 -1
  30. data/lib/chewy/index.rb +25 -0
  31. data/lib/chewy/journal.rb +17 -6
  32. data/lib/chewy/log_subscriber.rb +5 -1
  33. data/lib/chewy/minitest/helpers.rb +1 -1
  34. data/lib/chewy/minitest/search_index_receiver.rb +3 -1
  35. data/lib/chewy/rake_helper.rb +74 -13
  36. data/lib/chewy/rspec/update_index.rb +13 -6
  37. data/lib/chewy/runtime/version.rb +1 -1
  38. data/lib/chewy/search/parameters/collapse.rb +16 -0
  39. data/lib/chewy/search/parameters/indices.rb +1 -1
  40. data/lib/chewy/search/parameters/knn.rb +16 -0
  41. data/lib/chewy/search/parameters/storage.rb +1 -1
  42. data/lib/chewy/search/parameters.rb +3 -3
  43. data/lib/chewy/search/request.rb +45 -11
  44. data/lib/chewy/search.rb +6 -3
  45. data/lib/chewy/stash.rb +3 -3
  46. data/lib/chewy/strategy/atomic_no_refresh.rb +18 -0
  47. data/lib/chewy/strategy/base.rb +10 -0
  48. data/lib/chewy/strategy/delayed_sidekiq/scheduler.rb +168 -0
  49. data/lib/chewy/strategy/delayed_sidekiq/worker.rb +76 -0
  50. data/lib/chewy/strategy/delayed_sidekiq.rb +30 -0
  51. data/lib/chewy/strategy/lazy_sidekiq.rb +64 -0
  52. data/lib/chewy/strategy.rb +3 -0
  53. data/lib/chewy/version.rb +1 -1
  54. data/lib/chewy.rb +5 -8
  55. data/lib/tasks/chewy.rake +17 -1
  56. data/migration_guide.md +1 -1
  57. data/spec/chewy/config_spec.rb +2 -2
  58. data/spec/chewy/elastic_client_spec.rb +26 -0
  59. data/spec/chewy/fields/base_spec.rb +1 -0
  60. data/spec/chewy/index/actions_spec.rb +4 -4
  61. data/spec/chewy/index/adapter/active_record_spec.rb +62 -0
  62. data/spec/chewy/index/import/bulk_builder_spec.rb +7 -3
  63. data/spec/chewy/index/import_spec.rb +16 -3
  64. data/spec/chewy/index/observe/active_record_methods_spec.rb +68 -0
  65. data/spec/chewy/index/observe/callback_spec.rb +139 -0
  66. data/spec/chewy/index/observe_spec.rb +27 -0
  67. data/spec/chewy/journal_spec.rb +13 -49
  68. data/spec/chewy/minitest/helpers_spec.rb +3 -3
  69. data/spec/chewy/minitest/search_index_receiver_spec.rb +6 -4
  70. data/spec/chewy/rake_helper_spec.rb +155 -4
  71. data/spec/chewy/rspec/helpers_spec.rb +1 -1
  72. data/spec/chewy/search/pagination/kaminari_examples.rb +1 -1
  73. data/spec/chewy/search/pagination/kaminari_spec.rb +1 -1
  74. data/spec/chewy/search/parameters/collapse_spec.rb +5 -0
  75. data/spec/chewy/search/parameters/knn_spec.rb +5 -0
  76. data/spec/chewy/search/request_spec.rb +37 -0
  77. data/spec/chewy/search_spec.rb +9 -0
  78. data/spec/chewy/strategy/active_job_spec.rb +8 -8
  79. data/spec/chewy/strategy/atomic_no_refresh_spec.rb +60 -0
  80. data/spec/chewy/strategy/delayed_sidekiq_spec.rb +208 -0
  81. data/spec/chewy/strategy/lazy_sidekiq_spec.rb +214 -0
  82. data/spec/chewy/strategy/sidekiq_spec.rb +4 -4
  83. data/spec/chewy_spec.rb +7 -4
  84. data/spec/spec_helper.rb +1 -1
  85. metadata +32 -253
  86. data/gemfiles/rails.6.0.activerecord.gemfile +0 -11
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0de5ea12714d98c68dc3d74d1b6bb9debefea211a025749b8958a804a285bdcd
4
- data.tar.gz: f9256684493364e7f6b1ae1b3da4d6376624369df3ff950750a6148cd7d6da2f
3
+ metadata.gz: a15165f889275fecc6a0d590c3339ce0ac9b7944f916306fd6883e7c2be67747
4
+ data.tar.gz: 8efc6201add68bf1c378f598934b6c52c74bd9a62056e7aafdd357358d5cd2b0
5
5
  SHA512:
6
- metadata.gz: f35594db25143614d6c88ac2aef7081975502e0434993e9b60495d6f3d63d2f13fee246275bd5b6b6c5b1deae4db06f5b734bf73369e77aeea22c7141b177417
7
- data.tar.gz: 92dbe74fb416105b4c38dcb95b40646c53597d17c2cdd2800107c27a979da24abceccb0d6ab51b0b3783f769327a3a9fed323b9d504de65cc3541e90df0bd273
6
+ metadata.gz: cfc7f1297fc72fcbdfdbedbcd82c6afe12dd9e15583b4e3467a8a77da89cd1aa8f20b9d0c0a8dd1841275e2916715030e28e9ed10cf510d6985904be9574890e
7
+ data.tar.gz: 6917027945ce94dab50d2f6d679570be58e4bb86472f1d591805d915ca425ecbd24e4cc13e30a459fe2dae4d572002735b69cef138a5c6038c52803a251b8a54
@@ -0,0 +1 @@
1
+ .github/workflows @toptal/platform-sre
@@ -0,0 +1,42 @@
1
+ version: 2
2
+ registries:
3
+ toptal-github:
4
+ type: "git"
5
+ url: "https://github.com"
6
+ username: "x-access-token"
7
+ password: "${{secrets.DEPENDABOT_GITHUB_TOKEN}}"
8
+
9
+ updates:
10
+ - package-ecosystem: bundler
11
+ directory: "/"
12
+ schedule:
13
+ interval: "weekly"
14
+ day: "wednesday"
15
+ time: "07:00"
16
+ pull-request-branch-name:
17
+ separator: "-"
18
+ labels:
19
+ - "no-jira"
20
+ - "ruby"
21
+ - "dependencies"
22
+ reviewers:
23
+ - "toptal/devx"
24
+ registries:
25
+ - toptal-github
26
+ insecure-external-code-execution: allow
27
+ open-pull-requests-limit: 3
28
+ - package-ecosystem: "github-actions"
29
+ directory: "/"
30
+ schedule:
31
+ interval: "weekly"
32
+ day: "wednesday"
33
+ time: "07:00"
34
+ pull-request-branch-name:
35
+ separator: "-"
36
+ labels:
37
+ - "no-jira"
38
+ - "dependencies"
39
+ - "gha"
40
+ reviewers:
41
+ - "toptal/devx"
42
+ open-pull-requests-limit: 3
@@ -1,48 +1,42 @@
1
1
  name: CI
2
2
 
3
- on: [push]
3
+ on:
4
+ push:
5
+ branches: [master]
6
+ pull_request:
7
+ types: [
8
+ synchronize, # PR was updated
9
+ opened, # PR was opened
10
+ reopened # PR was reopened
11
+ ]
4
12
 
5
13
  jobs:
6
- ruby-2:
7
- runs-on: ubuntu-latest
8
- strategy:
9
- fail-fast: false
10
- matrix:
11
- ruby: [2.6, 2.7]
12
- gemfile: [rails.5.2.activerecord, rails.6.0.activerecord, rails.6.1.activerecord]
13
- name: ${{ matrix.ruby }}-${{ matrix.gemfile }}
14
-
15
- env:
16
- BUNDLE_GEMFILE: gemfiles/${{ matrix.gemfile }}.gemfile
17
-
18
- steps:
19
- - uses: actions/checkout@v2
20
- - uses: ruby/setup-ruby@v1
21
- with:
22
- ruby-version: ${{ matrix.ruby }}
23
- bundler-cache: true
24
- - name: Run Elasticsearch
25
- uses: elastic/elastic-github-actions/elasticsearch@9de0f78f306e4ebc0838f057e6b754364685e759
26
- with:
27
- stack-version: 7.10.1
28
- port: 9250
29
- - name: Tests
30
- run: bundle exec rspec
31
-
32
14
  ruby-3:
33
15
  runs-on: ubuntu-latest
34
16
  strategy:
35
17
  fail-fast: false
36
18
  matrix:
37
- ruby: [ '3.0', 3.1 ]
38
- gemfile: [ rails.6.1.activerecord, rails.7.0.activerecord ]
19
+ ruby: [ '3.0', '3.1', '3.2' ]
20
+ gemfile: [rails.6.1.activerecord, rails.7.0.activerecord, rails.7.1.activerecord]
39
21
  name: ${{ matrix.ruby }}-${{ matrix.gemfile }}
40
22
 
41
23
  env:
42
24
  BUNDLE_GEMFILE: gemfiles/${{ matrix.gemfile }}.gemfile
43
25
 
26
+ services:
27
+ redis:
28
+ # Docker Hub image
29
+ image: redis
30
+ ports:
31
+ - '6379:6379'
32
+ # Set health checks to wait until redis has started
33
+ options: >-
34
+ --health-cmd "redis-cli ping"
35
+ --health-interval 10s
36
+ --health-timeout 5s
37
+ --health-retries 5
44
38
  steps:
45
- - uses: actions/checkout@v2
39
+ - uses: actions/checkout@v4
46
40
  - uses: ruby/setup-ruby@v1
47
41
  with:
48
42
  ruby-version: ${{ matrix.ruby }}
@@ -58,9 +52,9 @@ jobs:
58
52
  rubocop:
59
53
  runs-on: ubuntu-latest
60
54
  steps:
61
- - uses: actions/checkout@v2
55
+ - uses: actions/checkout@v4
62
56
  - uses: ruby/setup-ruby@v1
63
57
  with:
64
- ruby-version: 2.7
58
+ ruby-version: 3.0
65
59
  bundler-cache: true
66
60
  - run: bundle exec rubocop --format simple
data/.rubocop.yml CHANGED
@@ -2,7 +2,7 @@ inherit_from: .rubocop_todo.yml
2
2
 
3
3
  AllCops:
4
4
  NewCops: enable
5
- TargetRubyVersion: 2.6
5
+ TargetRubyVersion: 3.0
6
6
 
7
7
  Layout/AccessModifierIndentation:
8
8
  EnforcedStyle: outdent
@@ -59,3 +59,6 @@ Metrics/ModuleLength:
59
59
  Exclude:
60
60
  - 'lib/chewy/rake_helper.rb'
61
61
  - '**/*_spec.rb'
62
+
63
+ Style/ArgumentsForwarding:
64
+ Enabled: false
data/CHANGELOG.md CHANGED
@@ -8,6 +8,149 @@
8
8
 
9
9
  ### Bugs Fixed
10
10
 
11
+ ## 7.6.0 (2024-05-03)
12
+
13
+ ### Changes
14
+
15
+ * [#933](https://github.com/toptal/chewy/pull/933): Relax allowed `elasticsearch` dependency versions. ([@mjankowski][])
16
+
17
+ ### Bugs Fixed
18
+ * [#937](https://github.com/toptal/chewy/pull/937): Fix for race condition while using the `delayed_sidekiq` strategy. Also, fix for Redis bloating in case of reindexing error ([@skcc321](https://github.com/skcc321))
19
+
20
+ * [#947](https://github.com/toptal/chewy/pull/947): Fix intermittent time-based failure in delayed sidekiq spec. ([@mjankowski][])
21
+
22
+ ## 7.5.1 (2024-01-30)
23
+
24
+ ### New Features
25
+
26
+ * [#925](https://github.com/toptal/chewy/pull/925): Add configuration option for default scope cleanup behavior. ([@barthez][])
27
+
28
+ ### Changes
29
+
30
+ ### Bugs Fixed
31
+
32
+ ## 7.5.0 (2024-01-15)
33
+
34
+ ### New Features
35
+
36
+ * [#894](https://github.com/toptal/chewy/pull/894): Way of cleaning redis from artifacts left by `delayed_sidekiq` strategy which could potentially cause flaky tests. ([@Drowze](https://github.com/Drowze))
37
+ * [#919](https://github.com/toptal/chewy/pull/919): Add pre-request filter ([@konalegi](https://github.com/konalegi))
38
+
39
+ ## 7.4.0 (2023-12-13)
40
+
41
+ ### New Features
42
+
43
+ ### Changes
44
+
45
+ * [#911](https://github.com/toptal/chewy/pull/911): Remove ruby 2.x. ([@konalegi](https://github.com/konalegi))
46
+
47
+ ### Bugs Fixed
48
+
49
+ ## 7.3.6 (2023-12-13)
50
+
51
+ ### New Features
52
+
53
+ * [#890](https://github.com/toptal/chewy/pull/890): Add the [`knn`](https://www.elastic.co/guide/en/elasticsearch/reference/current/knn-search.html) option to the request. ([@jkostolansky][])
54
+
55
+ ### Changes
56
+
57
+ ### Bugs Fixed
58
+
59
+ ## 7.3.5 (2023-12-06)
60
+
61
+ ### New Features
62
+
63
+ * [#907](https://github.com/toptal/chewy/pull/907): Fix deprecation warning in LogSubscriber for Rails 7.1 ([@alejandroperea](https://github.com/alejandroperea))
64
+
65
+ ### Changes
66
+
67
+ ### Bugs Fixed
68
+
69
+ ## 7.3.4 (2023-08-29)
70
+
71
+ ### New Features
72
+
73
+ * [#892](https://github.com/toptal/chewy/pull/892): Rake task to create missing indexes ([@konalegi](https://github.com/konalegi))
74
+
75
+ ### Changes
76
+
77
+ ### Bugs Fixed
78
+
79
+ ## 7.3.3 (2023-07-07)
80
+
81
+ ### New Features
82
+
83
+ * [#888](https://github.com/toptal/chewy/pull/888/files): Skip journal creation on import ([@konalegi](https://github.com/konalegi))
84
+
85
+ ### Changes
86
+
87
+ ### Bugs Fixed
88
+
89
+ ## 7.3.2 (2023-04-20)
90
+
91
+ ### New Features
92
+
93
+ ### Changes
94
+
95
+ ### Bugs Fixed
96
+
97
+ * [#861](https://github.com/toptal/chewy/pull/861): Fix bug in mock_elasticsearch_response_sources ([@lafeber](https://github.com/lafeber))
98
+
99
+ ## 7.3.1 (2023-04-20)
100
+
101
+ ### Bugs Fixed
102
+
103
+ * [#874](https://github.com/toptal/chewy/pull/874): Fix `chewy:journal:clean` task for ruby 3.x. ([@muk-ai](https://github.com/muk-ai))
104
+ * [#882](https://github.com/toptal/chewy/pull/882): Fix memory leak during `chewy:reset` for ruby 3.2 ([@konalegi](https://github.com/konalegi))
105
+
106
+ ## 7.3.0 (2023-04-03)
107
+
108
+ ### New Features
109
+
110
+ * [#869](https://github.com/toptal/chewy/pull/869): New strategy - `delayed_sidekiq`. Allow passing `strategy: :delayed_sidekiq` option to `SomeIndex.import([1, ...], strategy: :delayed_sidekiq)`. The strategy is compatible with `update_fields` option as well. ([@skcc321][])
111
+ * [#879](https://github.com/toptal/chewy/pull/879): Configure CI to check for ruby 3.2 compatibility. ([@konalegi][])
112
+
113
+ ### Changes
114
+
115
+ ### Bugs Fixed
116
+
117
+ * [#856](https://github.com/toptal/chewy/pull/856): Fix return value of subscribed_task_stats used in rake tasks. ([@fabiormoura][])
118
+
119
+ ## 7.2.7 (2022-11-15)
120
+
121
+ ### New Features
122
+
123
+ * [#857](https://github.com/toptal/chewy/pull/857): Allow passing `wait_for_completion`, `request_per_second` and `scroll_size` options to `chewy:journal:clean` rake task and `delete_all` query builder method. ([@konalegi][])([@barthez][])
124
+
125
+ ### Changes
126
+
127
+ ### Bugs Fixed
128
+
129
+ * [#863](https://github.com/toptal/chewy/pull/863): Fix `crutches` call doesn't respect `update_fields` option. ([@skcc321][])
130
+
131
+ ## 7.2.6 (2022-06-13)
132
+
133
+ ### New Features
134
+
135
+ * [#841](https://github.com/toptal/chewy/pull/841): Add the [`collapse`](https://www.elastic.co/guide/en/elasticsearch/reference/current/collapse-search-results.html) option to the request. ([@jkostolansky][])
136
+
137
+ ### Bugs Fixed
138
+
139
+ * [#842](https://github.com/toptal/chewy/issues/842): Fix `ignore_blank` handling. ([@rabotyaga][])
140
+ * [#848](https://github.com/toptal/chewy/issues/848): Fix invalid journal pagination. ([@konalegi][])
141
+
142
+ ## 7.2.5 (2022-03-04)
143
+
144
+ ### New Features
145
+
146
+ * [#827](https://github.com/toptal/chewy/pull/827): Add `:lazy_sidekiq` strategy, that defers not only importing but also `update_index` callback evaluation for created and updated objects. ([@sl4vr][])
147
+ * [#827](https://github.com/toptal/chewy/pull/827): Add `:atomic_no_refresh` strategy. Like `:atomic`, but `refresh=false` parameter is set. ([@barthez][])
148
+ * [#827](https://github.com/toptal/chewy/pull/827): Add `:no_refresh` chain call to `update_index` matcher to ensure import was called with `refresh=false`. ([@barthez][])
149
+
150
+ ### Bugs Fixed
151
+
152
+ * [#835](https://github.com/toptal/chewy/pull/835): Support keyword arguments in named scopes. ([@milk1000cc][])
153
+
11
154
  ## 7.2.4 (2022-02-03)
12
155
 
13
156
  ### New Features
@@ -684,6 +827,7 @@
684
827
  [@jimmybaker]: https://github.com/jimmybaker
685
828
  [@jirikolarik]: https://github.com/jirikolarik
686
829
  [@jirutka]: https://github.com/jirutka
830
+ [@jkostolansky]: https://github.com/jkostolansky
687
831
  [@joeljunstrom]: https://github.com/joeljunstrom
688
832
  [@jondavidford]: https://github.com/jondavidford
689
833
  [@joonty]: https://github.com/joonty
data/Gemfile CHANGED
@@ -1,7 +1,5 @@
1
1
  source 'https://rubygems.org'
2
2
 
3
- gemspec
4
-
5
3
  gem 'activerecord'
6
4
 
7
5
  gem 'activejob', require: false
@@ -18,5 +16,7 @@ gem 'guard-rspec'
18
16
  gem 'redcarpet'
19
17
  gem 'yard'
20
18
 
21
- gem 'rexml' if RUBY_VERSION >= '3.0.0'
22
- gem 'ruby2_keywords' if RUBY_VERSION < '2.7'
19
+ gem 'rexml'
20
+
21
+ eval_gemfile 'gemfiles/base.gemfile'
22
+ gemspec
data/README.md CHANGED
@@ -5,7 +5,7 @@
5
5
 
6
6
  # Chewy
7
7
 
8
- Chewy is an ODM (Object Document Mapper), built on top of the [the official Elasticsearch client](https://github.com/elastic/elasticsearch-ruby).
8
+ Chewy is an ODM (Object Document Mapper), built on top of [the official Elasticsearch client](https://github.com/elastic/elasticsearch-ruby).
9
9
 
10
10
  ## Why Chewy?
11
11
 
@@ -43,7 +43,7 @@ Or install it yourself as:
43
43
 
44
44
  ### Ruby
45
45
 
46
- Chewy is compatible with MRI 2.6-3.0¹.
46
+ Chewy is compatible with MRI 3.0-3.2¹.
47
47
 
48
48
  > ¹ Ruby 3 is only supported with Rails 6.1
49
49
 
@@ -458,7 +458,7 @@ field :hierarchy_link, type: :join, relations: {question: %i[answer comment], an
458
458
  ```
459
459
  assuming you have `comment_type` and `commented_id` fields in your model.
460
460
 
461
- Note that when you reindex a parent, it's children and grandchildren will be reindexed as well.
461
+ Note that when you reindex a parent, its children and grandchildren will be reindexed as well.
462
462
  This may require additional queries to the primary database and to Elasticsearch.
463
463
 
464
464
  Also note that the join field doesn't support crutches (it should be a field directly defined on the model).
@@ -503,7 +503,7 @@ class ProductsIndex < Chewy::Index
503
503
 
504
504
  field :name
505
505
  # simply use crutch-fetched data as a value:
506
- field :category_names, value: ->(product, crutches) { crutches.categories[product.id] }
506
+ field :category_names, value: ->(product, crutches) { crutches[:categories][product.id] }
507
507
  end
508
508
  ```
509
509
 
@@ -525,7 +525,7 @@ So Chewy Crutches™ technology is able to increase your indexing performance in
525
525
 
526
526
  ### Witchcraft™ technology
527
527
 
528
- One more experimental technology to increase import performance. As far as you know, chewy defines value proc for every imported field in mapping, so at the import time each of this procs is executed on imported object to extract result document to import. It would be great for performance to use one huge whole-document-returning proc instead. So basically the idea or Witchcraft™ technology is to compile a single document-returning proc from the index definition.
528
+ One more experimental technology to increase import performance. As far as you know, chewy defines value proc for every imported field in mapping, so at the import time each of these procs is executed on imported object to extract result document to import. It would be great for performance to use one huge whole-document-returning proc instead. So basically the idea of Witchcraft™ technology is to compile a single document-returning proc from the index definition.
529
529
 
530
530
  ```ruby
531
531
  index_scope Product
@@ -569,7 +569,7 @@ Obviously not every type of definition might be compiled. There are some restric
569
569
  end
570
570
  ```
571
571
 
572
- However, it is quite possible that your index definition will be supported by Witchcraft™ technology out of the box in the most of the cases.
572
+ However, it is quite possible that your index definition will be supported by Witchcraft™ technology out of the box in most of the cases.
573
573
 
574
574
  ### Raw Import
575
575
 
@@ -675,7 +675,9 @@ end
675
675
 
676
676
  You may be wondering why do you need it? The answer is simple: not to lose the data.
677
677
 
678
- Imagine that you reset your index in a zero-downtime manner (to separate index), and at the meantime somebody keeps updating the data frequently (to old index). So all these actions will be written to the journal index and you'll be able to apply them after index reset using the `Chewy::Journal` interface.
678
+ Imagine that you reset your index in a zero-downtime manner (to separate index), and in the meantime somebody keeps updating the data frequently (to old index). So all these actions will be written to the journal index and you'll be able to apply them after index reset using the `Chewy::Journal` interface.
679
+
680
+ When enabled, the journal can grow to an enormous size; consider setting up a cron job that cleans it occasionally using the [`chewy:journal:clean` rake task](#chewyjournal).
679
681
 
680
682
  ### Index manipulation
681
683
 
@@ -694,6 +696,7 @@ UsersIndex.import User.where('rating > 100') # or import specified users scope
694
696
  UsersIndex.import User.where('rating > 100').to_a # or import specified users array
695
697
  UsersIndex.import [1, 2, 42] # pass even ids for import, it will be handled in the most effective way
696
698
  UsersIndex.import User.where('rating > 100'), update_fields: [:email] # if update fields are specified - it will update their values only with the `update` bulk action
699
+ UsersIndex.import! # raises an exception in case of any import errors
697
700
 
698
701
  UsersIndex.reset! # purges index and imports default data for all types
699
702
  ```
@@ -754,6 +757,106 @@ The default queue name is `chewy`, you can customize it in settings: `sidekiq.qu
754
757
  Chewy.settings[:sidekiq] = {queue: :low}
755
758
  ```
756
759
 
760
+ #### `:lazy_sidekiq`
761
+
762
+ This does the same thing as `:sidekiq`, but with lazy evaluation. Beware that it does not allow you to use any non-persistent record state for indices and conditions, because the record will be re-fetched from the database asynchronously using sidekiq. However, when destroying records the strategy will fall back to `:sidekiq`, because it's not possible to re-fetch deleted records from the database.
763
+
764
+ The purpose of this strategy is to improve the response time of the code that should update indexes, as it does not only defer actual ES calls to a background job but `update_index` callbacks evaluation (for created and updated objects) too. Similar to `:sidekiq`, index update is asynchronous so this strategy cannot be used when data and index synchronization is required.
765
+
766
+ ```ruby
767
+ Chewy.strategy(:lazy_sidekiq) do
768
+ City.popular.map(&:do_some_update_action!)
769
+ end
770
+ ```
771
+
772
+ The default queue name is `chewy`, you can customize it in settings: `sidekiq.queue_name`
773
+ ```
774
+ Chewy.settings[:sidekiq] = {queue: :low}
775
+ ```
776
+
777
+ #### `:delayed_sidekiq`
778
+
779
+ It accumulates IDs of records to be reindexed during the latency window in Redis and then performs the reindexing of all accumulated records at once.
780
+ This strategy is very useful in the case of frequently mutated records.
781
+ It supports the `update_fields` option, so it will attempt to select just enough data from the database.
782
+
783
+ Keep in mind, this strategy does not guarantee reindexing in the event of Sidekiq worker termination or an error during the reindexing phase.
784
+ This behavior is intentional to prevent continuous growth of Redis db.
785
+
786
+ There are four options that can be defined in the index:
787
+ ```ruby
788
+ class CitiesIndex...
789
+ strategy_config delayed_sidekiq: {
790
+ latency: 3,
791
+ margin: 2,
792
+ ttl: 60 * 60 * 24,
793
+ reindex_wrapper: ->(&reindex) {
794
+ ActiveRecord::Base.connected_to(role: :reading) { reindex.call }
795
+ }
796
+ # latency - will prevent scheduling identical jobs
797
+ # margin - main purpose is to cover db replication lag by the margin
798
+ # ttl - a chunk expiration time (in seconds)
799
+ # reindex_wrapper - lambda that accepts a block and wraps the reindex process (e.g. in an AR connection block).
800
+ }
801
+
802
+ ...
803
+ end
804
+ ```
805
+
806
+ Also you can define defaults in the `initializers/chewy.rb`
807
+ ```ruby
808
+ Chewy.settings = {
809
+ strategy_config: {
810
+ delayed_sidekiq: {
811
+ latency: 3,
812
+ margin: 2,
813
+ ttl: 60 * 60 * 24,
814
+ reindex_wrapper: ->(&reindex) {
815
+ ActiveRecord::Base.connected_to(role: :reading) { reindex.call }
816
+ }
817
+ }
818
+ }
819
+ }
820
+
821
+ ```
822
+ or in `config/chewy.yml`
823
+ ```ruby
824
+ strategy_config:
825
+ delayed_sidekiq:
826
+ latency: 3
827
+ margin: 2
828
+ ttl: <%= 60 * 60 * 24 %>
829
+ # reindex_wrapper setting is not possible here!!! use the initializer instead
830
+ ```
831
+
832
+ You can use the strategy identically to other strategies
833
+ ```ruby
834
+ Chewy.strategy(:delayed_sidekiq) do
835
+ City.popular.map(&:do_some_update_action!)
836
+ end
837
+ ```
838
+
839
+ The default queue name is `chewy`, you can customize it in settings: `sidekiq.queue_name`
840
+ ```
841
+ Chewy.settings[:sidekiq] = {queue: :low}
842
+ ```
843
+
844
+ Explicit call of the reindex using `:delayed_sidekiq` strategy
845
+ ```ruby
846
+ CitiesIndex.import([1, 2, 3], strategy: :delayed_sidekiq)
847
+ ```
848
+
849
+ Explicit call of the reindex using `:delayed_sidekiq` strategy with `:update_fields` support
850
+ ```ruby
851
+ CitiesIndex.import([1, 2, 3], update_fields: [:name], strategy: :delayed_sidekiq)
852
+ ```
853
+
854
+ When running tests with the `delayed_sidekiq` strategy while Sidekiq is using a real redis instance that is NOT cleaned up in between tests (via e.g. `Sidekiq.redis(&:flushdb)`), you'll want to clean up some redis keys in between tests to avoid state leaking and flaky tests. Chewy provides a convenience method for that:
855
+ ```ruby
856
+ # it might be a good idea to also add to your testing setup, e.g.: a rspec `before` hook
857
+ Chewy::Strategy::DelayedSidekiq.clear_timechunks!
858
+ ```
859
+
757
860
  #### `:active_job`
758
861
 
759
862
  This does the same thing as `:atomic`, but using ActiveJob. This will inherit the ActiveJob configuration settings including the `active_job.queue_adapter` setting for the environment. Patch `Chewy::Strategy::ActiveJob::Worker` for index updates improving.
@@ -790,7 +893,9 @@ It is convenient for use in e.g. the Rails console with non-block notation:
790
893
 
791
894
  #### `:bypass`
792
895
 
793
- The bypass strategy simply silences index updates.
896
+ When the bypass strategy is active the index will not be automatically updated on object save.
897
+
898
+ For example, on `City.first.save!` the cities index would not be updated.
794
899
 
795
900
  #### Nesting
796
901
 
@@ -868,7 +973,7 @@ Chewy has notifying the following events:
868
973
  {index: 30, delete: 5}
869
974
  ```
870
975
 
871
- * `payload[:errors]`: might not exists. Contains grouped errors with objects ids list:
976
+ * `payload[:errors]`: might not exist. Contains grouped errors with objects ids list:
872
977
 
873
978
  ```ruby
874
979
  {index: {
@@ -1000,7 +1105,7 @@ Request DSL also provides additional scope actions, like `delete_all`, `exists?`
1000
1105
 
1001
1106
  #### Pagination
1002
1107
 
1003
- The request DSL supports pagination with `Kaminari`. An extension is enabled on initializtion if `Kaminari` is available. See [Chewy::Search](lib/chewy/search.rb) and [Chewy::Search::Pagination::Kaminari](lib/chewy/search/pagination/kaminari.rb) for details.
1108
+ The request DSL supports pagination with `Kaminari`. An extension is enabled on initialization if `Kaminari` is available. See [Chewy::Search](lib/chewy/search.rb) and [Chewy::Search::Pagination::Kaminari](lib/chewy/search/pagination/kaminari.rb) for details.
1004
1109
 
1005
1110
  #### Named scopes
1006
1111
 
@@ -1101,6 +1206,10 @@ Right now the approach is that if some data had been updated, but index definiti
1101
1206
 
1102
1207
  Also, there is always full reset alternative with `rake chewy:reset`.
1103
1208
 
1209
+ #### `chewy:create_missing_indexes`
1210
+
1211
+ This rake task creates newly defined indexes in ElasticSearch and skips existing ones. Useful for production-like environments.
1212
+
1104
1213
  #### Parallelizing rake tasks
1105
1214
 
1106
1215
  Every task described above has its own parallel version. Every parallel rake task takes the number for processes for execution as the first argument and the rest of the arguments are exactly the same as for the non-parallel task version.
@@ -1126,6 +1235,17 @@ rake chewy:journal:apply["$(date -v-1H -u +%FT%TZ)"] # apply journaled changes f
1126
1235
  rake chewy:journal:apply["$(date -v-1H -u +%FT%TZ)",users] # apply journaled changes for the past hour on UsersIndex only
1127
1236
  ```
1128
1237
 
1238
+ When the size of the journal becomes very large, the classical way of deletion would be obstructive and resource consuming. Fortunately, Chewy internally uses [delete-by-query](https://www.elastic.co/guide/en/elasticsearch/reference/7.17/docs-delete-by-query.html#docs-delete-by-query-task-api) ES function which supports async execution with batching and [throttling](https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-delete-by-query.html#docs-delete-by-query-throttle).
1239
+
1240
+ The available options, which can be set by ENV variables, are listed below:
1241
+ * `WAIT_FOR_COMPLETION` - a boolean flag. It controls async execution. It waits by default. When set to `false` (`0`, `f`, `false` or `off`, in any letter case, are accepted as `false`), Elasticsearch performs some preflight checks, launches the request, and returns a task reference you can use to cancel the task or get its status.
1242
+ * `REQUESTS_PER_SECOND` - float. The throttle for this request in sub-requests per second. No throttling is enforced by default.
1243
+ * `SCROLL_SIZE` - integer. The number of documents to be deleted in single sub-request. The default batch size is 1000.
1244
+
1245
+ ```bash
1246
+ rake chewy:journal:clean WAIT_FOR_COMPLETION=false REQUESTS_PER_SECOND=10 SCROLL_SIZE=5000
1247
+ ```
1248
+
1129
1249
  ### RSpec integration
1130
1250
 
1131
1251
  Just add `require 'chewy/rspec'` to your spec_helper.rb and you will get additional features:
@@ -1164,6 +1284,41 @@ If you use `DatabaseCleaner` in your tests with [the `transaction` strategy](htt
1164
1284
  Chewy.use_after_commit_callbacks = !Rails.env.test?
1165
1285
  ```
1166
1286
 
1287
+ ### Pre-request Filter
1288
+
1289
+ Should you need to inspect the query prior to it being dispatched to ElasticSearch during any queries, you can use the `before_es_request_filter`. `before_es_request_filter` is a callable object, as demonstrated below:
1290
+
1291
+ ```ruby
1292
+ Chewy.before_es_request_filter = -> (method_name, args, kw_args) { ... }
1293
+ ```
1294
+
1295
+ While using the `before_es_request_filter`, please consider the following:
1296
+
1297
+ * `before_es_request_filter` acts as a simple proxy before any request made via the `Elasticsearch::Client`. The arguments passed to this filter include:
1298
+ * `method_name` - The name of the method being called. Examples are search, count, bulk, etc.
1299
+ * `args` and `kw_args` - These are the positional and keyword arguments provided in the method call.
1300
+ * The operation is synchronous, so avoid executing any heavy or time-consuming operations within the filter to prevent performance degradation.
1301
+ * The return value of the proc is disregarded. This filter is intended for inspection or modification of the query rather than generating a response.
1302
+ * Any exception raised inside the callback will propagate upward and halt the execution of the query. It is essential to handle potential errors adequately to ensure the stability of your search functionality.
1303
+
1304
+ ### Import scope clean-up behavior
1305
+
1306
+ Whenever you set the `import_scope` for the index, in the case of ActiveRecord,
1307
+ options for order, offset and limit will be removed. You can set the behavior of
1308
+ chewy, before the clean-up itself.
1309
+
1310
+ The default behavior is a warning sent to the Chewy logger (`:warn`). Another more
1311
+ restrictive option is raising an exception (`:raise`). Both options have a
1312
+ negative impact on performance since verifying whether the code uses any of
1313
+ these options requires building AREL query.
1314
+
1315
+ To avoid the loading time impact, you can ignore the check (`:ignore`) before
1316
+ the clean-up.
1317
+
1318
+ ```
1319
+ Chewy.import_scope_cleanup_behavior = :ignore
1320
+ ```
1321
+
1167
1322
  ## Contributing
1168
1323
 
1169
1324
  1. Fork it (http://github.com/toptal/chewy/fork)
data/chewy.gemspec CHANGED
@@ -2,7 +2,7 @@ lib = File.expand_path('lib', __dir__)
2
2
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
3
  require 'chewy/version'
4
4
 
5
- Gem::Specification.new do |spec| # rubocop:disable Metrics/BlockLength
5
+ Gem::Specification.new do |spec|
6
6
  spec.name = 'chewy'
7
7
  spec.version = Chewy::VERSION
8
8
  spec.authors = ['Toptal, LLC', 'pyromaniac']
@@ -14,23 +14,10 @@ Gem::Specification.new do |spec| # rubocop:disable Metrics/BlockLength
14
14
 
15
15
  spec.files = `git ls-files`.split($RS)
16
16
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
17
- spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
18
17
  spec.require_paths = ['lib']
19
18
 
20
- spec.add_development_dependency 'database_cleaner'
21
- spec.add_development_dependency 'elasticsearch-extensions'
22
- spec.add_development_dependency 'rake'
23
- spec.add_development_dependency 'rspec', '>= 3.7.0'
24
- spec.add_development_dependency 'rspec-collection_matchers'
25
- spec.add_development_dependency 'rspec-its'
26
- spec.add_development_dependency 'rubocop', '1.11'
27
- spec.add_development_dependency 'sqlite3'
28
- spec.add_development_dependency 'timecop'
29
-
30
- spec.add_development_dependency 'method_source'
31
- spec.add_development_dependency 'unparser'
32
-
33
- spec.add_dependency 'activesupport', '>= 5.2'
34
- spec.add_dependency 'elasticsearch', '>= 7.12.0', '< 7.14.0'
19
+ spec.add_dependency 'activesupport', '>= 5.2' # Remove with major version bump, 8.x
20
+ spec.add_dependency 'elasticsearch', '>= 7.14.0', '< 8'
35
21
  spec.add_dependency 'elasticsearch-dsl'
22
+ spec.metadata['rubygems_mfa_required'] = 'true'
36
23
  end
@@ -0,0 +1,12 @@
1
+ gem 'database_cleaner'
2
+ gem 'elasticsearch-extensions'
3
+ gem 'method_source'
4
+ gem 'rake'
5
+ gem 'redis', require: false
6
+ gem 'rspec', '>= 3.7.0'
7
+ gem 'rspec-collection_matchers'
8
+ gem 'rspec-its'
9
+ gem 'rubocop', '1.63.4'
10
+ gem 'sqlite3', '~> 1.4'
11
+ gem 'timecop'
12
+ gem 'unparser'
@@ -8,6 +8,7 @@ gem 'parallel', require: false
8
8
  gem 'rspec_junit_formatter', '~> 0.4.1'
9
9
  gem 'sidekiq', require: false
10
10
 
11
- gem 'rexml' if RUBY_VERSION >= '3.0.0'
11
+ gem 'rexml'
12
12
 
13
13
  gemspec path: '../'
14
+ eval_gemfile 'base.gemfile'