chewy 7.2.1 → 7.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (108) hide show
  1. checksums.yaml +4 -4
  2. data/.github/CODEOWNERS +1 -0
  3. data/.github/dependabot.yml +42 -0
  4. data/.github/workflows/ruby.yml +28 -26
  5. data/.rubocop.yml +4 -1
  6. data/CHANGELOG.md +196 -0
  7. data/Gemfile +4 -3
  8. data/README.md +203 -20
  9. data/chewy.gemspec +4 -18
  10. data/gemfiles/base.gemfile +12 -0
  11. data/gemfiles/rails.6.1.activerecord.gemfile +2 -1
  12. data/gemfiles/{rails.5.2.activerecord.gemfile → rails.7.0.activerecord.gemfile} +6 -3
  13. data/gemfiles/{rails.6.0.activerecord.gemfile → rails.7.1.activerecord.gemfile} +6 -3
  14. data/lib/chewy/config.rb +22 -14
  15. data/lib/chewy/elastic_client.rb +31 -0
  16. data/lib/chewy/errors.rb +11 -2
  17. data/lib/chewy/fields/base.rb +69 -13
  18. data/lib/chewy/fields/root.rb +2 -10
  19. data/lib/chewy/index/actions.rb +11 -16
  20. data/lib/chewy/index/adapter/active_record.rb +18 -3
  21. data/lib/chewy/index/adapter/object.rb +0 -10
  22. data/lib/chewy/index/adapter/orm.rb +4 -14
  23. data/lib/chewy/index/crutch.rb +15 -7
  24. data/lib/chewy/index/import/bulk_builder.rb +219 -32
  25. data/lib/chewy/index/import/bulk_request.rb +1 -1
  26. data/lib/chewy/index/import/routine.rb +3 -3
  27. data/lib/chewy/index/import.rb +45 -31
  28. data/lib/chewy/index/mapping.rb +2 -2
  29. data/lib/chewy/index/observe/active_record_methods.rb +87 -0
  30. data/lib/chewy/index/observe/callback.rb +34 -0
  31. data/lib/chewy/index/observe.rb +3 -58
  32. data/lib/chewy/index/syncer.rb +1 -1
  33. data/lib/chewy/index.rb +25 -0
  34. data/lib/chewy/journal.rb +17 -6
  35. data/lib/chewy/log_subscriber.rb +5 -1
  36. data/lib/chewy/minitest/helpers.rb +77 -0
  37. data/lib/chewy/minitest/search_index_receiver.rb +3 -1
  38. data/lib/chewy/rake_helper.rb +92 -11
  39. data/lib/chewy/rspec/build_query.rb +12 -0
  40. data/lib/chewy/rspec/helpers.rb +55 -0
  41. data/lib/chewy/rspec/update_index.rb +14 -7
  42. data/lib/chewy/rspec.rb +2 -0
  43. data/lib/chewy/runtime/version.rb +1 -1
  44. data/lib/chewy/runtime.rb +1 -1
  45. data/lib/chewy/search/parameters/collapse.rb +16 -0
  46. data/lib/chewy/search/parameters/ignore_unavailable.rb +27 -0
  47. data/lib/chewy/search/parameters/indices.rb +1 -1
  48. data/lib/chewy/search/parameters/knn.rb +16 -0
  49. data/lib/chewy/search/parameters/order.rb +6 -19
  50. data/lib/chewy/search/parameters/storage.rb +1 -1
  51. data/lib/chewy/search/parameters/track_total_hits.rb +16 -0
  52. data/lib/chewy/search/parameters.rb +4 -4
  53. data/lib/chewy/search/request.rb +74 -16
  54. data/lib/chewy/search/scoping.rb +1 -1
  55. data/lib/chewy/search.rb +5 -2
  56. data/lib/chewy/stash.rb +3 -3
  57. data/lib/chewy/strategy/active_job.rb +1 -1
  58. data/lib/chewy/strategy/atomic_no_refresh.rb +18 -0
  59. data/lib/chewy/strategy/base.rb +10 -0
  60. data/lib/chewy/strategy/delayed_sidekiq/scheduler.rb +168 -0
  61. data/lib/chewy/strategy/delayed_sidekiq/worker.rb +76 -0
  62. data/lib/chewy/strategy/delayed_sidekiq.rb +30 -0
  63. data/lib/chewy/strategy/lazy_sidekiq.rb +64 -0
  64. data/lib/chewy/strategy/sidekiq.rb +1 -1
  65. data/lib/chewy/strategy.rb +3 -0
  66. data/lib/chewy/version.rb +1 -1
  67. data/lib/chewy.rb +21 -14
  68. data/lib/tasks/chewy.rake +18 -2
  69. data/migration_guide.md +1 -1
  70. data/spec/chewy/config_spec.rb +2 -2
  71. data/spec/chewy/elastic_client_spec.rb +26 -0
  72. data/spec/chewy/fields/base_spec.rb +39 -18
  73. data/spec/chewy/index/actions_spec.rb +10 -10
  74. data/spec/chewy/index/adapter/active_record_spec.rb +88 -0
  75. data/spec/chewy/index/import/bulk_builder_spec.rb +309 -1
  76. data/spec/chewy/index/import/routine_spec.rb +5 -5
  77. data/spec/chewy/index/import_spec.rb +48 -26
  78. data/spec/chewy/index/observe/active_record_methods_spec.rb +68 -0
  79. data/spec/chewy/index/observe/callback_spec.rb +139 -0
  80. data/spec/chewy/index/observe_spec.rb +27 -0
  81. data/spec/chewy/journal_spec.rb +13 -49
  82. data/spec/chewy/minitest/helpers_spec.rb +111 -1
  83. data/spec/chewy/minitest/search_index_receiver_spec.rb +6 -4
  84. data/spec/chewy/rake_helper_spec.rb +170 -0
  85. data/spec/chewy/rspec/build_query_spec.rb +34 -0
  86. data/spec/chewy/rspec/helpers_spec.rb +61 -0
  87. data/spec/chewy/search/pagination/kaminari_examples.rb +1 -1
  88. data/spec/chewy/search/pagination/kaminari_spec.rb +1 -1
  89. data/spec/chewy/search/parameters/collapse_spec.rb +5 -0
  90. data/spec/chewy/search/parameters/ignore_unavailable_spec.rb +67 -0
  91. data/spec/chewy/search/parameters/knn_spec.rb +5 -0
  92. data/spec/chewy/search/parameters/order_spec.rb +18 -11
  93. data/spec/chewy/search/parameters/track_total_hits_spec.rb +5 -0
  94. data/spec/chewy/search/parameters_spec.rb +6 -1
  95. data/spec/chewy/search/request_spec.rb +58 -9
  96. data/spec/chewy/search_spec.rb +9 -0
  97. data/spec/chewy/strategy/active_job_spec.rb +8 -8
  98. data/spec/chewy/strategy/atomic_no_refresh_spec.rb +60 -0
  99. data/spec/chewy/strategy/delayed_sidekiq_spec.rb +208 -0
  100. data/spec/chewy/strategy/lazy_sidekiq_spec.rb +214 -0
  101. data/spec/chewy/strategy/sidekiq_spec.rb +4 -4
  102. data/spec/chewy_spec.rb +10 -7
  103. data/spec/spec_helper.rb +1 -2
  104. data/spec/support/active_record.rb +8 -1
  105. metadata +45 -264
  106. data/lib/chewy/backports/deep_dup.rb +0 -46
  107. data/lib/chewy/backports/duplicable.rb +0 -91
  108. data/lib/chewy/index/import/thread_safe_progress_bar.rb +0 -40
data/README.md CHANGED
@@ -5,7 +5,7 @@
5
5
 
6
6
  # Chewy
7
7
 
8
- Chewy is an ODM (Object Document Mapper), built on top of the [the official Elasticsearch client](https://github.com/elastic/elasticsearch-ruby).
8
+ Chewy is an ODM (Object Document Mapper), built on top of [the official Elasticsearch client](https://github.com/elastic/elasticsearch-ruby).
9
9
 
10
10
  ## Why Chewy?
11
11
 
@@ -43,7 +43,7 @@ Or install it yourself as:
43
43
 
44
44
  ### Ruby
45
45
 
46
- Chewy is compatible with MRI 2.6-3.0¹.
46
+ Chewy is compatible with MRI 3.0-3.2¹.
47
47
 
48
48
  > ¹ Ruby 3 is only supported with Rails 6.1
49
49
 
@@ -53,7 +53,7 @@ Chewy is compatible with MRI 2.6-3.0¹.
53
53
  | ------------- | ---------------------------------- |
54
54
  | 7.2.x | 7.x |
55
55
  | 7.1.x | 7.x |
56
- | 7.0.0 | 6.8, 7.x |
56
+ | 7.0.x | 6.8, 7.x |
57
57
  | 6.0.0 | 5.x, 6.x |
58
58
  | 5.x | 5.x, limited support for 1.x & 2.x |
59
59
 
@@ -340,7 +340,7 @@ Chewy.settings = {
340
340
  [See index settings here](https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-update-settings.html).
341
341
  [See root object settings here](https://www.elastic.co/guide/en/elasticsearch/reference/current/dynamic-field-mapping.html).
342
342
 
343
- See [mapping.rb](lib/chewy/type/mapping.rb) for more details.
343
+ See [mapping.rb](lib/chewy/index/mapping.rb) for more details.
344
344
 
345
345
  5. Add model-observing code
346
346
 
@@ -446,6 +446,23 @@ end
446
446
 
447
447
  See the section on *Script fields* for details on calculating distance in a search.
448
448
 
449
+ ### Join fields
450
+
451
+ You can use a [join field](https://www.elastic.co/guide/en/elasticsearch/reference/current/parent-join.html)
452
+ to implement parent-child relationships between documents.
453
+ It [replaces the old `parent_id` based parent-child mapping](https://www.elastic.co/guide/en/elasticsearch/reference/current/removal-of-types.html#parent-child-mapping-types)
454
+
455
+ To use it, you need to pass `relations` and `join` (with `type` and `id`) options:
456
+ ```ruby
457
+ field :hierarchy_link, type: :join, relations: {question: %i[answer comment], answer: :vote, vote: :subvote}, join: {type: :comment_type, id: :commented_id}
458
+ ```
459
+ assuming you have `comment_type` and `commented_id` fields in your model.
460
+
461
+ Note that when you reindex a parent, its children and grandchildren will be reindexed as well.
462
+ This may require additional queries to the primary database and to Elasticsearch.
463
+
464
+ Also note that the join field doesn't support crutches (it should be a field directly defined on the model).
465
+
449
466
  ### Crutches™ technology
450
467
 
451
468
  Assume you are defining your index like this (product has_many categories through product_categories):
@@ -486,7 +503,7 @@ class ProductsIndex < Chewy::Index
486
503
 
487
504
  field :name
488
505
  # simply use crutch-fetched data as a value:
489
- field :category_names, value: ->(product, crutches) { crutches.categories[product.id] }
506
+ field :category_names, value: ->(product, crutches) { crutches[:categories][product.id] }
490
507
  end
491
508
  ```
492
509
 
@@ -508,7 +525,7 @@ So Chewy Crutches™ technology is able to increase your indexing performance in
508
525
 
509
526
  ### Witchcraft™ technology
510
527
 
511
- One more experimental technology to increase import performance. As far as you know, chewy defines value proc for every imported field in mapping, so at the import time each of this procs is executed on imported object to extract result document to import. It would be great for performance to use one huge whole-document-returning proc instead. So basically the idea or Witchcraft™ technology is to compile a single document-returning proc from the index definition.
528
+ One more experimental technology to increase import performance. As far as you know, chewy defines value proc for every imported field in mapping, so at the import time each of these procs is executed on imported object to extract result document to import. It would be great for performance to use one huge whole-document-returning proc instead. So basically the idea of Witchcraft™ technology is to compile a single document-returning proc from the index definition.
512
529
 
513
530
  ```ruby
514
531
  index_scope Product
@@ -552,7 +569,7 @@ Obviously not every type of definition might be compiled. There are some restric
552
569
  end
553
570
  ```
554
571
 
555
- However, it is quite possible that your index definition will be supported by Witchcraft™ technology out of the box in the most of the cases.
572
+ However, it is quite possible that your index definition will be supported by Witchcraft™ technology out of the box in most of the cases.
556
573
 
557
574
  ### Raw Import
558
575
 
@@ -658,7 +675,9 @@ end
658
675
 
659
676
  You may be wondering why do you need it? The answer is simple: not to lose the data.
660
677
 
661
- Imagine that you reset your index in a zero-downtime manner (to separate index), and at the meantime somebody keeps updating the data frequently (to old index). So all these actions will be written to the journal index and you'll be able to apply them after index reset using the `Chewy::Journal` interface.
678
+ Imagine that you reset your index in a zero-downtime manner (to separate index), and in the meantime somebody keeps updating the data frequently (to old index). So all these actions will be written to the journal index and you'll be able to apply them after index reset using the `Chewy::Journal` interface.
679
+
680
+ When enabled, the journal can grow to an enormous size; consider setting up a cron job that cleans it occasionally using the [`chewy:journal:clean` rake task](#chewyjournal).
662
681
 
663
682
  ### Index manipulation
664
683
 
@@ -677,6 +696,7 @@ UsersIndex.import User.where('rating > 100') # or import specified users scope
677
696
  UsersIndex.import User.where('rating > 100').to_a # or import specified users array
678
697
  UsersIndex.import [1, 2, 42] # pass even ids for import, it will be handled in the most effective way
679
698
  UsersIndex.import User.where('rating > 100'), update_fields: [:email] # if update fields are specified - it will update their values only with the `update` bulk action
699
+ UsersIndex.import! # raises an exception in case of any import errors
680
700
 
681
701
  UsersIndex.reset! # purges index and imports default data for all types
682
702
  ```
@@ -737,6 +757,106 @@ The default queue name is `chewy`, you can customize it in settings: `sidekiq.qu
737
757
  Chewy.settings[:sidekiq] = {queue: :low}
738
758
  ```
739
759
 
760
+ #### `:lazy_sidekiq`
761
+
762
+ This does the same thing as `:sidekiq`, but with lazy evaluation. Beware that it does not allow you to use any non-persistent record state for indices and conditions, because the record will be re-fetched from the database asynchronously using Sidekiq. However, for destroying records the strategy will fall back to `:sidekiq`, because it's not possible to re-fetch deleted records from the database.
763
+
764
+ The purpose of this strategy is to improve the response time of the code that should update indexes, as it does not only defer actual ES calls to a background job but `update_index` callbacks evaluation (for created and updated objects) too. Similar to `:sidekiq`, index update is asynchronous so this strategy cannot be used when data and index synchronization is required.
765
+
766
+ ```ruby
767
+ Chewy.strategy(:lazy_sidekiq) do
768
+ City.popular.map(&:do_some_update_action!)
769
+ end
770
+ ```
771
+
772
+ The default queue name is `chewy`, you can customize it in settings: `sidekiq.queue_name`
773
+ ```
774
+ Chewy.settings[:sidekiq] = {queue: :low}
775
+ ```
776
+
777
+ #### `:delayed_sidekiq`
778
+
779
+ It accumulates IDs of records to be reindexed during the latency window in Redis and then performs the reindexing of all accumulated records at once.
780
+ This strategy is very useful in the case of frequently mutated records.
781
+ It supports the `update_fields` option, so it will attempt to select just enough data from the database.
782
+
783
+ Keep in mind, this strategy does not guarantee reindexing in the event of Sidekiq worker termination or an error during the reindexing phase.
784
+ This behavior is intentional to prevent continuous growth of Redis db.
785
+
786
+ There are four options that can be defined in the index:
787
+ ```ruby
788
+ class CitiesIndex...
789
+ strategy_config delayed_sidekiq: {
790
+ latency: 3,
791
+ margin: 2,
792
+ ttl: 60 * 60 * 24,
793
+ reindex_wrapper: ->(&reindex) {
794
+ ActiveRecord::Base.connected_to(role: :reading) { reindex.call }
795
+ }
796
+ # latency - will prevent scheduling identical jobs
797
+ # margin - main purpose is to cover db replication lag by the margin
798
+ # ttl - a chunk expiration time (in seconds)
799
+ # reindex_wrapper - lambda that accepts a block and wraps the reindex process (e.g. in an AR connection block).
800
+ }
801
+
802
+ ...
803
+ end
804
+ ```
805
+
806
+ Also you can define defaults in the `initializers/chewy.rb`
807
+ ```ruby
808
+ Chewy.settings = {
809
+ strategy_config: {
810
+ delayed_sidekiq: {
811
+ latency: 3,
812
+ margin: 2,
813
+ ttl: 60 * 60 * 24,
814
+ reindex_wrapper: ->(&reindex) {
815
+ ActiveRecord::Base.connected_to(role: :reading) { reindex.call }
816
+ }
817
+ }
818
+ }
819
+ }
820
+
821
+ ```
822
+ or in `config/chewy.yml`
823
+ ```yaml
824
+ strategy_config:
825
+ delayed_sidekiq:
826
+ latency: 3
827
+ margin: 2
828
+ ttl: <%= 60 * 60 * 24 %>
829
+ # reindex_wrapper setting is not possible here!!! use the initializer instead
830
+ ```
831
+
832
+ You can use the strategy identically to other strategies
833
+ ```ruby
834
+ Chewy.strategy(:delayed_sidekiq) do
835
+ City.popular.map(&:do_some_update_action!)
836
+ end
837
+ ```
838
+
839
+ The default queue name is `chewy`, you can customize it in settings: `sidekiq.queue_name`
840
+ ```
841
+ Chewy.settings[:sidekiq] = {queue: :low}
842
+ ```
843
+
844
+ Explicit call of the reindex using the `:delayed_sidekiq` strategy
845
+ ```ruby
846
+ CitiesIndex.import([1, 2, 3], strategy: :delayed_sidekiq)
847
+ ```
848
+
849
+ Explicit call of the reindex using `:delayed_sidekiq` strategy with `:update_fields` support
850
+ ```ruby
851
+ CitiesIndex.import([1, 2, 3], update_fields: [:name], strategy: :delayed_sidekiq)
852
+ ```
853
+
854
+ When running tests with the `delayed_sidekiq` strategy while Sidekiq is using a real Redis instance that is NOT cleaned up between tests (via e.g. `Sidekiq.redis(&:flushdb)`), you'll want to clean up some Redis keys between tests to avoid state leaking and flaky tests. Chewy provides a convenience method for that:
855
+ ```ruby
856
+ # it might be a good idea to also add to your testing setup, e.g.: a rspec `before` hook
857
+ Chewy::Strategy::DelayedSidekiq.clear_timechunks!
858
+ ```
859
+
740
860
  #### `:active_job`
741
861
 
742
862
  This does the same thing as `:atomic`, but using ActiveJob. This will inherit the ActiveJob configuration settings including the `active_job.queue_adapter` setting for the environment. Patch `Chewy::Strategy::ActiveJob::Worker` for index updates improving.
@@ -773,7 +893,9 @@ It is convenient for use in e.g. the Rails console with non-block notation:
773
893
 
774
894
  #### `:bypass`
775
895
 
776
- The bypass strategy simply silences index updates.
896
+ When the bypass strategy is active the index will not be automatically updated on object save.
897
+
898
+ For example, on `City.first.save!` the cities index would not be updated.
777
899
 
778
900
  #### Nesting
779
901
 
@@ -851,7 +973,7 @@ Chewy has notifying the following events:
851
973
  {index: 30, delete: 5}
852
974
  ```
853
975
 
854
- * `payload[:errors]`: might not exists. Contains grouped errors with objects ids list:
976
+ * `payload[:errors]`: might not exist. Contains grouped errors with objects ids list:
855
977
 
856
978
  ```ruby
857
979
  {index: {
@@ -959,7 +1081,7 @@ Main methods of the request DSL are: `query`, `filter` and `post_filter`, it is
959
1081
  ```ruby
960
1082
  CitiesIndex
961
1083
  .filter(term: {name: 'Bangkok'})
962
- .query { match name: 'London' }
1084
+ .query(match: {name: 'London'})
963
1085
  .query.not(range: {population: {gt: 1_000_000}})
964
1086
  ```
965
1087
 
@@ -969,7 +1091,7 @@ You can query a set of indexes at once:
969
1091
  CitiesIndex.indices(CountriesIndex).query(match: {name: 'Some'})
970
1092
  ```
971
1093
 
972
- See https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html and https://github.com/elastic/elasticsearch-ruby/tree/master/elasticsearch-dsl for more details.
1094
+ See https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html and https://github.com/elastic/elasticsearch-dsl-ruby for more details.
973
1095
 
974
1096
  An important part of requests manipulation is merging. There are 4 methods to perform it: `merge`, `and`, `or`, `not`. See [Chewy::Search::QueryProxy](lib/chewy/search/query_proxy.rb) for details. Also, `only` and `except` methods help to remove unneeded parts of the request.
975
1097
 
@@ -983,7 +1105,7 @@ Request DSL also provides additional scope actions, like `delete_all`, `exists?`
983
1105
 
984
1106
  #### Pagination
985
1107
 
986
- The request DSL supports pagination with `Kaminari`. An extension is enabled on initializtion if `Kaminari` is available. See [Chewy::Search](lib/chewy/search.rb) and [Chewy::Search::Pagination::Kaminari](lib/chewy/search/pagination/kaminari.rb) for details.
1108
+ The request DSL supports pagination with `Kaminari`. An extension is enabled on initialization if `Kaminari` is available. See [Chewy::Search](lib/chewy/search.rb) and [Chewy::Search::Pagination::Kaminari](lib/chewy/search/pagination/kaminari.rb) for details.
987
1109
 
988
1110
  #### Named scopes
989
1111
 
@@ -1032,12 +1154,6 @@ rake chewy:reset[users,cities] # resets UsersIndex and CitiesIndex
1032
1154
  rake chewy:reset[-users,cities] # resets every index in the application except specified ones
1033
1155
  ```
1034
1156
 
1035
- #### Progressbar for `chewy:reset` tasks
1036
-
1037
- You can optionally output the `progressbar` for `chewy:reset` and `chewy:parallel:reset` during import.
1038
-
1039
- Progressbar is hidden by default. Set `ENV['PROGRESS']` to `true` to display it.
1040
-
1041
1157
  #### `chewy:upgrade`
1042
1158
 
1043
1159
  Performs reset exactly the same way as `chewy:reset` does, but only when the index specification (setting or mapping) was changed.
@@ -1090,6 +1206,10 @@ Right now the approach is that if some data had been updated, but index definiti
1090
1206
 
1091
1207
  Also, there is always full reset alternative with `rake chewy:reset`.
1092
1208
 
1209
+ #### `chewy:create_missing_indexes`
1210
+
1211
+ This rake task creates newly defined indexes in ElasticSearch and skips existing ones. Useful for production-like environments.
1212
+
1093
1213
  #### Parallelizing rake tasks
1094
1214
 
1095
1215
  Every task described above has its own parallel version. Every parallel rake task takes the number for processes for execution as the first argument and the rest of the arguments are exactly the same as for the non-parallel task version.
@@ -1115,9 +1235,29 @@ rake chewy:journal:apply["$(date -v-1H -u +%FT%TZ)"] # apply journaled changes f
1115
1235
  rake chewy:journal:apply["$(date -v-1H -u +%FT%TZ)",users] # apply journaled changes for the past hour on UsersIndex only
1116
1236
  ```
1117
1237
 
1238
+ When the size of the journal becomes very large, the classical way of deletion would be obstructive and resource consuming. Fortunately, Chewy internally uses [delete-by-query](https://www.elastic.co/guide/en/elasticsearch/reference/7.17/docs-delete-by-query.html#docs-delete-by-query-task-api) ES function which supports async execution with batching and [throttling](https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-delete-by-query.html#docs-delete-by-query-throttle).
1239
+
1240
+ The available options, which can be set by ENV variables, are listed below:
1241
+ * `WAIT_FOR_COMPLETION` - a boolean flag. It controls async execution. It waits by default. When set to `false` (`0`, `f`, `false` or `off` in any case spelling is accepted as `false`), Elasticsearch performs some preflight checks, launches the request, and returns a task reference you can use to cancel the task or get its status.
1242
+ * `REQUESTS_PER_SECOND` - float. The throttle for this request in sub-requests per second. No throttling is enforced by default.
1243
+ * `SCROLL_SIZE` - integer. The number of documents to be deleted in single sub-request. The default batch size is 1000.
1244
+
1245
+ ```bash
1246
+ rake chewy:journal:clean WAIT_FOR_COMPLETION=false REQUESTS_PER_SECOND=10 SCROLL_SIZE=5000
1247
+ ```
1248
+
1118
1249
  ### RSpec integration
1119
1250
 
1120
- Just add `require 'chewy/rspec'` to your spec_helper.rb and you will get additional features: See [update_index.rb](lib/chewy/rspec/update_index.rb) for more details.
1251
+ Just add `require 'chewy/rspec'` to your spec_helper.rb and you will get additional features:
1252
+
1253
+ [update_index](lib/chewy/rspec/update_index.rb) helper
1254
+ `mock_elasticsearch_response` helper to mock elasticsearch response
1255
+ `mock_elasticsearch_response_sources` helper to mock elasticsearch response sources
1256
+ `build_query` matcher to compare request and expected query (returns `true`/`false`)
1257
+
1258
+ To use `mock_elasticsearch_response` and `mock_elasticsearch_response_sources` helpers add `include Chewy::Rspec::Helpers` to your tests.
1259
+
1260
+ See [chewy/rspec/](lib/chewy/rspec/) for more details.
1121
1261
 
1122
1262
  ### Minitest integration
1123
1263
 
@@ -1127,6 +1267,14 @@ Since you can set `:bypass` strategy for test suites and manually handle import
1127
1267
 
1128
1268
  But if you require chewy to index/update model regularly in your test suite then you can specify `:urgent` strategy for documents indexing. Add `Chewy.strategy(:urgent)` to test_helper.rb.
1129
1269
 
1270
+ Also, you can use additional helpers:
1271
+
1272
+ `mock_elasticsearch_response` to mock elasticsearch response
1273
+ `mock_elasticsearch_response_sources` to mock elasticsearch response sources
1274
+ `assert_elasticsearch_query` to compare request and expected query (returns `true`/`false`)
1275
+
1276
+ See [chewy/minitest/](lib/chewy/minitest/) for more details.
1277
+
1130
1278
  ### DatabaseCleaner
1131
1279
 
1132
1280
  If you use `DatabaseCleaner` in your tests with [the `transaction` strategy](https://github.com/DatabaseCleaner/database_cleaner#how-to-use), you may run into the problem that `ActiveRecord`'s models are not indexed automatically on save despite the fact that you set the callbacks to do this with the `update_index` method. The issue arises because `chewy` indices data on `after_commit` run as default, but all `after_commit` callbacks are not run with the `DatabaseCleaner`'s' `transaction` strategy. You can solve this issue by changing the `Chewy.use_after_commit_callbacks` option. Just add the following initializer in your Rails application:
@@ -1136,6 +1284,41 @@ If you use `DatabaseCleaner` in your tests with [the `transaction` strategy](htt
1136
1284
  Chewy.use_after_commit_callbacks = !Rails.env.test?
1137
1285
  ```
1138
1286
 
1287
+ ### Pre-request Filter
1288
+
1289
+ Should you need to inspect the query prior to it being dispatched to ElasticSearch during any queries, you can use the `before_es_request_filter`. `before_es_request_filter` is a callable object, as demonstrated below:
1290
+
1291
+ ```ruby
1292
+ Chewy.before_es_request_filter = -> (method_name, args, kw_args) { ... }
1293
+ ```
1294
+
1295
+ While using the `before_es_request_filter`, please consider the following:
1296
+
1297
+ * `before_es_request_filter` acts as a simple proxy before any request made via the `ElasticSearch::Client`. The arguments passed to this filter include:
1298
+ * `method_name` - The name of the method being called. Examples are `search`, `count`, `bulk`, etc.
1299
+ * `args` and `kw_args` - These are the positional and keyword arguments provided in the method call.
1300
+ * The operation is synchronous, so avoid executing any heavy or time-consuming operations within the filter to prevent performance degradation.
1301
+ * The return value of the proc is disregarded. This filter is intended for inspection or modification of the query rather than generating a response.
1302
+ * Any exception raised inside the callback will propagate upward and halt the execution of the query. It is essential to handle potential errors adequately to ensure the stability of your search functionality.
1303
+
1304
+ ### Import scope clean-up behavior
1305
+
1306
+ Whenever you set the `import_scope` for the index, in the case of ActiveRecord,
1307
+ options for order, offset and limit will be removed. You can set the behavior of
1308
+ chewy, before the clean-up itself.
1309
+
1310
+ The default behavior is a warning sent to the Chewy logger (`:warn`). Another more
1311
+ restrictive option is raising an exception (`:raise`). Both options have a
1312
+ negative impact on performance since verifying whether the code uses any of
1313
+ these options requires building AREL query.
1314
+
1315
+ To avoid the loading time impact, you can ignore the check (`:ignore`) before
1316
+ the clean-up.
1317
+
1318
+ ```
1319
+ Chewy.import_scope_cleanup_behavior = :ignore
1320
+ ```
1321
+
1139
1322
  ## Contributing
1140
1323
 
1141
1324
  1. Fork it (http://github.com/toptal/chewy/fork)
data/chewy.gemspec CHANGED
@@ -2,7 +2,7 @@ lib = File.expand_path('lib', __dir__)
2
2
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
3
  require 'chewy/version'
4
4
 
5
- Gem::Specification.new do |spec| # rubocop:disable Metrics/BlockLength
5
+ Gem::Specification.new do |spec|
6
6
  spec.name = 'chewy'
7
7
  spec.version = Chewy::VERSION
8
8
  spec.authors = ['Toptal, LLC', 'pyromaniac']
@@ -14,24 +14,10 @@ Gem::Specification.new do |spec| # rubocop:disable Metrics/BlockLength
14
14
 
15
15
  spec.files = `git ls-files`.split($RS)
16
16
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
17
- spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
18
17
  spec.require_paths = ['lib']
19
18
 
20
- spec.add_development_dependency 'database_cleaner'
21
- spec.add_development_dependency 'elasticsearch-extensions'
22
- spec.add_development_dependency 'rake'
23
- spec.add_development_dependency 'rspec', '>= 3.7.0'
24
- spec.add_development_dependency 'rspec-collection_matchers'
25
- spec.add_development_dependency 'rspec-its'
26
- spec.add_development_dependency 'rubocop', '1.11'
27
- spec.add_development_dependency 'sqlite3'
28
- spec.add_development_dependency 'timecop'
29
-
30
- spec.add_development_dependency 'method_source'
31
- spec.add_development_dependency 'unparser'
32
-
33
- spec.add_dependency 'activesupport', '>= 5.2'
34
- spec.add_dependency 'elasticsearch', '>= 6.3.0'
19
+ spec.add_dependency 'activesupport', '>= 5.2' # Remove with major version bump, 8.x
20
+ spec.add_dependency 'elasticsearch', '>= 7.14.0', '< 8'
35
21
  spec.add_dependency 'elasticsearch-dsl'
36
- spec.add_dependency 'ruby-progressbar'
22
+ spec.metadata['rubygems_mfa_required'] = 'true'
37
23
  end
@@ -0,0 +1,12 @@
1
+ gem 'database_cleaner'
2
+ gem 'elasticsearch-extensions'
3
+ gem 'method_source'
4
+ gem 'rake'
5
+ gem 'redis', require: false
6
+ gem 'rspec', '>= 3.7.0'
7
+ gem 'rspec-collection_matchers'
8
+ gem 'rspec-its'
9
+ gem 'rubocop', '1.63.4'
10
+ gem 'sqlite3', '~> 1.4'
11
+ gem 'timecop'
12
+ gem 'unparser'
@@ -8,6 +8,7 @@ gem 'parallel', require: false
8
8
  gem 'rspec_junit_formatter', '~> 0.4.1'
9
9
  gem 'sidekiq', require: false
10
10
 
11
- gem 'rexml' if RUBY_VERSION >= '3.0.0'
11
+ gem 'rexml'
12
12
 
13
13
  gemspec path: '../'
14
+ eval_gemfile 'base.gemfile'
@@ -1,11 +1,14 @@
1
1
  source 'https://rubygems.org'
2
2
 
3
- gem 'activejob', '~> 5.2.0'
4
- gem 'activerecord', '~> 5.2.0'
5
- gem 'activesupport', '~> 5.2.0'
3
+ gem 'activejob', '~> 7.0.0'
4
+ gem 'activerecord', '~> 7.0.0'
5
+ gem 'activesupport', '~> 7.0.0'
6
6
  gem 'kaminari-core', '~> 1.1.0', require: false
7
7
  gem 'parallel', require: false
8
8
  gem 'rspec_junit_formatter', '~> 0.4.1'
9
9
  gem 'sidekiq', require: false
10
10
 
11
+ gem 'rexml'
12
+
11
13
  gemspec path: '../'
14
+ eval_gemfile 'base.gemfile'
@@ -1,11 +1,14 @@
1
1
  source 'https://rubygems.org'
2
2
 
3
- gem 'activejob', '~> 6.0.0'
4
- gem 'activerecord', '~> 6.0.0'
5
- gem 'activesupport', '~> 6.0.0'
3
+ gem 'activejob', '~> 7.1.0'
4
+ gem 'activerecord', '~> 7.1.0'
5
+ gem 'activesupport', '~> 7.1.0'
6
6
  gem 'kaminari-core', '~> 1.1.0', require: false
7
7
  gem 'parallel', require: false
8
8
  gem 'rspec_junit_formatter', '~> 0.4.1'
9
9
  gem 'sidekiq', require: false
10
10
 
11
+ gem 'rexml'
12
+
11
13
  gemspec path: '../'
14
+ eval_gemfile 'base.gemfile'
data/lib/chewy/config.rb CHANGED
@@ -32,13 +32,18 @@ module Chewy
32
32
  # Set number_of_replicas to 0 before reset and put the original value after
33
33
  # https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-update-settings.html
34
34
  :reset_no_replicas,
35
- # Refresh or not when import async (sidekiq, activejob)
35
+ # Refresh or not when import async (sidekiq, lazy_sidekiq, activejob)
36
36
  :disable_refresh_async,
37
37
  # Default options for root of Chewy type. Allows to set default options
38
38
  # for type mappings like `_all`.
39
39
  :default_root_options,
40
40
  # Default field type for any field in any Chewy type. Defaults to 'text'.
41
- :default_field_type
41
+ :default_field_type,
42
+ # Callback called before each request is sent to ES
43
+ :before_es_request_filter,
44
+ # Behavior when import scope for index includes order, offset or limit.
45
+ # Can be :ignore, :warn, :raise. Defaults to :warn
46
+ :import_scope_cleanup_behavior
42
47
 
43
48
  attr_reader :transport_logger, :transport_tracer,
44
49
  # Chewy search request DSL base class, used by every index.
@@ -60,16 +65,17 @@ module Chewy
60
65
  @indices_path = 'app/chewy'
61
66
  @default_root_options = {}
62
67
  @default_field_type = 'text'.freeze
68
+ @import_scope_cleanup_behavior = :warn
63
69
  @search_class = build_search_class(Chewy::Search::Request)
64
70
  end
65
71
 
66
72
  def transport_logger=(logger)
67
- Chewy.client.transport.logger = logger
73
+ Chewy.client.transport.transport.logger = logger
68
74
  @transport_logger = logger
69
75
  end
70
76
 
71
77
  def transport_tracer=(tracer)
72
- Chewy.client.transport.tracer = tracer
78
+ Chewy.client.transport.transport.tracer = tracer
73
79
  @transport_tracer = tracer
74
80
  end
75
81
 
@@ -127,17 +133,19 @@ module Chewy
127
133
  private
128
134
 
129
135
  def yaml_settings
130
- @yaml_settings ||= begin
131
- if defined?(Rails::VERSION)
132
- file = Rails.root.join('config', 'chewy.yml')
136
+ @yaml_settings ||= build_yaml_settings || {}
137
+ end
133
138
 
134
- if File.exist?(file)
135
- yaml = ERB.new(File.read(file)).result
136
- hash = YAML.load(yaml) # rubocop:disable Security/YAMLLoad
137
- hash[Rails.env].try(:deep_symbolize_keys) if hash
138
- end
139
- end || {}
140
- end
139
+ def build_yaml_settings
140
+ return unless defined?(Rails::VERSION)
141
+
142
+ file = Rails.root.join('config', 'chewy.yml')
143
+
144
+ return unless File.exist?(file)
145
+
146
+ yaml = ERB.new(File.read(file)).result
147
+ hash = YAML.unsafe_load(yaml)
148
+ hash[Rails.env].try(:deep_symbolize_keys) if hash
141
149
  end
142
150
 
143
151
  def build_search_class(base)
@@ -0,0 +1,31 @@
1
+ module Chewy
2
+ # Replacement for Chewy.client
3
+ class ElasticClient
4
+ def self.build_es_client(configuration = Chewy.configuration)
5
+ client_configuration = configuration.deep_dup
6
+ client_configuration.delete(:prefix) # used by Chewy, not relevant to Elasticsearch::Client
7
+ block = client_configuration[:transport_options].try(:delete, :proc)
8
+ ::Elasticsearch::Client.new(client_configuration, &block)
9
+ end
10
+
11
+ def initialize(elastic_client = self.class.build_es_client)
12
+ @elastic_client = elastic_client
13
+ end
14
+
15
+ private
16
+
17
+ def method_missing(name, *args, **kwargs, &block)
18
+ inspect_payload(name, args, kwargs)
19
+
20
+ @elastic_client.__send__(name, *args, **kwargs, &block)
21
+ end
22
+
23
+ def respond_to_missing?(name, _include_private = false)
24
+ @elastic_client.respond_to?(name) || super
25
+ end
26
+
27
+ def inspect_payload(name, args, kwargs)
28
+ Chewy.config.before_es_request_filter&.call(name, args, kwargs)
29
+ end
30
+ end
31
+ end
data/lib/chewy/errors.rb CHANGED
@@ -7,7 +7,7 @@ module Chewy
7
7
 
8
8
  class UndefinedUpdateStrategy < Error
9
9
  def initialize(_type)
10
- super <<-MESSAGE
10
+ super(<<-MESSAGE)
11
11
  Index update strategy is undefined for current context.
12
12
  Please wrap your code with `Chewy.strategy(:strategy_name) block.`
13
13
  MESSAGE
@@ -27,7 +27,16 @@ module Chewy
27
27
  message << " on #{documents.count} documents: #{documents}\n"
28
28
  end
29
29
  end
30
- super message
30
+ super(message)
31
31
  end
32
32
  end
33
+
34
+ class InvalidJoinFieldType < Error
35
+ def initialize(join_field_type, join_field_name, relations)
36
+ super("`#{join_field_type}` set for the join field `#{join_field_name}` is not on the :relations list (#{relations})")
37
+ end
38
+ end
39
+
40
+ class ImportScopeCleanupError < Error
41
+ end
33
42
  end