chewy 7.6.0 → 8.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/CODEOWNERS +1 -1
- data/.github/dependabot.yml +2 -2
- data/.github/workflows/ruby.yml +11 -10
- data/.rubocop.yml +1 -1
- data/.rubocop_todo.yml +132 -39
- data/CHANGELOG.md +18 -1
- data/CONTRIBUTING.md +1 -1
- data/LICENSE.txt +1 -1
- data/README.md +50 -1125
- data/chewy.gemspec +3 -2
- data/docker-compose.yml +14 -0
- data/docs/README.md +16 -0
- data/docs/configuration.md +440 -0
- data/docs/import.md +122 -0
- data/docs/indexing.md +329 -0
- data/docs/querying.md +72 -0
- data/docs/rake_tasks.md +108 -0
- data/docs/testing.md +41 -0
- data/docs/troubleshooting.md +101 -0
- data/gemfiles/base.gemfile +3 -3
- data/gemfiles/{rails.6.1.activerecord.gemfile → rails.7.2.activerecord.gemfile} +3 -3
- data/gemfiles/{rails.7.0.activerecord.gemfile → rails.8.0.activerecord.gemfile} +3 -3
- data/lib/chewy/config.rb +2 -2
- data/lib/chewy/errors.rb +3 -0
- data/lib/chewy/fields/root.rb +1 -1
- data/lib/chewy/index/actions.rb +5 -5
- data/lib/chewy/index/aliases.rb +1 -1
- data/lib/chewy/index/syncer.rb +5 -5
- data/lib/chewy/minitest/helpers.rb +1 -1
- data/lib/chewy/search/request.rb +4 -4
- data/lib/chewy/search/response.rb +7 -0
- data/lib/chewy/search/scrolling.rb +2 -1
- data/lib/chewy/strategy/delayed_sidekiq/worker.rb +1 -1
- data/lib/chewy/version.rb +1 -1
- data/lib/chewy.rb +4 -0
- data/migration_guide.md +1 -1
- data/spec/chewy/config_spec.rb +13 -14
- data/spec/chewy/elastic_client_spec.rb +1 -1
- data/spec/chewy/fields/base_spec.rb +2 -2
- data/spec/chewy/fields/time_fields_spec.rb +1 -1
- data/spec/chewy/index/actions_spec.rb +9 -70
- data/spec/chewy/index/aliases_spec.rb +1 -1
- data/spec/chewy/index/import/bulk_builder_spec.rb +2 -2
- data/spec/chewy/index/import/bulk_request_spec.rb +1 -1
- data/spec/chewy/index/import/routine_spec.rb +1 -1
- data/spec/chewy/index/import_spec.rb +15 -15
- data/spec/chewy/index/observe/callback_spec.rb +1 -1
- data/spec/chewy/index/specification_spec.rb +1 -4
- data/spec/chewy/index/syncer_spec.rb +1 -1
- data/spec/chewy/index_spec.rb +1 -1
- data/spec/chewy/journal_spec.rb +2 -2
- data/spec/chewy/minitest/helpers_spec.rb +2 -6
- data/spec/chewy/multi_search_spec.rb +1 -1
- data/spec/chewy/rake_helper_spec.rb +1 -1
- data/spec/chewy/repository_spec.rb +4 -4
- data/spec/chewy/rspec/update_index_spec.rb +2 -2
- data/spec/chewy/runtime_spec.rb +2 -2
- data/spec/chewy/search/loader_spec.rb +1 -1
- data/spec/chewy/search/pagination/kaminari_examples.rb +1 -1
- data/spec/chewy/search/query_proxy_spec.rb +0 -24
- data/spec/chewy/search/request_spec.rb +7 -3
- data/spec/chewy/search/response_spec.rb +2 -24
- data/spec/chewy/search/scrolling_spec.rb +1 -1
- data/spec/chewy/search_spec.rb +1 -1
- data/spec/chewy/stash_spec.rb +1 -1
- data/spec/chewy/strategy/delayed_sidekiq_spec.rb +27 -10
- data/spec/chewy/strategy_spec.rb +1 -1
- data/spec/chewy_spec.rb +5 -22
- data/spec/spec_helper.rb +26 -0
- data/spec/support/active_record.rb +35 -4
- metadata +22 -17
- data/gemfiles/rails.7.1.activerecord.gemfile +0 -14
data/docs/indexing.md
ADDED
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
# Indexing
|
|
2
|
+
|
|
3
|
+
## Index definition
|
|
4
|
+
|
|
5
|
+
1. Create `/app/chewy/users_index.rb`
|
|
6
|
+
|
|
7
|
+
```ruby
|
|
8
|
+
class UsersIndex < Chewy::Index
|
|
9
|
+
|
|
10
|
+
end
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
2. Define index scope (you can omit this part if you don't need to specify a scope (i.e. use PORO objects for import) or options)
|
|
14
|
+
|
|
15
|
+
```ruby
|
|
16
|
+
class UsersIndex < Chewy::Index
|
|
17
|
+
index_scope User.active # or just a model instead of a scope: index_scope User
|
|
18
|
+
end
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
3. Add some mappings
|
|
22
|
+
|
|
23
|
+
```ruby
|
|
24
|
+
class UsersIndex < Chewy::Index
|
|
25
|
+
index_scope User.active.includes(:country, :badges, :projects)
|
|
26
|
+
field :first_name, :last_name # multiple fields without additional options
|
|
27
|
+
field :email, analyzer: 'email' # Elasticsearch-related options
|
|
28
|
+
field :country, value: ->(user) { user.country.name } # custom value proc
|
|
29
|
+
field :badges, value: ->(user) { user.badges.map(&:name) } # passing array values to index
|
|
30
|
+
field :projects do # the same block syntax for multi_field, if `:type` is specified
|
|
31
|
+
field :title
|
|
32
|
+
field :description # default data type is `text`
|
|
33
|
+
# additional top-level objects passed to value proc:
|
|
34
|
+
field :categories, value: ->(project, user) { project.categories.map(&:name) if user.active? }
|
|
35
|
+
end
|
|
36
|
+
field :rating, type: 'integer' # custom data type
|
|
37
|
+
field :created, type: 'date', include_in_all: false,
|
|
38
|
+
value: ->{ created_at } # value proc for source object context
|
|
39
|
+
end
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
[See here for mapping definitions](https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping.html).
|
|
43
|
+
|
|
44
|
+
4. Add some index-related settings. Analyzer repositories might be used as well. See `Chewy::Index.settings` docs for details:
|
|
45
|
+
|
|
46
|
+
```ruby
|
|
47
|
+
class UsersIndex < Chewy::Index
|
|
48
|
+
settings analysis: {
|
|
49
|
+
analyzer: {
|
|
50
|
+
email: {
|
|
51
|
+
tokenizer: 'keyword',
|
|
52
|
+
filter: ['lowercase']
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
index_scope User.active.includes(:country, :badges, :projects)
|
|
58
|
+
root date_detection: false do
|
|
59
|
+
template 'about_translations.*', type: 'text', analyzer: 'standard'
|
|
60
|
+
|
|
61
|
+
field :first_name, :last_name
|
|
62
|
+
field :email, analyzer: 'email'
|
|
63
|
+
field :country, value: ->(user) { user.country.name }
|
|
64
|
+
field :badges, value: ->(user) { user.badges.map(&:name) }
|
|
65
|
+
field :projects do
|
|
66
|
+
field :title
|
|
67
|
+
field :description
|
|
68
|
+
end
|
|
69
|
+
field :about_translations, type: 'object' # pass object type explicitly if necessary
|
|
70
|
+
field :rating, type: 'integer'
|
|
71
|
+
field :created, type: 'date', include_in_all: false,
|
|
72
|
+
value: ->{ created_at }
|
|
73
|
+
end
|
|
74
|
+
end
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
[See index settings here](https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-update-settings.html).
|
|
78
|
+
[See root object settings here](https://www.elastic.co/guide/en/elasticsearch/reference/current/dynamic-field-mapping.html).
|
|
79
|
+
|
|
80
|
+
See [mapping.rb](../lib/chewy/index/mapping.rb) for more details.
|
|
81
|
+
|
|
82
|
+
5. Add model-observing code
|
|
83
|
+
|
|
84
|
+
```ruby
|
|
85
|
+
class User < ActiveRecord::Base
|
|
86
|
+
update_index('users') { self } # specifying index and back-reference
|
|
87
|
+
# for updating after user save or destroy
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
class Country < ActiveRecord::Base
|
|
91
|
+
has_many :users
|
|
92
|
+
|
|
93
|
+
update_index('users') { users } # return single object or collection
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
class Project < ActiveRecord::Base
|
|
97
|
+
update_index('users') { user if user.active? } # you can return even `nil` from the back-reference
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
class Book < ActiveRecord::Base
|
|
101
|
+
update_index(->(book) {"books_#{book.language}"}) { self } # dynamic index name with proc.
|
|
102
|
+
# For book with language == "en"
|
|
103
|
+
# this code will generate `books_en`
|
|
104
|
+
end
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
The `update_index` callback requires an active update strategy to be set. See [configuration.md](configuration.md#index-update-strategies) for available strategies and how they integrate with Rails.
|
|
108
|
+
|
|
109
|
+
Also, you can use the second argument for method name passing:
|
|
110
|
+
|
|
111
|
+
```ruby
|
|
112
|
+
update_index('users', :self)
|
|
113
|
+
update_index('users', :users)
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
In the case of a belongs_to association you may need to update both associated objects, previous and current:
|
|
117
|
+
|
|
118
|
+
```ruby
|
|
119
|
+
class City < ActiveRecord::Base
|
|
120
|
+
belongs_to :country
|
|
121
|
+
|
|
122
|
+
update_index('cities') { self }
|
|
123
|
+
update_index 'countries' do
|
|
124
|
+
previous_changes['country_id'] || country
|
|
125
|
+
end
|
|
126
|
+
end
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
## Multi (nested) and object field types
|
|
130
|
+
|
|
131
|
+
To define an object field you can simply nest fields in the DSL:
|
|
132
|
+
|
|
133
|
+
```ruby
|
|
134
|
+
field :projects do
|
|
135
|
+
field :title
|
|
136
|
+
field :description
|
|
137
|
+
end
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
This will automatically set the type of the root field to `object`. You may also specify `type: 'object'` explicitly.
|
|
141
|
+
|
|
142
|
+
To define a multi field you have to specify any type except for `object` or `nested` in the root field:
|
|
143
|
+
|
|
144
|
+
```ruby
|
|
145
|
+
field :full_name, type: 'text', value: ->{ full_name.strip } do
|
|
146
|
+
field :ordered, analyzer: 'ordered'
|
|
147
|
+
field :untouched, type: 'keyword'
|
|
148
|
+
end
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
The `value:` option for internal fields will no longer be effective.
|
|
152
|
+
|
|
153
|
+
## Geo Point fields
|
|
154
|
+
|
|
155
|
+
You can use [Elasticsearch's geo mapping](https://www.elastic.co/guide/en/elasticsearch/reference/current/geo-point.html) with the `geo_point` field type, allowing you to query, filter and order by latitude and longitude. You can use the following hash format:
|
|
156
|
+
|
|
157
|
+
```ruby
|
|
158
|
+
field :coordinates, type: 'geo_point', value: ->{ {lat: latitude, lon: longitude} }
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
or by using nested fields:
|
|
162
|
+
|
|
163
|
+
```ruby
|
|
164
|
+
field :coordinates, type: 'geo_point' do
|
|
165
|
+
field :lat, value: ->{ latitude }
|
|
166
|
+
field :lon, value: ->{ longitude }
|
|
167
|
+
end
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
See the section on *Script fields* for details on calculating distance in a search.
|
|
171
|
+
|
|
172
|
+
## Join fields
|
|
173
|
+
|
|
174
|
+
You can use a [join field](https://www.elastic.co/guide/en/elasticsearch/reference/current/parent-join.html)
|
|
175
|
+
to implement parent-child relationships between documents.
|
|
176
|
+
It [replaces the old `parent_id` based parent-child mapping](https://www.elastic.co/guide/en/elasticsearch/reference/current/removal-of-types.html#parent-child-mapping-types).
|
|
177
|
+
|
|
178
|
+
To use it, you need to pass `relations` and `join` (with `type` and `id`) options:
|
|
179
|
+
```ruby
|
|
180
|
+
field :hierarchy_link, type: :join, relations: {question: %i[answer comment], answer: :vote, vote: :subvote}, join: {type: :comment_type, id: :commented_id}
|
|
181
|
+
```
|
|
182
|
+
assuming you have `comment_type` and `commented_id` fields in your model.
|
|
183
|
+
|
|
184
|
+
Note that when you reindex a parent, its children and grandchildren will be reindexed as well.
|
|
185
|
+
This may require additional queries to the primary database and to Elasticsearch.
|
|
186
|
+
|
|
187
|
+
Also note that the join field doesn't support crutches (it should be a field directly defined on the model).
|
|
188
|
+
|
|
189
|
+
## Crutches technology
|
|
190
|
+
|
|
191
|
+
Assume you are defining your index like this (product has_many categories through product_categories):
|
|
192
|
+
|
|
193
|
+
```ruby
|
|
194
|
+
class ProductsIndex < Chewy::Index
|
|
195
|
+
index_scope Product.includes(:categories)
|
|
196
|
+
field :name
|
|
197
|
+
field :category_names, value: ->(product) { product.categories.map(&:name) } # or shorter just -> { categories.map(&:name) }
|
|
198
|
+
end
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
Then the Chewy reindexing flow will look like the following pseudo-code:
|
|
202
|
+
|
|
203
|
+
```ruby
|
|
204
|
+
Product.includes(:categories).find_in_batches(1000) do |batch|
|
|
205
|
+
bulk_body = batch.map do |object|
|
|
206
|
+
{name: object.name, category_names: object.categories.map(&:name)}.to_json
|
|
207
|
+
end
|
|
208
|
+
# here we are sending every batch of data to ES
|
|
209
|
+
Chewy.client.bulk bulk_body
|
|
210
|
+
end
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
If you meet complicated cases when associations are not applicable you can replace Rails associations with Chewy Crutches technology:
|
|
214
|
+
|
|
215
|
+
```ruby
|
|
216
|
+
class ProductsIndex < Chewy::Index
|
|
217
|
+
index_scope Product
|
|
218
|
+
crutch :categories do |collection| # collection here is a current batch of products
|
|
219
|
+
# data is fetched with a lightweight query without objects initialization
|
|
220
|
+
data = ProductCategory.joins(:category).where(product_id: collection.map(&:id)).pluck(:product_id, 'categories.name')
|
|
221
|
+
# then we have to convert fetched data to appropriate format
|
|
222
|
+
# this will return our data in structure like:
|
|
223
|
+
# {123 => ['sweets', 'juices'], 456 => ['meat']}
|
|
224
|
+
data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
|
|
225
|
+
end
|
|
226
|
+
|
|
227
|
+
field :name
|
|
228
|
+
# simply use crutch-fetched data as a value:
|
|
229
|
+
field :category_names, value: ->(product, crutches) { crutches[:categories][product.id] }
|
|
230
|
+
end
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
An example flow will look like this:
|
|
234
|
+
|
|
235
|
+
```ruby
|
|
236
|
+
Product.find_in_batches(1000) do |batch|
|
|
237
|
+
crutches[:categories] = ProductCategory.joins(:category).where(product_id: batch.map(&:id)).pluck(:product_id, 'categories.name')
|
|
238
|
+
.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
|
|
239
|
+
|
|
240
|
+
bulk_body = batch.map do |object|
|
|
241
|
+
{name: object.name, category_names: crutches[:categories][object.id]}.to_json
|
|
242
|
+
end
|
|
243
|
+
Chewy.client.bulk bulk_body
|
|
244
|
+
end
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
So Chewy Crutches technology is able to increase your indexing performance in some cases up to a hundredfold or even more depending on your associations complexity. For another approach to import performance, see [Raw import](import.md#raw-import).
|
|
248
|
+
|
|
249
|
+
## Witchcraft technology
|
|
250
|
+
|
|
251
|
+
One more experimental technology to increase import performance. As you know, Chewy defines a value proc for every imported field in the mapping, so at import time each of these procs is executed on the imported object to build the resulting document. It would be great for performance to use one huge whole-document-returning proc instead. So basically the idea of Witchcraft technology is to compile a single document-returning proc from the index definition.
|
|
252
|
+
|
|
253
|
+
```ruby
|
|
254
|
+
index_scope Product
|
|
255
|
+
witchcraft!
|
|
256
|
+
|
|
257
|
+
field :title
|
|
258
|
+
field :tags, value: -> { tags.map(&:name) }
|
|
259
|
+
field :categories do
|
|
260
|
+
field :name, value: -> (product, category) { category.name }
|
|
261
|
+
field :type, value: -> (product, category, crutch) { crutch.types[category.name] }
|
|
262
|
+
end
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
The index definition above will be compiled to something close to:
|
|
266
|
+
|
|
267
|
+
```ruby
|
|
268
|
+
-> (object, crutches) do
|
|
269
|
+
{
|
|
270
|
+
title: object.title,
|
|
271
|
+
tags: object.tags.map(&:name),
|
|
272
|
+
categories: object.categories.map do |object2|
|
|
273
|
+
{
|
|
274
|
+
name: object2.name,
|
|
275
|
+
type: crutches.types[object2.name]
|
|
276
|
+
}
|
|
277
|
+
end
|
|
278
|
+
}
|
|
279
|
+
end
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
And don't even ask how it is possible; it is witchcraft.
|
|
283
|
+
Obviously not every type of definition might be compiled. There are some restrictions:
|
|
284
|
+
|
|
285
|
+
1. Use reasonable formatting to make `method_source` be able to extract field value proc sources.
|
|
286
|
+
2. Value procs with splat arguments are not supported right now.
|
|
287
|
+
3. If you are generating fields dynamically use value proc with arguments, argumentless value procs are not supported yet:
|
|
288
|
+
|
|
289
|
+
```ruby
|
|
290
|
+
[:first_name, :last_name].each do |name|
|
|
291
|
+
field name, value: -> (o) { o.send(name) }
|
|
292
|
+
end
|
|
293
|
+
```
|
|
294
|
+
|
|
295
|
+
However, it is quite possible that your index definition will be supported by Witchcraft technology out of the box in most of the cases.
|
|
296
|
+
|
|
297
|
+
## Index manipulation
|
|
298
|
+
|
|
299
|
+
```ruby
|
|
300
|
+
UsersIndex.delete # destroy index if it exists
|
|
301
|
+
UsersIndex.delete!
|
|
302
|
+
|
|
303
|
+
UsersIndex.create
|
|
304
|
+
UsersIndex.create! # use bang or non-bang methods
|
|
305
|
+
|
|
306
|
+
UsersIndex.purge
|
|
307
|
+
UsersIndex.purge! # deletes then creates index
|
|
308
|
+
|
|
309
|
+
UsersIndex.import # import with 0 arguments processes all the data specified in index_scope definition
|
|
310
|
+
UsersIndex.import User.where('rating > 100') # or import specified users scope
|
|
311
|
+
UsersIndex.import User.where('rating > 100').to_a # or import specified users array
|
|
312
|
+
UsersIndex.import [1, 2, 42] # pass even ids for import, it will be handled in the most effective way
|
|
313
|
+
UsersIndex.import User.where('rating > 100'), update_fields: [:email] # if update fields are specified - it will update their values only with the `update` bulk action
|
|
314
|
+
UsersIndex.import! # raises an exception in case of any import errors
|
|
315
|
+
|
|
316
|
+
UsersIndex.reset! # purges index and imports default data for all types
|
|
317
|
+
```
|
|
318
|
+
|
|
319
|
+
For more on import options, batching and journaling, see [import.md](import.md).
|
|
320
|
+
|
|
321
|
+
If the passed user is `#destroyed?`, or satisfies a `delete_if` index_scope option, or the specified id does not exist in the database, import will perform delete from index action for this object.
|
|
322
|
+
|
|
323
|
+
```ruby
|
|
324
|
+
index_scope User, delete_if: :deleted_at
|
|
325
|
+
index_scope User, delete_if: -> { deleted_at }
|
|
326
|
+
index_scope User, delete_if: ->(user) { user.deleted_at }
|
|
327
|
+
```
|
|
328
|
+
|
|
329
|
+
See [actions.rb](../lib/chewy/index/actions.rb) for more details.
|
data/docs/querying.md
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# Querying
|
|
2
|
+
|
|
3
|
+
## Composing requests
|
|
4
|
+
|
|
5
|
+
The request DSL has the same chainable nature as AR. The main class is `Chewy::Search::Request`.
|
|
6
|
+
|
|
7
|
+
```ruby
|
|
8
|
+
CitiesIndex.query(match: {name: 'London'})
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
The main methods of the request DSL are `query`, `filter` and `post_filter`. It is possible to pass pure query hashes or use `elasticsearch-dsl`.
|
|
12
|
+
|
|
13
|
+
```ruby
|
|
14
|
+
CitiesIndex
|
|
15
|
+
.filter(term: {name: 'Bangkok'})
|
|
16
|
+
.query(match: {name: 'London'})
|
|
17
|
+
.query.not(range: {population: {gt: 1_000_000}})
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
You can query a set of indexes at once:
|
|
21
|
+
|
|
22
|
+
```ruby
|
|
23
|
+
CitiesIndex.indices(CountriesIndex).query(match: {name: 'Some'})
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
See https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html and https://github.com/elastic/elasticsearch-dsl-ruby for more details.
|
|
27
|
+
|
|
28
|
+
An important part of requests manipulation is merging. There are 4 methods to perform it: `merge`, `and`, `or`, `not`. See [Chewy::Search::QueryProxy](../lib/chewy/search/query_proxy.rb) for details. Also, `only` and `except` methods help to remove unneeded parts of the request.
|
|
29
|
+
|
|
30
|
+
Every other request part is covered by a bunch of additional methods, see [Chewy::Search::Request](../lib/chewy/search/request.rb) for details:
|
|
31
|
+
|
|
32
|
+
```ruby
|
|
33
|
+
CitiesIndex.limit(10).offset(30).order(:name, {population: {order: :desc}})
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Request DSL also provides additional scope actions, like `delete_all`, `exists?`, `count`, `pluck`, etc.
|
|
37
|
+
|
|
38
|
+
## Pagination
|
|
39
|
+
|
|
40
|
+
The request DSL supports pagination with `Kaminari`. An extension is enabled on initialization if `Kaminari` is available. See [Chewy::Search](../lib/chewy/search.rb) and [Chewy::Search::Pagination::Kaminari](../lib/chewy/search/pagination/kaminari.rb) for details.
|
|
41
|
+
|
|
42
|
+
## Named scopes
|
|
43
|
+
|
|
44
|
+
Chewy supports named scopes functionality. There is no specialized DSL for named scopes definition, it is simply about defining class methods.
|
|
45
|
+
|
|
46
|
+
See [Chewy::Search::Scoping](../lib/chewy/search/scoping.rb) for details.
|
|
47
|
+
|
|
48
|
+
## Scroll API
|
|
49
|
+
|
|
50
|
+
Elasticsearch scroll API is utilized by a bunch of methods: `scroll_batches`, `scroll_hits`, `scroll_wrappers` and `scroll_objects`.
|
|
51
|
+
|
|
52
|
+
See [Chewy::Search::Scrolling](../lib/chewy/search/scrolling.rb) for details.
|
|
53
|
+
|
|
54
|
+
## Loading objects
|
|
55
|
+
|
|
56
|
+
It is possible to load ORM/ODM source objects with the `objects` method. To provide additional loading options use `load` method:
|
|
57
|
+
|
|
58
|
+
```ruby
|
|
59
|
+
CitiesIndex.load(scope: -> { active }).to_a # to_a returns `Chewy::Index` wrappers.
|
|
60
|
+
CitiesIndex.load(scope: -> { active }).objects # An array of AR source objects.
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
See [Chewy::Search::Loader](../lib/chewy/search/loader.rb) for more details.
|
|
64
|
+
|
|
65
|
+
In case when it is necessary to iterate through both of the wrappers and objects simultaneously, `object_hash` method helps a lot:
|
|
66
|
+
|
|
67
|
+
```ruby
|
|
68
|
+
scope = CitiesIndex.load(scope: -> { active })
|
|
69
|
+
scope.each do |wrapper|
|
|
70
|
+
scope.object_hash[wrapper]
|
|
71
|
+
end
|
|
72
|
+
```
|
data/docs/rake_tasks.md
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
# Rake Tasks
|
|
2
|
+
|
|
3
|
+
For a Rails application, some index-maintaining rake tasks are defined.
|
|
4
|
+
|
|
5
|
+
## `chewy:reset`
|
|
6
|
+
|
|
7
|
+
Performs zero-downtime reindexing as described [here](https://www.elastic.co/blog/changing-mapping-with-zero-downtime). So the rake task creates a new index with unique suffix and then simply aliases it to the common index name. The previous index is deleted afterwards (see `Chewy::Index.reset!` for more details).
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
rake chewy:reset # resets all the existing indices
|
|
11
|
+
rake chewy:reset[users] # resets UsersIndex only
|
|
12
|
+
rake chewy:reset[users,cities] # resets UsersIndex and CitiesIndex
|
|
13
|
+
rake chewy:reset[-users,cities] # resets every index in the application except specified ones
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
## `chewy:upgrade`
|
|
17
|
+
|
|
18
|
+
Performs reset exactly the same way as `chewy:reset` does, but only when the index specification (setting or mapping) was changed.
|
|
19
|
+
|
|
20
|
+
It works only when index specification is locked in `Chewy::Stash::Specification` index. The first run will reset all indexes and lock their specifications.
|
|
21
|
+
|
|
22
|
+
See [Chewy::Stash::Specification](../lib/chewy/stash.rb) and [Chewy::Index::Specification](../lib/chewy/index/specification.rb) for more details.
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
rake chewy:upgrade # upgrades all the existing indices
|
|
27
|
+
rake chewy:upgrade[users] # upgrades UsersIndex only
|
|
28
|
+
rake chewy:upgrade[users,cities] # upgrades UsersIndex and CitiesIndex
|
|
29
|
+
rake chewy:upgrade[-users,cities] # upgrades every index in the application except specified ones
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## `chewy:update`
|
|
33
|
+
|
|
34
|
+
It doesn't create indexes, it simply imports everything to the existing ones and fails if the index was not created before.
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
rake chewy:update # updates all the existing indices
|
|
38
|
+
rake chewy:update[users] # updates UsersIndex only
|
|
39
|
+
rake chewy:update[users,cities] # updates UsersIndex and CitiesIndex
|
|
40
|
+
rake chewy:update[-users,cities] # updates every index in the application except UsersIndex and CitiesIndex
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## `chewy:sync`
|
|
44
|
+
|
|
45
|
+
Provides a way to synchronize outdated indexes with the source quickly and without doing a full reset. By default field `updated_at` is used to find outdated records, but this could be customized by `outdated_sync_field` as described at [Chewy::Index::Syncer](../lib/chewy/index/syncer.rb).
|
|
46
|
+
|
|
47
|
+
Arguments are similar to the ones taken by `chewy:update` task.
|
|
48
|
+
|
|
49
|
+
See [Chewy::Index::Syncer](../lib/chewy/index/syncer.rb) for more details.
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
rake chewy:sync # synchronizes all the existing indices
|
|
53
|
+
rake chewy:sync[users] # synchronizes UsersIndex only
|
|
54
|
+
rake chewy:sync[users,cities] # synchronizes UsersIndex and CitiesIndex
|
|
55
|
+
rake chewy:sync[-users,cities] # synchronizes every index in the application except UsersIndex and CitiesIndex
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## `chewy:deploy`
|
|
59
|
+
|
|
60
|
+
This rake task is especially useful during the production deploy. It is a combination of `chewy:upgrade` and `chewy:sync` and the latter is called only for the indexes that were not reset during the first stage.
|
|
61
|
+
|
|
62
|
+
It is not possible to specify any particular indexes for this task as it doesn't make much sense.
|
|
63
|
+
|
|
64
|
+
Right now the approach is that if some data had been updated, but index definition was not changed (no changes satisfying the synchronization algorithm were done), it would be much faster to perform manual partial index update inside data migrations or even manually after the deploy.
|
|
65
|
+
|
|
66
|
+
Also, there is always full reset alternative with `rake chewy:reset`. See [configuration.md](configuration.md#index-update-strategies) for how update strategies interact with deployment.
|
|
67
|
+
|
|
68
|
+
## `chewy:create_missing_indexes`
|
|
69
|
+
|
|
70
|
+
This rake task creates newly defined indexes in Elasticsearch and skips existing ones. Useful for production-like environments.
|
|
71
|
+
|
|
72
|
+
## Parallelizing rake tasks
|
|
73
|
+
|
|
74
|
+
Every task described above has its own parallel version. Every parallel rake task takes the number of processes for execution as the first argument and the rest of the arguments are exactly the same as for the non-parallel task version.
|
|
75
|
+
|
|
76
|
+
[https://github.com/grosser/parallel](https://github.com/grosser/parallel) gem is required to use these tasks.
|
|
77
|
+
|
|
78
|
+
If the number of processes is not specified explicitly - `parallel` gem tries to automatically derive the number of processes to use.
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
rake chewy:parallel:reset
|
|
82
|
+
rake chewy:parallel:upgrade[4]
|
|
83
|
+
rake chewy:parallel:update[4,cities]
|
|
84
|
+
rake chewy:parallel:sync[4,-users]
|
|
85
|
+
rake chewy:parallel:deploy[4] # performs parallel upgrade and parallel sync afterwards
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## `chewy:journal`
|
|
89
|
+
|
|
90
|
+
This namespace contains two tasks for the journal manipulations: `chewy:journal:apply` and `chewy:journal:clean`. Both are taking time as the first argument (optional for clean) and a list of indexes exactly as the tasks above. Time can be in any format parsable by ActiveSupport.
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
rake chewy:journal:apply["$(date -v-1H -u +%FT%TZ)"] # apply journaled changes for the past hour
|
|
94
|
+
rake chewy:journal:apply["$(date -v-1H -u +%FT%TZ)",users] # apply journaled changes for the past hour on UsersIndex only
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
See [import.md](import.md#journaling) for how journaling works and how to enable it.
|
|
98
|
+
|
|
99
|
+
When the size of the journal becomes very large, the classical way of deletion would be obstructive and resource consuming. Fortunately, Chewy internally uses [delete-by-query](https://www.elastic.co/guide/en/elasticsearch/reference/7.17/docs-delete-by-query.html#docs-delete-by-query-task-api) ES function which supports async execution with batching and [throttling](https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-delete-by-query.html#docs-delete-by-query-throttle).
|
|
100
|
+
|
|
101
|
+
The available options, which can be set by ENV variables, are listed below:
|
|
102
|
+
* `WAIT_FOR_COMPLETION` - a boolean flag. It controls async execution. It waits by default. When set to `false` (`0`, `f`, `false` or `off`, in any letter case, are all accepted as `false`), Elasticsearch performs some preflight checks, launches the request, and returns a task reference you can use to cancel the task or get its status.
|
|
103
|
+
* `REQUESTS_PER_SECOND` - float. The throttle for this request in sub-requests per second. No throttling is enforced by default.
|
|
104
|
+
* `SCROLL_SIZE` - integer. The number of documents to be deleted in single sub-request. The default batch size is 1000.
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
rake chewy:journal:clean WAIT_FOR_COMPLETION=false REQUESTS_PER_SECOND=10 SCROLL_SIZE=5000
|
|
108
|
+
```
|
data/docs/testing.md
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# Testing
|
|
2
|
+
|
|
3
|
+
## RSpec integration
|
|
4
|
+
|
|
5
|
+
Just add `require 'chewy/rspec'` to your spec_helper.rb and you will get additional features:
|
|
6
|
+
|
|
7
|
+
[update_index](../lib/chewy/rspec/update_index.rb) helper
|
|
8
|
+
`mock_elasticsearch_response` helper to mock elasticsearch response
|
|
9
|
+
`mock_elasticsearch_response_sources` helper to mock elasticsearch response sources
|
|
10
|
+
`build_query` matcher to compare request and expected query (returns `true`/`false`)
|
|
11
|
+
|
|
12
|
+
To use `mock_elasticsearch_response` and `mock_elasticsearch_response_sources` helpers add `include Chewy::Rspec::Helpers` to your tests.
|
|
13
|
+
|
|
14
|
+
See [chewy/rspec/](../lib/chewy/rspec/) for more details.
|
|
15
|
+
|
|
16
|
+
## Minitest integration
|
|
17
|
+
|
|
18
|
+
Add `require 'chewy/minitest'` to your test_helper.rb, and then for tests which you'd like indexing test hooks, `include Chewy::Minitest::Helpers`.
|
|
19
|
+
|
|
20
|
+
You can set the `:bypass` strategy for test suites and manually handle imports and flush test indices using `Chewy.massacre`. This will help reduce unnecessary ES requests.
|
|
21
|
+
|
|
22
|
+
But if you require chewy to index/update model regularly in your test suite then you can specify `:urgent` strategy for documents indexing. Add `Chewy.strategy(:urgent)` to test_helper.rb.
|
|
23
|
+
|
|
24
|
+
Also, you can use additional helpers:
|
|
25
|
+
|
|
26
|
+
`mock_elasticsearch_response` to mock elasticsearch response
|
|
27
|
+
`mock_elasticsearch_response_sources` to mock elasticsearch response sources
|
|
28
|
+
`assert_elasticsearch_query` to compare request and expected query (returns `true`/`false`)
|
|
29
|
+
|
|
30
|
+
See [chewy/minitest/](../lib/chewy/minitest/) for more details.
|
|
31
|
+
|
|
32
|
+
## DatabaseCleaner
|
|
33
|
+
|
|
34
|
+
If you use `DatabaseCleaner` in your tests with [the `transaction` strategy](https://github.com/DatabaseCleaner/database_cleaner#how-to-use), you may run into the problem that `ActiveRecord`'s models are not indexed automatically on save despite the fact that you set the callbacks to do this with the `update_index` method. The issue arises because `chewy` indexes data in `after_commit` callbacks by default, but `after_commit` callbacks are not run with `DatabaseCleaner`'s `transaction` strategy. You can solve this issue by changing the `Chewy.use_after_commit_callbacks` option. Just add the following initializer in your Rails application:
|
|
35
|
+
|
|
36
|
+
```ruby
|
|
37
|
+
#config/initializers/chewy.rb
|
|
38
|
+
Chewy.use_after_commit_callbacks = !Rails.env.test?
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
If you're seeing other unexpected behavior in tests, check [troubleshooting.md](troubleshooting.md) for common issues and debugging tips.
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
# Troubleshooting
|
|
2
|
+
|
|
3
|
+
## `UndefinedUpdateStrategy` error
|
|
4
|
+
|
|
5
|
+
This is the most common Chewy error. When you save a model that has an `update_index` callback and no update strategy is active, Chewy raises `Chewy::UndefinedUpdateStrategy`:
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
Index update strategy is undefined for current context.
|
|
9
|
+
Please wrap your code with `Chewy.strategy(:strategy_name)` block.
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
**Fix:** wrap the code that triggers the save in a strategy block:
|
|
13
|
+
|
|
14
|
+
```ruby
|
|
15
|
+
Chewy.strategy(:atomic) do
|
|
16
|
+
city.save!
|
|
17
|
+
end
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
In a Rails app, controller actions already use the `:atomic` strategy by default. This error typically appears in background jobs, rake tasks, or console sessions. For console use, you can set `:urgent` as a persistent strategy:
|
|
21
|
+
|
|
22
|
+
```ruby
|
|
23
|
+
Chewy.strategy(:urgent)
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
If you want to suppress index updates entirely (e.g. in tests or migrations), use `:bypass`:
|
|
27
|
+
|
|
28
|
+
```ruby
|
|
29
|
+
Chewy.root_strategy = :bypass
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
See [configuration.md](configuration.md#index-update-strategies) for the full list of strategies.
|
|
33
|
+
|
|
34
|
+
## Elasticsearch 8 security defaults
|
|
35
|
+
|
|
36
|
+
Elasticsearch 8 enables security (TLS + authentication) by default. If you see connection refused or authentication errors after upgrading, you need to configure credentials and the CA certificate. See the [Security section](../README.md#security) in the main README for setup instructions.
|
|
37
|
+
|
|
38
|
+
## Wildcard index deletion disabled in ES 8
|
|
39
|
+
|
|
40
|
+
Starting from Elasticsearch 8, wildcard deletion of indices is disabled by default. If `Chewy.massacre` or other bulk-delete operations fail with a `Chewy::FeatureDisabled` error, you need to set the cluster setting `action.destructive_requires_name` to `false`:
|
|
41
|
+
|
|
42
|
+
```
|
|
43
|
+
PUT _cluster/settings
|
|
44
|
+
{ "persistent": { "action.destructive_requires_name": false } }
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Import errors and debugging
|
|
48
|
+
|
|
49
|
+
When using `import!` (with a bang), Chewy raises `Chewy::ImportFailed` if any documents fail to index. The error message groups failures by action type (index, delete) and includes the document IDs:
|
|
50
|
+
|
|
51
|
+
```
|
|
52
|
+
Import failed for `ProductsIndex` with:
|
|
53
|
+
Index errors:
|
|
54
|
+
`mapper_parsing_exception`
|
|
55
|
+
on 3 documents: ["1", "2", "3"]
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
For non-bang `import`, errors are silently swallowed. To debug import issues, set up a logger:
|
|
59
|
+
|
|
60
|
+
```ruby
|
|
61
|
+
Chewy.logger = Logger.new(STDOUT)
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
You can also subscribe to `import_objects.chewy` notifications — see [configuration.md](configuration.md#activesupportnotifications-support) for the payload format.
|
|
65
|
+
|
|
66
|
+
## Import scope cleanup warnings
|
|
67
|
+
|
|
68
|
+
When an `index_scope` includes `order`, `limit`, or `offset`, Chewy strips them before importing (they don't make sense for batch processing). By default this logs a warning. If you see unexpected warnings during import, you can control this via:
|
|
69
|
+
|
|
70
|
+
```ruby
|
|
71
|
+
Chewy.import_scope_cleanup_behavior = :ignore # no warning
|
|
72
|
+
Chewy.import_scope_cleanup_behavior = :raise # raise Chewy::ImportScopeCleanupError
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
See [configuration.md](configuration.md#import-scope-clean-up-behavior) for details.
|
|
76
|
+
|
|
77
|
+
## Missing optional dependencies
|
|
78
|
+
|
|
79
|
+
Some Chewy features require additional gems that are not listed as hard dependencies:
|
|
80
|
+
|
|
81
|
+
- **`parallel`** — required for `chewy:parallel:*` rake tasks. Install it with `gem 'parallel'` in your Gemfile.
|
|
82
|
+
- **`method_source`** — required for the Witchcraft technology (compiled value procs). Install it with `gem 'method_source'`.
|
|
83
|
+
|
|
84
|
+
If these gems are missing you'll get a `LoadError` when the relevant feature is used.
|
|
85
|
+
|
|
86
|
+
## Pre-request filter
|
|
87
|
+
|
|
88
|
+
Should you need to inspect any query before it is dispatched to Elasticsearch, you can use the `before_es_request_filter`. It is a callable object, as demonstrated below:
|
|
89
|
+
|
|
90
|
+
```ruby
|
|
91
|
+
Chewy.before_es_request_filter = -> (method_name, args, kw_args) { ... }
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
While using the `before_es_request_filter`, please consider the following:
|
|
95
|
+
|
|
96
|
+
* `before_es_request_filter` acts as a simple proxy before any request made via the `Elasticsearch::Client`. The arguments passed to this filter include:
|
|
97
|
+
* `method_name` — the name of the method being called (e.g. search, count, bulk).
|
|
98
|
+
* `args` and `kw_args` — the positional and keyword arguments provided in the method call.
|
|
99
|
+
* The operation is synchronous, so avoid executing any heavy or time-consuming operations within the filter to prevent performance degradation.
|
|
100
|
+
* The return value of the proc is disregarded. This filter is intended for inspection or modification of the query rather than generating a response.
|
|
101
|
+
* Any exception raised inside the callback will propagate upward and halt the execution of the query. It is essential to handle potential errors adequately to ensure the stability of your search functionality.
|
data/gemfiles/base.gemfile
CHANGED
|
@@ -6,7 +6,7 @@ gem 'redis', require: false
|
|
|
6
6
|
gem 'rspec', '>= 3.7.0'
|
|
7
7
|
gem 'rspec-collection_matchers'
|
|
8
8
|
gem 'rspec-its'
|
|
9
|
-
gem 'rubocop', '1.
|
|
10
|
-
gem 'sqlite3', '~> 1
|
|
9
|
+
gem 'rubocop', '1.84.2'
|
|
10
|
+
gem 'sqlite3', '~> 2.1'
|
|
11
11
|
gem 'timecop'
|
|
12
|
-
gem 'unparser'
|
|
12
|
+
gem 'unparser', '~> 0.6.15'
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
source 'https://rubygems.org'
|
|
2
2
|
|
|
3
|
-
gem 'activejob', '~>
|
|
4
|
-
gem 'activerecord', '~>
|
|
5
|
-
gem 'activesupport', '~>
|
|
3
|
+
gem 'activejob', '~> 7.2.0'
|
|
4
|
+
gem 'activerecord', '~> 7.2.0'
|
|
5
|
+
gem 'activesupport', '~> 7.2.0'
|
|
6
6
|
gem 'kaminari-core', '~> 1.1.0', require: false
|
|
7
7
|
gem 'parallel', require: false
|
|
8
8
|
gem 'rspec_junit_formatter', '~> 0.4.1'
|