chewy 0.7.0 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.rspec +0 -1
- data/.travis.yml +2 -2
- data/Appraisals +6 -2
- data/CHANGELOG.md +29 -1
- data/Gemfile +4 -0
- data/README.md +137 -19
- data/chewy.gemspec +1 -0
- data/gemfiles/rails.3.2.activerecord.gemfile +2 -0
- data/gemfiles/rails.3.2.activerecord.kaminari.gemfile +1 -1
- data/gemfiles/rails.4.0.activerecord.gemfile +2 -0
- data/gemfiles/rails.4.0.activerecord.kaminari.gemfile +1 -1
- data/gemfiles/rails.4.0.mongoid.gemfile +2 -0
- data/gemfiles/rails.4.0.mongoid.kaminari.gemfile +1 -1
- data/gemfiles/rails.4.1.activerecord.gemfile +2 -0
- data/gemfiles/rails.4.1.activerecord.kaminari.gemfile +1 -1
- data/gemfiles/rails.4.1.mongoid.gemfile +2 -0
- data/gemfiles/rails.4.1.mongoid.kaminari.gemfile +1 -1
- data/gemfiles/rails.4.2.activerecord.gemfile +2 -0
- data/gemfiles/rails.4.2.activerecord.kaminari.gemfile +1 -1
- data/gemfiles/rails.4.2.mongoid.gemfile +2 -0
- data/gemfiles/rails.4.2.mongoid.kaminari.gemfile +1 -1
- data/lib/chewy.rb +1 -2
- data/lib/chewy/config.rb +3 -3
- data/lib/chewy/fields/base.rb +27 -30
- data/lib/chewy/fields/root.rb +9 -19
- data/lib/chewy/query.rb +34 -1
- data/lib/chewy/railtie.rb +1 -0
- data/lib/chewy/rspec/update_index.rb +16 -6
- data/lib/chewy/strategy.rb +12 -0
- data/lib/chewy/strategy/atomic.rb +1 -1
- data/lib/chewy/strategy/resque.rb +26 -0
- data/lib/chewy/strategy/sidekiq.rb +26 -0
- data/lib/chewy/strategy/urgent.rb +1 -1
- data/lib/chewy/type.rb +2 -0
- data/lib/chewy/type/adapter/active_record.rb +7 -3
- data/lib/chewy/type/adapter/mongoid.rb +5 -0
- data/lib/chewy/type/adapter/orm.rb +1 -1
- data/lib/chewy/type/crutch.rb +31 -0
- data/lib/chewy/type/import.rb +7 -6
- data/lib/chewy/type/mapping.rb +7 -3
- data/lib/chewy/type/observe.rb +24 -35
- data/lib/chewy/version.rb +1 -1
- data/spec/chewy/fields/base_spec.rb +26 -19
- data/spec/chewy/query_spec.rb +13 -0
- data/spec/chewy/runtime_spec.rb +1 -1
- data/spec/chewy/strategy/resque_spec.rb +35 -0
- data/spec/chewy/strategy/sidekiq_spec.rb +35 -0
- data/spec/chewy/type/adapter/mongoid_spec.rb +18 -9
- data/spec/chewy/type/mapping_spec.rb +14 -9
- data/spec/chewy/type/observe_spec.rb +22 -7
- data/spec/spec_helper.rb +1 -0
- metadata +23 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8eb12f365d07168ff5e5f52511029a07a5eccda1
|
4
|
+
data.tar.gz: aae1608aa3438ea0bc28ed43bf65ccde19e2bc8b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2db0c6c51c6fc9061c5e751e2759213723b5a2524c4443e7688bd792b81615104d6f5ba698b5de33a0c91030045666d20957255a13e3d706c8f76b68bf7d4174
|
7
|
+
data.tar.gz: bab3cb005e8f0264022c39ecaa4d04b81b9dbccfc4de7fd28c637250746eb18ffea1d6cef89c2dcc7c09f4a4dccb0120a89e7bf15be1e1bf769dc85c10b5b4f1
|
data/.gitignore
CHANGED
data/.rspec
CHANGED
data/.travis.yml
CHANGED
@@ -37,6 +37,6 @@ matrix:
|
|
37
37
|
- rvm: 2.2.0
|
38
38
|
gemfile: gemfiles/rails.3.2.activerecord.will_paginate.gemfile
|
39
39
|
before_install:
|
40
|
-
- curl -# https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-1.
|
40
|
+
- curl -# https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-1.5.0.tar.gz | tar xz -C /tmp
|
41
41
|
before_script:
|
42
|
-
- TEST_CLUSTER_COMMAND="/tmp/elasticsearch-1.
|
42
|
+
- TEST_CLUSTER_COMMAND="/tmp/elasticsearch-1.5.0/bin/elasticsearch" rake elasticsearch:start
|
data/Appraisals
CHANGED
@@ -2,12 +2,14 @@
|
|
2
2
|
appraise "rails.#{version}.activerecord" do
|
3
3
|
gem 'activerecord', "~> #{version}.0"
|
4
4
|
gem 'activesupport', "~> #{version}.0"
|
5
|
+
gem 'resque', require: false
|
6
|
+
gem 'sidekiq', require: false
|
5
7
|
end
|
6
8
|
|
7
9
|
appraise "rails.#{version}.activerecord.kaminari" do
|
8
10
|
gem 'activerecord', "~> #{version}.0"
|
9
11
|
gem 'activesupport', "~> #{version}.0"
|
10
|
-
gem 'kaminari', require: false
|
12
|
+
gem 'kaminari', '0.16.3', require: false
|
11
13
|
end
|
12
14
|
|
13
15
|
appraise "rails.#{version}.activerecord.will_paginate" do
|
@@ -21,12 +23,14 @@ end
|
|
21
23
|
appraise "rails.#{version}.mongoid" do
|
22
24
|
gem 'mongoid', '~> 4.0.0'
|
23
25
|
gem 'activesupport', "~> #{version}.0"
|
26
|
+
gem 'resque', require: false
|
27
|
+
gem 'sidekiq', require: false
|
24
28
|
end
|
25
29
|
|
26
30
|
appraise "rails.#{version}.mongoid.kaminari" do
|
27
31
|
gem 'mongoid', '~> 4.0.0'
|
28
32
|
gem 'activesupport', "~> #{version}.0"
|
29
|
-
gem 'kaminari', require: false
|
33
|
+
gem 'kaminari', '0.16.3', require: false
|
30
34
|
end
|
31
35
|
|
32
36
|
appraise "rails.#{version}.mongoid.will_paginate" do
|
data/CHANGELOG.md
CHANGED
@@ -1,9 +1,37 @@
|
|
1
1
|
# master
|
2
2
|
|
3
|
+
# Version 0.8.0
|
4
|
+
|
5
|
+
## Incompatible changes:
|
6
|
+
|
7
|
+
* `:atomic` and `:urgent` strategies are using `import!` method raising exceptions
|
8
|
+
|
9
|
+
## Changes
|
10
|
+
|
11
|
+
* Crutches™ technology
|
12
|
+
|
13
|
+
* Added `.script_fields` chainable method to query (@ka8725)
|
14
|
+
|
15
|
+
* `update_index` mocha support (@lardawge)
|
16
|
+
|
17
|
+
* `:resque` async strategy
|
18
|
+
|
19
|
+
* `:sidekiq` async strategy (inspired by @sharkzp)
|
20
|
+
|
21
|
+
* Added `Query#search_type` for `search_type` request option setup (@marshall-lee)
|
22
|
+
|
23
|
+
## Bugfixes
|
24
|
+
|
25
|
+
* Rails 4.2 migrations are not raising UndefinedUpdateStrategy anymore on data updates
|
26
|
+
|
27
|
+
* Mongoid random failing specs fixes (@marshall-lee)
|
28
|
+
|
29
|
+
# Version 0.7.0
|
30
|
+
|
3
31
|
## Incompatible changes:
|
4
32
|
|
5
33
|
* `Chewy.use_after_commit_callbacks = false` returns previous RDBMS behavior
|
6
|
-
in tests
|
34
|
+
in tests
|
7
35
|
|
8
36
|
* ActiveRecord import is now called after_commit instead of after_save and after_destroy
|
9
37
|
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
[![Gem Version](https://badge.fury.io/rb/chewy.svg)](http://badge.fury.io/rb/chewy)
|
1
2
|
[![Build Status](https://travis-ci.org/toptal/chewy.png)](https://travis-ci.org/toptal/chewy)
|
2
3
|
[![Code Climate](https://codeclimate.com/github/toptal/chewy.png)](https://codeclimate.com/github/toptal/chewy)
|
3
4
|
[![Inline docs](http://inch-ci.org/github/toptal/chewy.svg?branch=master)](http://inch-ci.org/github/toptal/chewy)
|
@@ -195,6 +196,86 @@ See [config.rb](lib/chewy/config.rb) for more details.
|
|
195
196
|
update_index('users#user', :users)
|
196
197
|
```
|
197
198
|
|
199
|
+
In case of belongs_to association you may need to update both associated objects, previous and current:
|
200
|
+
|
201
|
+
```ruby
|
202
|
+
class City < ActiveRecord::Base
|
203
|
+
belongs_to :country
|
204
|
+
|
205
|
+
update_index('cities#city') { self }
|
206
|
+
update_index 'countries#country' do
|
207
|
+
# For the latest active_record changed values are
|
208
|
+
# already in `previous_changes` hash,
|
209
|
+
# but for mongoid you have to use `changes` hash
|
210
|
+
previous_changes['country_id'] || country
|
211
|
+
end
|
212
|
+
end
|
213
|
+
```
|
214
|
+
|
215
|
+
### Crutches™ technology
|
216
|
+
|
217
|
+
Assume you are defining index like this (product has_many categories through product_categories):
|
218
|
+
|
219
|
+
```ruby
|
220
|
+
class ProductsIndex < Chewy::Index
|
221
|
+
define_type Product.includes(:categories) do
|
222
|
+
field :name
|
223
|
+
field :category_names, value: ->(product) { product.categories.map(&:name) } # or shorter just -> { categories.map(&:name) }
|
224
|
+
end
|
225
|
+
end
|
226
|
+
```
|
227
|
+
|
228
|
+
Then the chewy reindexing flow would look like the following pseudo-code (even in mongoid):
|
229
|
+
|
230
|
+
```ruby
|
231
|
+
Product.includes(:categories).find_in_batches(1000) do |batch|
|
232
|
+
bulk_body = batch.map do |object|
|
233
|
+
{name: object.name, category_names: object.categories.map(&:name)}.to_json
|
234
|
+
end
|
235
|
+
# here we are sending every batch of data to ES
|
236
|
+
Chewy.client.bulk bulk_body
|
237
|
+
end
|
238
|
+
```
|
239
|
+
|
240
|
+
But in rails 4.1 and 4.2 you may face the slow associations problem (take a look at https://github.com/rails/rails/pull/19423). Also, there might be really complicated cases when associations are not applicable.
|
241
|
+
|
242
|
+
Then you are able to replace rails associations with Chewy Crutches™ technology:
|
243
|
+
|
244
|
+
```ruby
|
245
|
+
class ProductsIndex < Chewy::Index
|
246
|
+
define_type Product.includes(:categories) do
|
247
|
+
crutch :categories do |collection| # collection here is a current batch of products
|
248
|
+
# data is fetched with a lightweight query without objects initialization
|
249
|
+
data = ProductCategory.joins(:category).where(product_id: collection.map(&:id)).pluck(:product_id, 'categories.name')
|
250
|
+
# then we have to convert fetched data to appropriate format
|
251
|
+
# this will return our data in structure like:
|
252
|
+
# {123 => ['sweets', 'juices'], 456 => ['meat']}
|
253
|
+
data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
|
254
|
+
end
|
255
|
+
|
256
|
+
field :name
|
257
|
+
# simply use crutch-fetched data as a value:
|
258
|
+
field :category_names, value: ->(product, crutches) { crutches.categories[product.id] }
|
259
|
+
end
|
260
|
+
end
|
261
|
+
```
|
262
|
+
|
263
|
+
And the example flow would look like this:
|
264
|
+
|
265
|
+
```ruby
|
266
|
+
Product.includes(:categories).find_in_batches(1000) do |batch|
|
267
|
+
crutches[:categories] = ProductCategory.joins(:category).where(product_id: batch.map(&:id)).pluck(:product_id, 'categories.name')
|
268
|
+
.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
|
269
|
+
|
270
|
+
bulk_body = batch.map do |object|
|
271
|
+
{name: object.name, category_names: crutches[:categories][object.id]}.to_json
|
272
|
+
end
|
273
|
+
Chewy.client.bulk bulk_body
|
274
|
+
end
|
275
|
+
```
|
276
|
+
|
277
|
+
So Chewy Crutches™ technology is able to increase your indexing performance in some cases up to 100 times or even more depending on your associations complexity.
|
278
|
+
|
198
279
|
### Types access
|
199
280
|
|
200
281
|
You are able to access index-defined types with the following API:
|
@@ -280,6 +361,29 @@ Using this strategy delays index update request until the end of
|
|
280
361
|
block. Updated records are aggregated and index update happens with
|
281
362
|
bulk API. So this strategy is highly optimized.
|
282
363
|
|
364
|
+
#### `:resque`
|
365
|
+
|
366
|
+
Does the same thing as `:atomic`, but in async way using resque.
|
367
|
+
Default queue name is `chewy`.
|
368
|
+
Patch `Chewy::Strategy::Resque::Worker` to improve index updates.
|
369
|
+
|
370
|
+
```ruby
|
371
|
+
Chewy.strategy(:resque) do
|
372
|
+
City.popular.map(&:do_some_update_action!)
|
373
|
+
end
|
374
|
+
```
|
375
|
+
|
376
|
+
#### `:sidekiq`
|
377
|
+
|
378
|
+
Does the same thing as `:atomic`, but in async way using sidekiq.
|
379
|
+
Patch `Chewy::Strategy::Sidekiq::Worker` to improve index updates.
|
380
|
+
|
381
|
+
```ruby
|
382
|
+
Chewy.strategy(:sidekiq) do
|
383
|
+
City.popular.map(&:do_some_update_action!)
|
384
|
+
end
|
385
|
+
```
|
386
|
+
|
283
387
|
#### `:urgent`
|
284
388
|
|
285
389
|
Next strategy is convenient if you are going to update documents in
|
@@ -339,22 +443,12 @@ city3.do_update! # index updated again
|
|
339
443
|
|
340
444
|
#### Designing own strategies
|
341
445
|
|
342
|
-
Async strategy is not implemented yet, but it is planned. So
|
343
|
-
it would be a good idea to implements own async strategy for
|
344
|
-
particular delayed jobs library or simply threads.
|
345
|
-
|
346
446
|
See [strategy/base.rb](lib/chewy/strategy/base.rb) for more details.
|
347
447
|
See [strategy/atomic.rb](lib/chewy/strategy/atomic.rb) for example.
|
348
448
|
|
349
|
-
### Async reindexing
|
350
|
-
|
351
|
-
Chewy is not support async index update, but it's planned. Until you can use third-party solutions, such as [https://github.com/averell23/chewy_kiqqer](https://github.com/averell23/chewy_kiqqer)
|
352
|
-
|
353
|
-
Not sure it works currently.
|
354
|
-
|
355
449
|
### Rails application strategies integration
|
356
450
|
|
357
|
-
There is a couple of pre-defined strategies for your rails application. At first, rails console uses `:urgent` strategy by default, except the sandbox case.
|
451
|
+
There are a couple of pre-defined strategies for your rails application. First, the rails console uses the `:urgent` strategy by default, except in the sandbox case. When you are running the sandbox, it switches to the `:bypass` strategy to avoid polluting the index.
|
358
452
|
|
359
453
|
Also migrations are wrapped with the `:bypass` strategy, because the main behavior implies that indexes are reset after migration, so there is no need for extra index updates.
|
360
454
|
Also indexing might be broken during migrations because of the outdated schema.
|
@@ -688,7 +782,7 @@ Compliance cheatsheet for filters and DSL expressions:
|
|
688
782
|
|
689
783
|
```json
|
690
784
|
{"has_child": {"type": "blog_tag", "query": {"term": {"tag": "something"}}}}
|
691
|
-
{"has_child": {"type": "comment", "
|
785
|
+
{"has_child": {"type": "comment", "filter": {"term": {"user": "john"}}}}
|
692
786
|
```
|
693
787
|
|
694
788
|
```ruby
|
@@ -728,6 +822,23 @@ When the response comes back, it will have the ```:facets``` sidechannel include
|
|
728
822
|
< { ... ,"facets":{"countries":{"_type":"terms","missing":?,"total":?,"other":?,"terms":[{"term":"USA","count":?},{"term":"Brazil","count":?}, ...}}
|
729
823
|
```
|
730
824
|
|
825
|
+
### Script fields
|
826
|
+
|
827
|
+
Script fields allow you to execute scripts written in one of elasticsearch's scripting languages, such as groovy, javascript, etc. Read more about the supported languages and about scripting [here](https://www.elastic.co/guide/en/elasticsearch/reference/0.90/modules-scripting.html). This feature allows you to calculate the distance between geo points, for example. This is how to use the DSL:
|
828
|
+
|
829
|
+
```ruby
|
830
|
+
UsersIndex.script_fields(
|
831
|
+
distance: {
|
832
|
+
params: {
|
833
|
+
lat: 37.569976,
|
834
|
+
lon: -122.351591
|
835
|
+
},
|
836
|
+
script: "doc['coordinates'].distanceInMiles(lat, lon)"
|
837
|
+
}
|
838
|
+
)
|
839
|
+
```
|
840
|
+
`coordinates` here is a field with `geo_point` type. There will be `distance` field for the index's model in the search result.
|
841
|
+
|
731
842
|
### Script scoring
|
732
843
|
|
733
844
|
Script scoring is used to score the search results. All scores are added to the search request and combined according to boost mode and score mode. This can be useful if, for example, a score function is computationally expensive and it is sufficient to compute the score on a filtered set of documents. For example, you might want to multiply the score by another numeric field in the doc:
|
@@ -833,27 +944,34 @@ end
|
|
833
944
|
Inside a Rails application some index maintaining rake tasks are defined.
|
834
945
|
|
835
946
|
```bash
|
836
|
-
rake chewy:reset
|
837
|
-
rake chewy:reset #
|
838
|
-
rake chewy:reset[users] # resets UsersIndex
|
947
|
+
rake chewy:reset # resets all the existing indexes, declared in app/chewy
|
948
|
+
rake chewy:reset[users] # resets UsersIndex only
|
839
949
|
|
840
|
-
rake chewy:update
|
841
|
-
rake chewy:update #
|
842
|
-
rake chewy:update[users] # updates UsersIndex
|
950
|
+
rake chewy:update # updates all the existing indexes, declared in app/chewy
|
951
|
+
rake chewy:update[users] # updates UsersIndex only
|
843
952
|
```
|
844
953
|
|
954
|
+
Also `rake chewy:reset` performs zero-downtime reindexing as described here: https://www.elastic.co/blog/changing-mapping-with-zero-downtime. So basically the rake task creates a new index with a unique suffix and then simply aliases it to the common index name. The previous index is deleted afterwards (see `Chewy::Index.reset!` for more details).
|
955
|
+
|
956
|
+
|
845
957
|
### Rspec integration
|
846
958
|
|
847
959
|
Just add `require 'chewy/rspec'` to your spec_helper.rb and you will get additional features:
|
848
960
|
See [update_index.rb](lib/chewy/rspec/update_index.rb) for more details.
|
849
961
|
|
962
|
+
If you use `DatabaseCleaner` in your tests with the `transaction` [strategy](https://github.com/DatabaseCleaner/database_cleaner#how-to-use) you may run into the problem that `ActiveRecord` models are not indexed automatically when you save them, despite the fact that you set up the callbacks to do this with the `update_index` method. The issue arises because `chewy` indexes data in `after_commit` callbacks by default, but `after_commit` callbacks are not run with `DatabaseCleaner`'s `transaction` strategy. You can solve the issue by changing the `Chewy.use_after_commit_callbacks` option. Just add the following initializer in your Rails application:
|
963
|
+
|
964
|
+
```ruby
|
965
|
+
#config/initializers/chewy.rb
|
966
|
+
Chewy.use_after_commit_callbacks = !Rails.env.test?
|
967
|
+
```
|
968
|
+
|
850
969
|
## TODO a.k.a coming soon:
|
851
970
|
|
852
971
|
* Typecasting support
|
853
972
|
* Advanced (simplified) query DSL: `UsersIndex.query { email == 'my@gmail.com' }` will produce term query
|
854
973
|
* update_all support
|
855
974
|
* Maybe, closer ORM/ODM integration, creating index classes implicitly
|
856
|
-
* Async indexes updating
|
857
975
|
|
858
976
|
## Contributing
|
859
977
|
|
data/chewy.gemspec
CHANGED
@@ -26,6 +26,7 @@ Gem::Specification.new do |spec|
|
|
26
26
|
spec.add_development_dependency 'sqlite3'
|
27
27
|
spec.add_development_dependency 'database_cleaner', '~> 1.3.0'
|
28
28
|
spec.add_development_dependency 'elasticsearch-extensions'
|
29
|
+
spec.add_development_dependency 'resque_spec'
|
29
30
|
spec.add_development_dependency 'rubysl', '~> 2.0' if RUBY_ENGINE == 'rbx'
|
30
31
|
|
31
32
|
spec.add_dependency 'activesupport', '>= 3.2'
|