identity_cache 1.0.1 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +75 -9
- data/.gitignore +1 -0
- data/.rubocop.yml +1 -1
- data/CAVEATS.md +25 -0
- data/CHANGELOG.md +43 -23
- data/Gemfile +2 -2
- data/LICENSE +1 -1
- data/README.md +25 -5
- data/dev.yml +4 -1
- data/gemfiles/Gemfile.latest-release +2 -0
- data/gemfiles/Gemfile.min-supported +7 -0
- data/gemfiles/Gemfile.rails-edge +1 -0
- data/identity_cache.gemspec +6 -3
- data/lib/identity_cache.rb +30 -4
- data/lib/identity_cache/cache_invalidation.rb +1 -1
- data/lib/identity_cache/cache_key_generation.rb +1 -1
- data/lib/identity_cache/cached/belongs_to.rb +21 -14
- data/lib/identity_cache/cached/prefetcher.rb +12 -2
- data/lib/identity_cache/cached/primary_index.rb +0 -1
- data/lib/identity_cache/cached/recursive/association.rb +53 -12
- data/lib/identity_cache/cached/reference/has_many.rb +2 -2
- data/lib/identity_cache/cached/reference/has_one.rb +2 -2
- data/lib/identity_cache/mem_cache_store_cas.rb +53 -0
- data/lib/identity_cache/memoized_cache_proxy.rb +19 -9
- data/lib/identity_cache/parent_model_expiration.rb +3 -2
- data/lib/identity_cache/query_api.rb +26 -66
- data/lib/identity_cache/version.rb +1 -1
- data/performance/cache_runner.rb +0 -42
- data/performance/cpu.rb +1 -1
- metadata +28 -12
- data/.travis.yml +0 -45
- data/gemfiles/Gemfile.rails52 +0 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5196973719eb77354c40188d33d291b31ac94b23311bd78ba263e1e9c973e1f1
|
4
|
+
data.tar.gz: ce1be3c774f20c56b288272b9d30ce4abcb93d60536dedb1a232b566de92e3df
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4e1bfc8ec934bedf3ea3c451b87b903b8c373c033ab062535cd1e2abcb75cf0da508887440e1d5024a492ea9cb6f34e26aac5b18c53a9a21af96c072bbbaf7fe
|
7
|
+
data.tar.gz: e3105dbebe6f2da6f25e13f05a45edd53182bbe8058ac8dbdfe800a6981d765bf7cbefb7a27caa4c21966c23a6630df0994f7f308dac70f61a35f43c801a7430
|
data/.github/workflows/ci.yml
CHANGED
@@ -1,26 +1,92 @@
|
|
1
1
|
name: CI
|
2
2
|
|
3
|
-
on:
|
3
|
+
on:
|
4
|
+
push: {}
|
5
|
+
pull_request:
|
6
|
+
types: [opened, synchronize]
|
4
7
|
|
5
8
|
jobs:
|
6
9
|
build:
|
10
|
+
if: github.event_name == 'push' || github.event.pull_request.head.repo.owner.login != 'Shopify'
|
7
11
|
|
8
12
|
runs-on: ubuntu-latest
|
9
13
|
|
14
|
+
strategy:
|
15
|
+
matrix:
|
16
|
+
entry:
|
17
|
+
- name: 'Minimum supported'
|
18
|
+
ruby: 2.5
|
19
|
+
gemfile: "Gemfile.min-supported"
|
20
|
+
- name: 'Latest released & run rubocop'
|
21
|
+
ruby: 2.7
|
22
|
+
gemfile: "Gemfile.latest-release"
|
23
|
+
rubocop: true
|
24
|
+
- name: 'Rails edge'
|
25
|
+
ruby: 2.7
|
26
|
+
gemfile: "Gemfile.rails-edge"
|
27
|
+
edge: true
|
28
|
+
|
29
|
+
name: ${{ matrix.entry.name }}
|
30
|
+
|
31
|
+
continue-on-error: ${{ matrix.entry.edge || false }}
|
32
|
+
|
33
|
+
env:
|
34
|
+
BUNDLE_GEMFILE: gemfiles/${{ matrix.entry.gemfile }}
|
35
|
+
|
36
|
+
services:
|
37
|
+
memcached:
|
38
|
+
image: memcached
|
39
|
+
ports:
|
40
|
+
- 11211:11211
|
41
|
+
mysql:
|
42
|
+
image: mysql:5.7
|
43
|
+
env:
|
44
|
+
MYSQL_ALLOW_EMPTY_PASSWORD: yes
|
45
|
+
MYSQL_DATABASE: identity_cache_test
|
46
|
+
options: >-
|
47
|
+
--health-cmd="mysqladmin ping"
|
48
|
+
--health-interval=10s
|
49
|
+
--health-timeout=5s
|
50
|
+
--health-retries=5
|
51
|
+
ports:
|
52
|
+
- 3306:3306
|
53
|
+
postgres:
|
54
|
+
image: postgres
|
55
|
+
env:
|
56
|
+
POSTGRES_PASSWORD: postgres
|
57
|
+
POSTGRES_DB: identity_cache_test
|
58
|
+
options: >-
|
59
|
+
--health-cmd pg_isready
|
60
|
+
--health-interval 10s
|
61
|
+
--health-timeout 5s
|
62
|
+
--health-retries 5
|
63
|
+
ports:
|
64
|
+
- 5432:5432
|
65
|
+
|
10
66
|
steps:
|
11
|
-
- uses: actions/checkout@v2
|
12
|
-
- name: Set up Ruby 2.6
|
13
|
-
uses: actions/setup-ruby@v1
|
14
|
-
with:
|
15
|
-
ruby-version: 2.6.x
|
16
67
|
- name: Install required packages
|
17
68
|
run: |
|
18
69
|
sudo apt-get update
|
19
|
-
sudo apt-get -y install libmysqlclient-dev libpq-dev libsasl2-dev
|
70
|
+
sudo apt-get -y install libmemcached-dev libmysqlclient-dev libpq-dev libsasl2-dev
|
71
|
+
- uses: actions/checkout@v2
|
72
|
+
- name: Set up Ruby
|
73
|
+
uses: actions/setup-ruby@v1
|
74
|
+
with:
|
75
|
+
ruby-version: ${{ matrix.entry.ruby }}
|
20
76
|
- name: Install bundler and gems
|
21
77
|
run: |
|
22
78
|
gem install bundler
|
23
79
|
bundle install --jobs 4 --retry 3
|
80
|
+
- name: Test with mysql
|
81
|
+
env:
|
82
|
+
DB: mysql2
|
83
|
+
run: bundle exec rake test
|
84
|
+
- name: Test with postgres and memcached_store
|
85
|
+
env:
|
86
|
+
DB: postgresql
|
87
|
+
POSTGRES_PASSWORD: postgres
|
88
|
+
ADAPTER: memcached
|
89
|
+
run: bundle exec rake test
|
24
90
|
- name: Run rubocop
|
25
|
-
|
26
|
-
|
91
|
+
if: matrix.entry.rubocop
|
92
|
+
run: bundle exec rubocop
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
data/CAVEATS.md
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
While IdentityCache makes it possible to massively scale access to ActiveRecord objects, it's important to note which problems might arise with it over time and which problems it does not solve in its current design.
|
2
|
+
|
3
|
+
## Large blobs
|
4
|
+
|
5
|
+
For [god objects](https://en.wikipedia.org/wiki/God_object) (like the Shop model at Shopify), it’s typical to have many associations. It’s appealing for developers to embed many of those associations into the god model’s IDC blob, such as `cache_has_many :images, embed: true`. As a side effect, over time, the god model’s IDC blob may grow very large. It gets even worse when some of the embedded records get larger themselves, and the total IDC blob grows to as much as a megabyte in size. This puts pressure on the application, memcached, and the network fibre because this large blob needs to be serialized and unserialized each time and transferred over the network.
|
6
|
+
|
7
|
+
If the cache blob gets larger than the maximum cache value in memcached, then the cache fill would always fail, causing it to have to load all that data from the database each time.
|
8
|
+
|
9
|
+
### Overfetching
|
10
|
+
|
11
|
+
There's currently no way to retrieve/fill the cache with a subset of a model's associations. So, for models with many embedded associations, every `Model.fetch` would result in fetching all of the blob over the network and unserializing all embedded records, even if the client has only accessed one of those associations. That's a lot of useless CPU cycles and IO operations that happen on each access. It will also increase the network and memory bandwidth used on both the client and the server.
|
12
|
+
|
13
|
+
Another possible issue is that cache invalidations for a model can be amplified by the number of models that embed them. This can further aggravate the large cache blob problem by embedding an association that gets invalidated more frequently.
|
14
|
+
|
15
|
+
## Hot keys
|
16
|
+
|
17
|
+
To scale out memcache, it’s typical to arrange memcached instances in rings. Most memcache clients/proxies use [consistent hashing](https://github.com/facebook/mcrouter/wiki/Pools#hash-functions) to route the operation to one of the instances in the ring. This means that an IDC entry will always end up in the same memcached instance.
|
18
|
+
|
19
|
+
If the key associated with a record is on a hot codepath, that memcached instance will possibly experience saturation on the network and the number of bytes that it can send out. In cloud environments, this is a limit that is easy to hit. This especially aggravates the server-side large blob problem.
|
20
|
+
|
21
|
+
## Thundering herd
|
22
|
+
|
23
|
+
When a record gets updated, its IDC entry in memcache is expired, and it only gets populated the next time it is accessed. When many clients request the same key concurrently (e.g. on a hot codepath), they all find out that the IDC entry is missing at the same time, and hence they will all go to the database to fill the IDC key at the same time. The massive number of clients hitting the DB will create a [thundering herd problem](https://en.wikipedia.org/wiki/Thundering_herd_problem) and overload the DB.
|
24
|
+
|
25
|
+
Check out https://github.com/Shopify/identity_cache/pull/373 for more context and for the proposed solution for this problem.
|
data/CHANGELOG.md
CHANGED
@@ -1,10 +1,30 @@
|
|
1
|
-
#
|
1
|
+
# Identity Cache Changelog
|
2
2
|
|
3
|
-
|
3
|
+
## 1.1.0
|
4
|
+
|
5
|
+
### Fixes
|
6
|
+
- Fix double debug logging of cache hits and misses (#474)
|
7
|
+
- Fix a Rails 6.1 deprecation warning for Rails 7.0 compatibility (#482)
|
8
|
+
- Recursively install parent expiry hooks when expiring parent caches (#476)
|
9
|
+
- Expire caches before other `after_commit` callbacks (#471)
|
10
|
+
- Avoid unnecessary record cache expiry on save with no DB update (#464)
|
11
|
+
- Fix an Active Record deprecation warning by not using `Connection#type_cast` (#459)
|
12
|
+
- Fix broken `prefetch_associations` of a polymorphic `cache_belongs_to` (#461)
|
13
|
+
- Fix `should_use_cache?` check to avoid calling it on the wrong class (#454)
|
14
|
+
- Fix fetch `has_many` embedded association on record after adding to it (#449)
|
15
|
+
|
16
|
+
### Features
|
17
|
+
- Support multiple databases and transactional tests in `IdentityCache.should_use_cache?` (#293)
|
18
|
+
- Add support for the default `MemCacheStore` from `ActiveSupport` (#465)
|
19
|
+
|
20
|
+
### Breaking Changes
|
21
|
+
- Drop ruby 2.4 support, since it is no longer supported upstream (#468)
|
22
|
+
|
23
|
+
## 1.0.1
|
4
24
|
|
5
25
|
- Fix expiry of cache_has_one association with scope and `embed: :id` (#442)
|
6
26
|
|
7
|
-
|
27
|
+
## 1.0.0
|
8
28
|
|
9
29
|
- Remove inverse_name option. Specify inverse_of on the Active Record association instead. (#439)
|
10
30
|
- Bump the minimum Active Record version to 5.2 (#438)
|
@@ -25,16 +45,16 @@
|
|
25
45
|
- Remove deprecated `never_set_inverse_association` option (#319)
|
26
46
|
- Lazy load associated classes (#306)
|
27
47
|
|
28
|
-
|
48
|
+
## 0.5.1
|
29
49
|
|
30
50
|
- Fix bug in prefetch_associations for cache_has_one associations that may be nil
|
31
51
|
|
32
|
-
|
52
|
+
## 0.5.0
|
33
53
|
|
34
54
|
- `never_set_inverse_association` and `fetch_read_only_records` are now `true` by default (#315)
|
35
55
|
- Store the class name instead of the class itself (#311)
|
36
56
|
|
37
|
-
|
57
|
+
## 0.4.1
|
38
58
|
|
39
59
|
- Deprecated embedded associations on models that don't use IDC (#305)
|
40
60
|
- Remove a respond_to? check that hides mistakes in includes hash (#307)
|
@@ -45,11 +65,11 @@
|
|
45
65
|
- Clone instead of dup record when readonlyifying fetched records (#292)
|
46
66
|
- Consistently store the array for cached has many associations (#288)
|
47
67
|
|
48
|
-
|
68
|
+
## 0.4.0
|
49
69
|
|
50
70
|
- Return an array from fetched association to prevent chaining. Up to now, a relation was returned by default. (#287)
|
51
71
|
|
52
|
-
|
72
|
+
## 0.3.2
|
53
73
|
|
54
74
|
- Deprecate returning non read-only records when cache is used. Set IdentityCache.fetch_readonly_records to true to avoid this. (#282)
|
55
75
|
- Use loaded association first when fetching a cache_has_many id embedded association (#280)
|
@@ -57,11 +77,11 @@
|
|
57
77
|
- Fetch association returns relation or array depending on the configuration. It was only returning a relation for cache_has_many fetch association methods. (#276)
|
58
78
|
- Stop sharing the same attributes hash between the fetched record and the memoized cache, which could interfere with dirty tracking (#267)
|
59
79
|
|
60
|
-
|
80
|
+
## 0.3.1
|
61
81
|
|
62
82
|
- Fix cache_index for non-id primary key
|
63
83
|
|
64
|
-
|
84
|
+
## 0.3.0
|
65
85
|
|
66
86
|
- Add support for includes option on cache_index and fetch_by_id
|
67
87
|
- Use ActiveRecord instantiate
|
@@ -74,37 +94,37 @@
|
|
74
94
|
- Fix cache_belongs_to on polymorphic associations.
|
75
95
|
- Fetching a cache_belongs_to association no longer loads the belongs_to association
|
76
96
|
|
77
|
-
|
97
|
+
## 0.2.5
|
78
98
|
|
79
99
|
- Fixed support for namespaced model classes
|
80
100
|
- Added some deduplication for parent cache expiry
|
81
101
|
- Fixed some deprecation warnings in rails 4.2
|
82
102
|
|
83
|
-
|
103
|
+
## 0.2.4
|
84
104
|
|
85
105
|
- Refactoring, documentation and test changes
|
86
106
|
|
87
|
-
|
107
|
+
## 0.2.3
|
88
108
|
|
89
109
|
- PostgreSQL support
|
90
110
|
- Rails 4.2 compatibility
|
91
111
|
- Fix: Don't connect to database when calling `IdentityCache.should_use_cache?`
|
92
112
|
- Fix: Fix invalid parent cache invalidation if object is embedded in different parents
|
93
113
|
|
94
|
-
|
114
|
+
## 0.2.2
|
95
115
|
|
96
116
|
- Change: memcached is no longer a runtime dependency
|
97
117
|
- Use cache for read-only models.
|
98
118
|
|
99
|
-
|
119
|
+
## 0.2.1
|
100
120
|
|
101
121
|
- Add a fallback backend using local memory.
|
102
122
|
|
103
|
-
|
123
|
+
## 0.2.0
|
104
124
|
|
105
125
|
- Memcache CAS support
|
106
126
|
|
107
|
-
|
127
|
+
## 0.1.0
|
108
128
|
|
109
129
|
- Backwards incompatible change: Stop expiring cache on after_touch callback.
|
110
130
|
- Change: fetch_multi accepts an array of keys as argument
|
@@ -115,29 +135,29 @@
|
|
115
135
|
- Fix: Avoid unused preload on fetch_multi with :includes option for cache miss
|
116
136
|
- Fix: reload will invalidate the local instance cache
|
117
137
|
|
118
|
-
|
138
|
+
## 0.0.7
|
119
139
|
|
120
140
|
- Add support for non-integer primary keys
|
121
141
|
- Fix: Not implemented error for cache_has_one with embed: false
|
122
142
|
- Fix: cache key to change when adding a cache_has_many association with :embed => false
|
123
143
|
- Fix: Compatibility rails 4.1 for `quote_value`, which needs default column.
|
124
144
|
|
125
|
-
|
145
|
+
## 0.0.6
|
126
146
|
|
127
147
|
- Fix: bug where previously nil-cached attribute caches weren't expired on record creation
|
128
148
|
- Fix: cache key to not change when adding a non-embedded association.
|
129
149
|
- Perf: Rails 4 Only create `CollectionProxy` when using it
|
130
150
|
|
131
|
-
|
151
|
+
## 0.0.5
|
132
152
|
|
133
153
|
|
134
|
-
|
154
|
+
## 0.0.4
|
135
155
|
|
136
156
|
- Fix: only marshal attributes, embedded associations and normalized association IDs
|
137
157
|
- Add cache version number to cache keys
|
138
158
|
- Add test case to ensure version number is updated when the marshalled format changes
|
139
159
|
|
140
|
-
|
160
|
+
## 0.0.3
|
141
161
|
|
142
162
|
- Fix: memoization for multi hits actually work
|
143
163
|
- Fix: quotes `SELECT` projection elements on cache misses
|
@@ -145,7 +165,7 @@
|
|
145
165
|
- Fix: table names are not hardcoded anymore
|
146
166
|
- Logger now differentiates memoized vs non memoized hits
|
147
167
|
|
148
|
-
|
168
|
+
## 0.0.2
|
149
169
|
|
150
170
|
- Fix: Existent embedded entries will no longer raise `ActiveModel::MissingAttributeError` when accessing a newly created attribute.
|
151
171
|
- Fix: Do not marshal raw ActiveRecord associations
|
data/Gemfile
CHANGED
data/LICENSE
CHANGED
data/README.md
CHANGED
@@ -12,7 +12,10 @@ Add this line to your application's Gemfile:
|
|
12
12
|
```ruby
|
13
13
|
gem 'identity_cache'
|
14
14
|
gem 'cityhash' # optional, for faster hashing (C-Ruby only)
|
15
|
-
|
15
|
+
|
16
|
+
gem 'dalli' # To use :mem_cache_store
|
17
|
+
# alternatively
|
18
|
+
gem 'memcached_store' # to use the old libmemcached based client
|
16
19
|
```
|
17
20
|
|
18
21
|
And then execute:
|
@@ -22,13 +25,30 @@ And then execute:
|
|
22
25
|
|
23
26
|
Add the following to all your environment/*.rb files (production/development/test):
|
24
27
|
|
28
|
+
### If you use Dalli (recommended)
|
29
|
+
|
30
|
+
```ruby
|
31
|
+
config.identity_cache_store = :mem_cache_store, "mem1.server.com", "mem2.server.com", {
|
32
|
+
expires_in: 6.hours.to_i, # in case of network errors when sending a delete
|
33
|
+
failover: false, # avoids more cache consistency issues
|
34
|
+
}
|
35
|
+
```
|
36
|
+
|
37
|
+
Add an initializer with this code:
|
38
|
+
|
39
|
+
```ruby
|
40
|
+
IdentityCache.cache_backend = ActiveSupport::Cache.lookup_store(*Rails.configuration.identity_cache_store)
|
41
|
+
```
|
42
|
+
|
43
|
+
|
44
|
+
### If you use Memcached (old client)
|
45
|
+
|
25
46
|
```ruby
|
26
47
|
config.identity_cache_store = :memcached_store,
|
27
|
-
Memcached
|
48
|
+
Memcached.new(["mem1.server.com"],
|
28
49
|
support_cas: true,
|
29
50
|
auto_eject_hosts: false, # avoids more cache consistency issues
|
30
|
-
|
31
|
-
)
|
51
|
+
), { expires_in: 6.hours.to_i } # in case of network errors when sending a delete
|
32
52
|
```
|
33
53
|
|
34
54
|
Add an initializer with this code:
|
@@ -238,7 +258,7 @@ A word of warning. Some versions of rails will silently rescue all exceptions in
|
|
238
258
|
|
239
259
|
Since everything is being marshalled and unmarshalled from Memcached changing Ruby or Rails versions could mean your objects cannot be unmarshalled from Memcached. There are a number of ways to get around this such as namespacing keys when you upgrade or rescuing marshal load errors and treating it as a cache miss. Just something to be aware of if you are using IdentityCache and upgrade Ruby or Rails.
|
240
260
|
|
241
|
-
IdentityCache is also very much _opt-in_ by deliberate design. This means IdentityCache does not mess with the way normal Rails associations work, and including it in a model won't change any clients of that model until you switch them to use `fetch` instead of `find`. This is because there is no way IdentityCache is ever going to be 100% consistent. Processes die,
|
261
|
+
IdentityCache is also very much _opt-in_ by deliberate design. This means IdentityCache does not mess with the way normal Rails associations work, and including it in a model won't change any clients of that model until you switch them to use `fetch` instead of `find`. This is because there is no way IdentityCache is ever going to be 100% consistent. Processes die, exceptions happen, and network blips occur, which means there is a chance that some database transaction might commit but the corresponding memcached DEL operation does not make it. This means that you need to think carefully about when you use `fetch` and when you use `find`. For example, at Shopify, we never use any `fetch`ers on the path which moves money around, because IdentityCache could simply be wrong, and we want to charge people the right amount of money. We do however use the fetchers on performance critical paths where absolute correctness isn't the most important thing, and this is what IdentityCache is intended for.
|
242
262
|
|
243
263
|
## Notes
|
244
264
|
|
data/dev.yml
CHANGED
data/gemfiles/Gemfile.rails-edge
CHANGED
data/identity_cache.gemspec
CHANGED
@@ -31,13 +31,16 @@ Gem::Specification.new do |gem|
|
|
31
31
|
gem.require_paths = ["lib"]
|
32
32
|
gem.version = IdentityCache::VERSION
|
33
33
|
|
34
|
-
gem.required_ruby_version = '>= 2.
|
34
|
+
gem.required_ruby_version = '>= 2.5.0'
|
35
35
|
|
36
|
-
gem.
|
36
|
+
gem.metadata['allowed_push_host'] = 'https://rubygems.org'
|
37
|
+
|
38
|
+
gem.add_dependency('ar_transaction_changes', '~> 1.1')
|
37
39
|
gem.add_dependency('activerecord', '>= 5.2')
|
38
40
|
|
39
41
|
gem.add_development_dependency('memcached', '~> 1.8.0')
|
40
42
|
gem.add_development_dependency('memcached_store', '~> 1.0.0')
|
43
|
+
gem.add_development_dependency('dalli')
|
41
44
|
gem.add_development_dependency('rake')
|
42
45
|
gem.add_development_dependency('mocha', '0.14.0')
|
43
46
|
gem.add_development_dependency('spy')
|
@@ -48,7 +51,7 @@ Gem::Specification.new do |gem|
|
|
48
51
|
else
|
49
52
|
gem.add_development_dependency('cityhash', '0.6.0')
|
50
53
|
gem.add_development_dependency('mysql2')
|
51
|
-
gem.add_development_dependency('pg'
|
54
|
+
gem.add_development_dependency('pg')
|
52
55
|
gem.add_development_dependency('stackprof')
|
53
56
|
end
|
54
57
|
end
|