solid_cache_mongoid 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/MIT-LICENSE +20 -0
- data/README.md +189 -0
- data/Rakefile +48 -0
- data/app/jobs/solid_cache_mongoid/expiry_job.rb +9 -0
- data/app/models/solid_cache_mongoid/entry/expiration.rb +55 -0
- data/app/models/solid_cache_mongoid/entry/size/estimate.rb +133 -0
- data/app/models/solid_cache_mongoid/entry/size/moving_average_estimate.rb +62 -0
- data/app/models/solid_cache_mongoid/entry/size.rb +21 -0
- data/app/models/solid_cache_mongoid/entry.rb +145 -0
- data/app/models/solid_cache_mongoid/record.rb +77 -0
- data/lib/active_support/cache/solid_cache_mongoid_store.rb +9 -0
- data/lib/generators/solid_cache/install/USAGE +9 -0
- data/lib/generators/solid_cache/install/install_generator.rb +20 -0
- data/lib/generators/solid_cache/install/templates/config/cache.yml.tt +20 -0
- data/lib/generators/solid_cache/install/templates/db/cache_schema.rb +12 -0
- data/lib/generators/solid_cache/install/templates/db/cache_structure.mysql.sql +56 -0
- data/lib/generators/solid_cache/install/templates/db/cache_structure.postgresql.sql +128 -0
- data/lib/generators/solid_cache/install/templates/db/cache_structure.sqlite3.sql +6 -0
- data/lib/solid_cache_mongoid/configuration.rb +31 -0
- data/lib/solid_cache_mongoid/connections/unmanaged.rb +33 -0
- data/lib/solid_cache_mongoid/connections.rb +9 -0
- data/lib/solid_cache_mongoid/engine.rb +38 -0
- data/lib/solid_cache_mongoid/store/api.rb +178 -0
- data/lib/solid_cache_mongoid/store/connections.rb +88 -0
- data/lib/solid_cache_mongoid/store/entries.rb +79 -0
- data/lib/solid_cache_mongoid/store/execution.rb +52 -0
- data/lib/solid_cache_mongoid/store/expiry.rb +49 -0
- data/lib/solid_cache_mongoid/store/failsafe.rb +39 -0
- data/lib/solid_cache_mongoid/store/stats.rb +30 -0
- data/lib/solid_cache_mongoid/store.rb +20 -0
- data/lib/solid_cache_mongoid/version.rb +5 -0
- data/lib/solid_cache_mongoid.rb +16 -0
- data/lib/tasks/solid_cache_tasks.rake +25 -0
- metadata +201 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
+---
+SHA256:
+  metadata.gz: cf9d5857f9532ff873ca0667c47e572d61ccaee958a9768d0bf372832ed45067
+  data.tar.gz: 2e6a812dd2b7930ddf81f7f1cddbf08cef0cf039365f974412b7460642964ee4
+SHA512:
+  metadata.gz: 1355e87bd9234e18d12a7a33fb70d9145be98088fa1117f983110a998fe5ec5f2857e8baf1d826a601112c224839e4e37ff0f0d738f04709ecdb550173032da2
+  data.tar.gz: 86dc49c94645cf8c6ecdc5c5d69ead576bf15ed66d3ecf0629952f7c49db7beafa3c181e993e3b4050a02ba0cdc50cb03931a79518315b71d03b3574c82e30a6
data/MIT-LICENSE
ADDED
@@ -0,0 +1,20 @@
+Copyright (c) 37signals, LLC
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md
ADDED
@@ -0,0 +1,189 @@
+# Solid Cache
+
+Solid Cache is a database-backed Active Support cache store that lets you keep a much larger cache than is typically possible with traditional memory-only Redis or Memcached stores. This is thanks to the speed of modern SSD drives, which make the access-time penalty of disk versus RAM insignificant for most caching purposes. Simply put, you're now usually better off keeping a huge cache on disk than a small cache in memory.
+
+## Installation
+
+Solid Cache is configured by default in new Rails 8 applications. If you're running an earlier version, you can add it manually by following these steps:
+
+1. `bundle add solid_cache_mongoid`
+2. `bin/rails solid_cache_mongoid:install`
+
+This will configure Solid Cache as the production cache store and create `config/cache.yml`.
+
+## Configuration
+
+Configuration is read from `config/cache.yml` or `config/solid_cache.yml`. You can change the location of the config file by setting the `SOLID_CACHE_CONFIG` environment variable.
+
+The format of the file is:
+
+```yml
+default: &default
+  # database: <%= ENV.fetch("SOLID_CACHE_DATABASE", "solid_cache") %>
+  # collection: solid_cache_entries
+  # client: default
+  # encrypt: false
+  store_options:
+    # Cap the age of the oldest cache entry to fulfill retention policies
+    # max_age: <%= 60.days.to_i %>
+    max_size: <%= 256.megabytes %>
+    namespace: <%= Rails.env %>
+
+development:
+  <<: *default
+
+test:
+  <<: *default
+
+production:
+  database: <%= ENV.fetch("SOLID_CACHE_DATABASE", "solid_cache") %>
+  <<: *default
+
+```
+
+For the full list of keys for `store_options`, see [Cache configuration](#cache-configuration). Any options passed to the cache lookup will overwrite those specified here.
+
+After running `solid_cache_mongoid:install`, `environments/production.rb` will replace your cache store with Solid Cache, but you can also do this manually:
+
+```ruby
+# config/environments/production.rb
+config.cache_store = :solid_cache_mongoid_store
+```
+### Engine configuration
+
+There are four options that can be set on the engine:
+
+- `executor` - the [Rails executor](https://guides.rubyonrails.org/threading_and_code_execution.html#executor) used to wrap asynchronous operations, defaults to the app executor
+- `size_estimate_samples` - if `max_size` is set on the cache, the number of samples used to estimate the size
+- `encrypted` - whether cache values should be encrypted (see [Enabling encryption](#enabling-encryption))
+- `encryption_context_properties` - custom encryption context properties
+
+These can be set in your Rails configuration:
+
+```ruby
+Rails.application.configure do
+  config.solid_cache.size_estimate_samples = 1000
+end
+```
+
+### Cache configuration
+
+Solid Cache supports these options in addition to the standard `ActiveSupport::Cache::Store` options:
+
+- `error_handler` - a Proc called to handle any transient database errors that are raised (default: log errors as warnings)
+- `expiry_batch_size` - the batch size to use when deleting old records (default: `100`)
+- `expiry_method` - the expiry method to use, `thread` or `job` (default: `thread`)
+- `expiry_queue` - which queue to add expiry jobs to (default: `default`)
+- `max_age` - the maximum age of entries in the cache (default: `2.weeks.to_i`). Can be set to `nil`, but this is not recommended unless `max_entries` is used to limit the size of the cache.
+- `max_entries` - the maximum number of entries allowed in the cache (default: `nil`, meaning no limit)
+- `max_size` - the maximum size of the cache entries (default: `nil`, meaning no limit)
+- `cluster` - (deprecated) a Hash of options for the cache database cluster, e.g. `{ shards: [:database1, :database2, :database3] }`
+- `clusters` - (deprecated) an Array of Hashes for multiple cache clusters (ignored if `:cluster` is set)
+- `shards` - an Array of databases
+- `active_record_instrumentation` - whether to instrument the cache's queries (default: `true`)
+- `clear_with` - clear the cache with `:truncate` or `:delete` (default: `:truncate`, except when `Rails.env.test?`, then `:delete`)
+- `max_key_bytesize` - the maximum size of a normalized key in bytes (default: `1024`)
+
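As a minimal sketch of the override behaviour described above (the option values are hypothetical, not defaults), store options can also be passed directly when setting the cache store:

```ruby
# config/environments/production.rb
# Options given here override the store_options in config/cache.yml.
config.cache_store = :solid_cache_mongoid_store, { max_age: 1.week.to_i, max_entries: 500_000 }
```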
+## Cache expiry
+
+Solid Cache tracks writes to the cache. For every write it increments a counter by 1. Once the counter reaches 50% of the `expiry_batch_size`, it adds a task to run on a background thread. That task will:
+
+1. Check whether we have exceeded the `max_entries` or `max_size` values (if set).
+   The current number of entries is estimated by subtracting the minimum ID from the maximum ID in the `SolidCacheMongoid::Entry` collection.
+   The current size is estimated by sampling the entry `byte_size` fields.
+2. If we have, it will delete `expiry_batch_size` entries.
+3. If not, it will delete up to `expiry_batch_size` entries, provided they are all older than `max_age`.
+
+Expiring when we reach 50% of the batch size allows us to expire records from the cache faster than we write to it when we need to reduce the cache size.
+
+Only triggering expiry on writes means that if the cache is idle, the background thread is also idle.
+
+If you want cache expiry to run in a background job instead of a thread, set `expiry_method` to `:job`. This will enqueue a `SolidCacheMongoid::ExpiryJob`.
+
+## Enabling encryption
+
+To encrypt the cache values, add the `encrypt` property:
+
+```yaml
+# config/cache.yml
+production:
+  encrypt: true
+```
+or
+```ruby
+# application.rb
+config.solid_cache.encrypt = true
+```
+
+You will need to set up your application to [use encryption](https://www.mongodb.com/docs/mongoid/current/security/encryption).
+
+Solid Cache by default uses a custom encryptor and message serializer that are optimised for it.
+You can choose your own context properties instead if you prefer:
+
+```ruby
+# application.rb
+config.solid_cache.encryption_context_properties = {
+  deterministic: false
+}
+```
+
+## Index size limits
+The Solid Cache migrations try to create an index with 1024 byte entries. If that is too big for your database, you should:
+
+1. Edit the index size in the migration.
+2. Set `max_key_bytesize` on your cache to the new value.
+
+## Development
+
+Run the tests with `bin/rake test`. By default, these will run against SQLite.
+
+You can also run the tests against MySQL and PostgreSQL. First start up the databases:
+
+```shell
+$ docker compose up -d
+```
+
+Next, set up the database:
+
+```shell
+$ bin/rails db:setup
+```
+
+
+Then run the tests:
+
+```shell
+$ bin/rake test
+```
+
+### Testing with multiple Rails versions
+
+Solid Cache relies on [appraisal](https://github.com/thoughtbot/appraisal/tree/main) to test
+multiple Rails versions.
+
+To run the tests for a specific version, run:
+
+```shell
+bundle exec appraisal rails-7-1 bin/rake test
+```
+
+After updating the dependencies in the `Gemfile`, please run:
+
+```shell
+$ bundle
+$ appraisal update
+```
+
+This ensures that the dependencies are updated for all Rails versions.
+
+## Implementation
+
+Solid Cache is a FIFO (first in, first out) cache. While this is not as efficient as an LRU (least recently used) cache, the difference is mitigated by the longer cache lifespan.
+
+A FIFO cache is much easier to manage:
+1. We don't need to track when items are read.
+2. We can estimate and control the cache size by comparing the maximum and minimum IDs.
+3. By deleting from one end of the table and adding to the other end, we can avoid fragmentation.
+
+## License
+Solid Cache is licensed under MIT.
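Once configured, the store is exercised through the standard `Rails.cache` API; a minimal sketch (keys and values are illustrative):

```ruby
Rails.cache.write("greeting", "hello", expires_in: 5.minutes)
Rails.cache.read("greeting")          # => "hello"
Rails.cache.fetch("answer") { 6 * 7 } # => 42, computed once and then served from the cache
```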
data/Rakefile
ADDED
@@ -0,0 +1,48 @@
+# frozen_string_literal: true
+
+require "bundler/setup"
+
+APP_RAKEFILE = File.expand_path("test/dummy/Rakefile", __dir__)
+load "rails/tasks/engine.rake"
+
+load "rails/tasks/statistics.rake"
+
+require "bundler/gem_tasks"
+require "rake/testtask"
+
+def run_without_aborting(*tasks)
+  errors = []
+
+  tasks.each do |task|
+    Rake::Task[task].invoke
+  rescue Exception => e
+    puts e.message
+    puts e.backtrace
+    errors << task
+  end
+
+  abort "Errors running #{errors.join(', ')}" if errors.any?
+end
+
+def configs
+  [ :default, :database, :no_database, :client, :collection, :encrypted, :encrypted_custom ]
+end
+
+task :test do
+  tasks = configs.map { |config| "test:#{config}" }
+  run_without_aborting(*tasks)
+end
+
+configs.each do |config|
+  namespace :test do
+    task config do
+      if config == :default
+        sh("bin/rails test")
+      else
+        sh("SOLID_CACHE_CONFIG=config/cache_#{config}.yml bin/rails test")
+      end
+    end
+  end
+end
+
+task default: [:test]
data/app/jobs/solid_cache_mongoid/expiry_job.rb
ADDED
@@ -0,0 +1,9 @@
+# frozen_string_literal: true
+
+module SolidCacheMongoid
+  class ExpiryJob < ActiveJob::Base
+    def perform(count, max_age: nil, max_entries: nil, max_size: nil)
+      Entry.expire(count, max_age: max_age, max_entries: max_entries, max_size: max_size)
+    end
+  end
+end
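A usage sketch for the job above (the count and option values are hypothetical; normally the store enqueues this itself when `expiry_method` is `:job`):

```ruby
# Expire up to 100 candidate entries in the background.
SolidCacheMongoid::ExpiryJob.perform_later(100, max_age: 2.weeks.to_i, max_size: 256.megabytes)
```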
data/app/models/solid_cache_mongoid/entry/expiration.rb
ADDED
@@ -0,0 +1,55 @@
+# frozen_string_literal: true
+
+module SolidCacheMongoid
+  class Entry
+    module Expiration
+      extend ActiveSupport::Concern
+
+      class_methods do
+        def expire(count, max_age:, max_entries:, max_size:)
+          if (ids = expiry_candidate_ids(count, max_age: max_age, max_entries: max_entries, max_size: max_size)).any?
+            where(:id.in => ids).delete_all
+          end
+        end
+
+        private
+          def cache_full?(max_entries:, max_size:)
+            if max_entries && max_entries < id_range
+              true
+            elsif max_size && max_size < estimated_size
+              true
+            else
+              false
+            end
+          end
+
+          def expiry_candidate_ids(count, max_age:, max_entries:, max_size:)
+            cache_full = cache_full?(max_entries: max_entries, max_size: max_size)
+            return [] unless cache_full || max_age
+
+            # In the case of multiple concurrent expiry operations, it is desirable to
+            # reduce the overlap of entries being addressed by each. For that reason,
+            # retrieve more ids than are being expired, and use random
+            # sampling to reduce that number to the actual intended count.
+            retrieve_count = count * 3
+
+            uncached do
+              candidates = order_by([:id, :asc]).limit(retrieve_count)
+
+              candidate_ids = if cache_full
+                candidates.pluck(:id)
+              else
+                min_created_at = max_age.seconds.ago
+                # We don't have an index on created_at, but we can select
+                # the records by id and they'll be in created_at order.
+                candidates.pluck(:id, :created_at)
+                          .filter_map { |id, created_at| id if created_at < min_created_at }
+              end
+
+              candidate_ids.sample(count)
+            end
+          end
+      end
+    end
+  end
+end
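Calling the concern directly looks like this (a sketch with assumed limits; all three keywords are required by the `expire` signature above):

```ruby
# Deletes up to 100 entries once the cache holds more than 500,000 entries
# or contains anything older than two weeks.
SolidCacheMongoid::Entry.expire(100, max_age: 2.weeks.to_i, max_entries: 500_000, max_size: nil)
```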
data/app/models/solid_cache_mongoid/entry/size/estimate.rb
ADDED
@@ -0,0 +1,133 @@
+# frozen_string_literal: true
+
+module SolidCacheMongoid
+  class Entry
+    # # Cache size estimation
+    #
+    # We store the size of each cache row in the byte_size field. This allows us to estimate the size of the cache
+    # by sampling those rows.
+    #
+    # To reduce the effect of outliers, though, we'll grab the N largest rows and add their size to a sample-based
+    # estimate of the size of the remaining rows.
+    #
+    # ## Outliers
+    #
+    # There is an index on the byte_size column, so we can efficiently grab the N largest rows. We also grab the
+    # minimum byte_size of those rows, which we'll use as a cutoff for the non-outlier sampling.
+    #
+    # ## Sampling
+    #
+    # To efficiently sample the data we use the key_hash column, which is a random 64-bit integer. There's an index
+    # on key_hash and byte_size, so we can grab a sum of the byte_sizes in a range of key_hash directly from that
+    # index.
+    #
+    # To decide how big the range should be, we use the difference between the smallest and largest database IDs as
+    # an estimate of the number of rows in the table. This should be a good estimate, because we delete rows in ID order.
+    #
+    # We then calculate the fraction of the rows we want to sample by dividing the sample size by the estimated number
+    # of rows.
+    #
+    # Then we grab the byte_size sum of the rows in the range of key_hash values, excluding any rows that are larger than
+    # our minimum outlier cutoff. We then divide this by the sampling fraction to get an estimate of the size of the
+    # non-outlier rows.
+    #
+    # ## Equations
+    #
+    # Given N samples and a key_hash range of Kmin..Kmax
+    #
+    # outliers_cutoff OC = min(byte_size of N largest rows)
+    # outliers_size OS = sum(byte_size of N largest rows)
+    #
+    # estimated number of rows R = max(ID) - min(ID) + 1
+    # sample_fraction F = N / R
+    # sample_range_size S = (Kmax - Kmin) * F
+    # sample range is K1..K2 where K1 = Kmin + rand(Kmax - S) and K2 = K1 + S
+    #
+    # non_outlier_sample_size NSS = sum(byte_size of rows in key_hash range K1..K2 where byte_size <= OC)
+    # non_outlier_estimated_size NES = NSS / F
+    # estimated_size ES = OS + NES
+    module Size
+      class Estimate
+        attr_reader :samples, :max_records
+
+        def initialize(samples:)
+          @samples = samples
+          @max_records = Entry.id_range
+        end
+
+        def size
+          outliers_size + non_outlier_estimated_size
+        end
+
+        def exact?
+          outliers_count < samples || sampled_fraction == 1
+        end
+
+        private
+          def outliers_size
+            outliers_size_count_and_cutoff[0]
+          end
+
+          def outliers_count
+            outliers_size_count_and_cutoff[1]
+          end
+
+          def outliers_cutoff
+            outliers_size_count_and_cutoff[2]
+          end
+
+          def outliers_size_count_and_cutoff
+            @outliers_size_count_and_cutoff ||= Entry.uncached do
+              largest = Entry.largest_byte_sizes(samples)
+
+              # Use MongoDB aggregation to compute the sum, count and min
+              if largest.exists?
+                sum = largest.sum(:byte_size)
+                count = largest.count
+                min = largest.min(:byte_size)
+                [sum, count, min]
+              else
+                [0, 0, nil]
+              end
+            end
+          end
+
+          def non_outlier_estimated_size
+            @non_outlier_estimated_size ||= sampled_fraction.zero? ? 0 : (sampled_non_outlier_size / sampled_fraction).round
+          end
+
+          def sampled_fraction
+            @sampled_fraction ||=
+              if max_records <= samples
+                0
+              else
+                [samples.to_f / (max_records - samples), 1].min
+              end
+          end
+
+          def sampled_non_outlier_size
+            @sampled_non_outlier_size ||= Entry.uncached do
+              Entry.in_key_hash_range(sample_range).up_to_byte_size(outliers_cutoff).sum(:byte_size)
+            end
+          end
+
+          def sample_range
+            if sampled_fraction == 1
+              key_hash_range
+            else
+              start = rand(key_hash_range.begin..(key_hash_range.end - sample_range_size))
+              start..(start + sample_range_size)
+            end
+          end
+
+          def key_hash_range
+            Entry::KEY_HASH_ID_RANGE
+          end
+
+          def sample_range_size
+            @sample_range_size ||= (key_hash_range.size * sampled_fraction).to_i
+          end
+      end
+    end
+  end
+end
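To make the equations in the comment concrete, a worked example with made-up numbers:

```ruby
# N = 10 samples, R = 1,000 estimated rows
outliers_size      = 50_000                # OS: byte_size sum of the 10 largest rows
sample_fraction    = 10.0 / (1_000 - 10)   # F = 1/99 ≈ 0.0101
non_outlier_sample = 2_500                 # NSS: byte_size sum inside the sampled key_hash range
non_outlier_size   = (non_outlier_sample / sample_fraction).round # NES = 247_500
estimated_size     = outliers_size + non_outlier_size             # ES  = 297_500
```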
data/app/models/solid_cache_mongoid/entry/size/moving_average_estimate.rb
ADDED
@@ -0,0 +1,62 @@
+# frozen_string_literal: true
+
+module SolidCacheMongoid
+  class Entry
+    module Size
+      # Moving average cache size estimation
+      #
+      # To reduce variability in the cache size estimate, we'll use a moving average of the most recent estimates.
+      # The estimates are stored directly in the cache, under the "__solid_cache_entry_size_moving_average_estimates" key.
+      #
+      # Up to MAX_RETAINED_ESTIMATES values are kept, and the reported size is their mean.
+      class MovingAverageEstimate
+        ESTIMATES_KEY = "__solid_cache_entry_size_moving_average_estimates"
+        MAX_RETAINED_ESTIMATES = 50
+        TARGET_SAMPLED_FRACTION = 0.0005
+
+        attr_reader :samples, :size
+        delegate :exact?, to: :estimate
+
+        def initialize(samples:)
+          @samples = samples
+          @estimate = Estimate.new(samples: samples)
+          values = latest_values
+          @size = (values.sum / values.size.to_f).round
+          write_values(values)
+        end
+
+        private
+          attr_reader :estimate
+
+          def previous_values
+            value = Entry.read(ESTIMATES_KEY)
+            return [] unless value
+
+            # Convert from BSON::Binary to a string if necessary
+            value_str = value.is_a?(BSON::Binary) ? value.data : value.to_s
+            value_str.presence&.split("|")&.map(&:to_i) || []
+          end
+
+          def latest_value
+            estimate.size
+          end
+
+          def latest_values
+            (previous_values + [latest_value]).last(retained_estimates)
+          end
+
+          def write_values(values)
+            Entry.write(ESTIMATES_KEY, values.join("|"))
+          end
+
+          def retained_estimates
+            [retained_estimates_for_target_fraction, MAX_RETAINED_ESTIMATES].min
+          end
+
+          def retained_estimates_for_target_fraction
+            (estimate.max_records / samples * TARGET_SAMPLED_FRACTION).floor + 1
+          end
+      end
+    end
+  end
+end
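The bookkeeping above reduces to a few lines; a sketch with hypothetical stored values:

```ruby
# Estimates persist in the cache as a "|"-delimited string.
previous = "290000|310000|305000".split("|").map(&:to_i)
values   = (previous + [297_500]).last(50)        # cap at MAX_RETAINED_ESTIMATES
average  = (values.sum / values.size.to_f).round  # => 300_625, the reported size
```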
data/app/models/solid_cache_mongoid/entry/size.rb
ADDED
@@ -0,0 +1,21 @@
+# frozen_string_literal: true
+
+module SolidCacheMongoid
+  class Entry
+    module Size
+      extend ActiveSupport::Concern
+
+      included do
+        scope :largest_byte_sizes, ->(limit) { order(byte_size: :desc).limit(limit).only(:byte_size) }
+        scope :in_key_hash_range, ->(range) { where(:key_hash.gte => range.begin, :key_hash.lte => range.end) }
+        scope :up_to_byte_size, ->(cutoff) { where(:byte_size.lte => cutoff) }
+      end
+
+      class_methods do
+        def estimated_size(samples: SolidCacheMongoid.configuration.size_estimate_samples)
+          MovingAverageEstimate.new(samples: samples).size
+        end
+      end
+    end
+  end
+end
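In use, the concern exposes a single entry point (the sample count shown is an assumption, mirroring the `size_estimate_samples` example from the README):

```ruby
SolidCacheMongoid::Entry.estimated_size(samples: 1_000) # => estimated cache size in bytes
```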
data/app/models/solid_cache_mongoid/entry.rb
ADDED
@@ -0,0 +1,145 @@
+# frozen_string_literal: true
+
+module SolidCacheMongoid
+  class Entry
+    include Record
+    include Expiration
+    include Size
+    include Mongoid::Locker
+
+    # The estimated cost of an extra row in bytes, including fixed size columns, overhead, indexes and free space.
+    # Based on experimentation on SQLite, MySQL and Postgresql.
+    # A bit high for SQLite (more like 90 bytes), but about right for MySQL/Postgresql.
+    ESTIMATED_ROW_OVERHEAD = 140
+
+    # Assuming MessagePack serialization
+    ESTIMATED_ENCRYPTION_OVERHEAD = 170
+
+    KEY_HASH_ID_RANGE = -(2**63)..(2**63 - 1)
+
+    MULTI_BATCH_SIZE = 1000
+
+    class << self
+      def write(key, value)
+        write_multi([ { key: key, value: value } ])
+      end
+
+      def write_multi(payloads)
+        without_query_cache do
+          payloads.each_slice(MULTI_BATCH_SIZE).each do |payload_batch|
+            add_key_hash_and_byte_size(payload_batch).each do |payload|
+              # Convert key and value to BSON::Binary
+              key = payload.delete(:key)
+              value = payload.delete(:value)
+              obj = where(key_hash: payload[:key_hash]).first_or_initialize
+              obj.assign_attributes(payload)
+              obj.key = BSON::Binary.new(key)
+              obj.value = BSON::Binary.new(value)
+              obj.save!
+            end
+          end
+        end
+      end
+
+      def read(key)
+        read_multi([key])[key]
+      end
+
+      def read_multi(keys)
+        without_query_cache do
+          {}.tap do |results|
+            keys.each_slice(MULTI_BATCH_SIZE).each do |keys_batch|
+              key_hashes = key_hashes_for(keys_batch)
+
+              where(:key_hash.in => key_hashes)
+                .only(:key, :value)
+                .each do |entry|
+                  # Convert BSON::Binary back to a string
+                  key_str = entry.key.data
+                  value_str = entry.value.data
+                  results[key_str] = value_str
+                end
+            end
+          end
+        end
+      end
+
+      def delete_by_key(*keys)
+        without_query_cache do
+          where(:key_hash.in => key_hashes_for(keys)).delete_all
+        end
+      end
+
+      def clear_truncate
+        delete_all
+        nil
+      end
+
+      def clear_delete
+        without_query_cache do
+          delete_all
+        end
+        nil
+      end
+
+      def lock_and_write(key, &block)
+        without_query_cache do
+          entry = where(key_hash: key_hash_for(key)).first
+
+          if entry
+            entry.with_lock do
+              entry_key = entry.key.data
+              entry_value = entry.value.data
+
+              current_value = entry_key == key ? entry_value : nil
+              new_value = block.call(current_value)
+              write(key, new_value) if new_value
+              new_value
+            end
+          else
+            new_value = block.call(nil)
+            write(key, new_value) if new_value
+            new_value
+          end
+        end
+      end
+
+      def id_range
+        without_query_cache { count }
+      end
+
+      private
+        def add_key_hash_and_byte_size(payloads)
+          payloads.map do |payload|
+            payload.dup.tap do |payload|
+              payload[:key_hash] = key_hash_for(payload[:key])
+              payload[:byte_size] = byte_size_for(payload)
+            end
+          end
+        end
+
+        def key_hash_for(key)
+          # Unpack the digest as a signed 64-bit integer (BSON stores 64-bit integers as signed)
+          Digest::SHA256.digest(key.to_s).unpack("q>").first
+        end
+
+        def key_hashes_for(keys)
+          keys.map { |key| key_hash_for(key) }
+        end
+
+        def byte_size_for(payload)
+          payload[:key].to_s.bytesize + payload[:value].to_s.bytesize + estimated_row_overhead
+        end
+
+        def estimated_row_overhead
+          if SolidCacheMongoid.configuration.encrypt?
+            ESTIMATED_ROW_OVERHEAD + ESTIMATED_ENCRYPTION_OVERHEAD
+          else
+            ESTIMATED_ROW_OVERHEAD
+          end
+        end
+    end
+  end
+end
+
+ActiveSupport.run_load_hooks :solid_cache_entry, SolidCacheMongoid::Entry