resilient_reads 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +195 -17
- data/lib/resilient_reads/adapter_patch.rb +42 -12
- data/lib/resilient_reads/configuration.rb +8 -1
- data/lib/resilient_reads/lag_checker.rb +17 -12
- data/lib/resilient_reads/version.rb +1 -1
- data/lib/resilient_reads.rb +1 -0
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ec873d08dc27b40673379ea070233b0aa06ce97f75adc77b9b43b29265be38b1
|
|
4
|
+
data.tar.gz: b0fb02179cdf69e5859f2d3ec1e0910d122590a92d876665d9d398414ca56ab7
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 0d370a34eae6f0a3ff3206547705fd832ecc59a48597661db41cf35f94369140e7d8e0cd68297b98e5db6918cdd3ccca61144d5583612f40a6910cf56f8b9f19
|
|
7
|
+
data.tar.gz: 2ca096b7bed1ce980eb914bca296c0d015e2766026f621881a545ef09efbbaa63cbd2cd3151b6409718af145e2065029a7c4223de31fe3cef2393062fc44836b
|
data/README.md
CHANGED
|
@@ -1,39 +1,217 @@
|
|
|
1
|
-
#
|
|
1
|
+
# Resilient Reads
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Distribute database reads across multiple replicas in Rails with **automatic load balancing**, **health checking**, and **graceful failover** to primary.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
Drop-in replacement for [distribute_reads](https://github.com/ankane/distribute_reads) that adds:
|
|
6
|
+
|
|
7
|
+
- **Multiple replica support** with round-robin or random load balancing across any number of replicas
|
|
8
|
+
- **Graceful failover** — if a replica goes down, reads automatically fall back to primary. No boot crash.
|
|
9
|
+
- **Health monitoring** — background thread periodically re-checks unhealthy replicas and restores them
|
|
10
|
+
- **Per-query logging** — see exactly which connection (primary / replica name) handled each query
|
|
11
|
+
- **No proxy adapter needed** — works with the standard `postgresql`, `mysql2`, or `trilogy` adapters
|
|
12
|
+
- **Rails 7.1+ compatible** — works with Rails 7.1, 7.2, and 8.0+
|
|
13
|
+
- **Query pattern caching** — caches SQL read/write classification results in an LRU cache to avoid repeated regex matching
|
|
14
|
+
- **Lag check caching** — replication lag results are cached per-replica with a configurable TTL (default 5s) to avoid querying lag on every read
|
|
15
|
+
- **Backward compatible** — `distribute_reads { }` and `DistributeReads.by_default = true` still work
|
|
6
16
|
|
|
7
17
|
## Installation
|
|
8
18
|
|
|
9
|
-
|
|
19
|
+
Add to your Gemfile:
|
|
20
|
+
|
|
21
|
+
```ruby
|
|
22
|
+
gem "resilient_reads", path: "gems/resilient_reads"
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Remove any previous read-distribution gems:
|
|
10
26
|
|
|
11
|
-
|
|
27
|
+
```ruby
|
|
28
|
+
# Remove these:
|
|
29
|
+
# gem "distribute_reads"
|
|
30
|
+
# gem "active_record_proxy_adapters"
|
|
31
|
+
```
|
|
12
32
|
|
|
13
|
-
|
|
14
|
-
|
|
33
|
+
## Configuration
|
|
34
|
+
|
|
35
|
+
### database.yml
|
|
36
|
+
|
|
37
|
+
Use the **standard adapter** (`postgresql`, `mysql2`, or `trilogy`) for all connections. Mark replicas with `replica: true`:
|
|
38
|
+
|
|
39
|
+
```yaml
|
|
40
|
+
default: &default
|
|
41
|
+
adapter: postgresql
|
|
42
|
+
pool: 5
|
|
43
|
+
|
|
44
|
+
production:
|
|
45
|
+
primary:
|
|
46
|
+
<<: *default
|
|
47
|
+
host: primary-db.example.com
|
|
48
|
+
database: myapp_production
|
|
49
|
+
replica:
|
|
50
|
+
<<: *default
|
|
51
|
+
host: replica1.example.com
|
|
52
|
+
database: myapp_production
|
|
53
|
+
replica: true
|
|
54
|
+
replica2:
|
|
55
|
+
<<: *default
|
|
56
|
+
host: replica2.example.com
|
|
57
|
+
database: myapp_production
|
|
58
|
+
replica: true
|
|
59
|
+
replica3:
|
|
60
|
+
<<: *default
|
|
61
|
+
host: replica3.example.com
|
|
62
|
+
database: myapp_production
|
|
63
|
+
replica: true
|
|
15
64
|
```
|
|
16
65
|
|
|
17
|
-
|
|
66
|
+
You can add as many replicas as you want — any config whose name matches `/replica\d*/` and that sets `replica: true` is auto-detected. Alternatively, list them explicitly:
|
|
67
|
+
|
|
68
|
+
```ruby
|
|
69
|
+
config.replicas = [:replica, :replica2, :replica3]
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### Initializer
|
|
73
|
+
|
|
74
|
+
```ruby
|
|
75
|
+
# config/initializers/resilient_reads.rb
|
|
76
|
+
ResilientReads.configure do |config|
|
|
77
|
+
config.by_default = true # Route all reads to replicas
|
|
78
|
+
config.eager_load = true # Auto-load lazy relations in blocks
|
|
79
|
+
config.balancing_strategy = :round_robin # :round_robin or :random
|
|
80
|
+
config.health_check_interval = 30 # Seconds between health checks
|
|
81
|
+
config.max_lag = nil # Max replication lag (seconds), nil to skip
|
|
82
|
+
config.lag_failover = true # Use primary when lag exceeds max
|
|
83
|
+
config.failover = true # Fall back to primary when replicas are down
|
|
84
|
+
config.primary_delay = 2 # Seconds to use primary after a write
|
|
85
|
+
config.log_query_routing = true # Log which connection handled each query
|
|
86
|
+
config.lag_check_interval = 5 # Seconds to cache lag check per replica
|
|
87
|
+
config.query_cache_enabled = true # Cache SQL pattern matching results
|
|
88
|
+
config.query_cache_max_size = 10_000 # Max entries in the query cache
|
|
89
|
+
config.sticky_writes = true # After a write, reads stay on primary for the rest of the block
|
|
90
|
+
end
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
### Model
|
|
94
|
+
|
|
95
|
+
Keep your existing `connects_to` — the gem works alongside it:
|
|
18
96
|
|
|
19
|
-
```
|
|
20
|
-
|
|
97
|
+
```ruby
|
|
98
|
+
class ApplicationRecord < ActiveRecord::Base
|
|
99
|
+
primary_abstract_class
|
|
100
|
+
connects_to database: { writing: :primary, reading: :replica }
|
|
101
|
+
end
|
|
21
102
|
```
|
|
22
103
|
|
|
23
104
|
## Usage
|
|
24
105
|
|
|
25
|
-
|
|
106
|
+
### Explicit blocks
|
|
107
|
+
|
|
108
|
+
```ruby
|
|
109
|
+
distribute_reads { User.count } # Reads from a healthy replica
|
|
110
|
+
|
|
111
|
+
distribute_reads do
|
|
112
|
+
User.find_each do |user|
|
|
113
|
+
user.orders_count = user.orders.count # replica (SELECT)
|
|
114
|
+
user.save! # primary (INSERT/UPDATE)
|
|
115
|
+
end
|
|
116
|
+
end
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
### Options
|
|
120
|
+
|
|
121
|
+
```ruby
|
|
122
|
+
distribute_reads(primary: true) { ... } # Force primary
|
|
123
|
+
distribute_reads(max_lag: 3) { ... } # Override max lag
|
|
124
|
+
distribute_reads(max_lag: 3, lag_failover: true) { ... } # Fall back to primary on high lag
|
|
125
|
+
distribute_reads(failover: false) { ... } # Raise if no replicas
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### Jobs
|
|
129
|
+
|
|
130
|
+
```ruby
|
|
131
|
+
class ReportJob < ApplicationJob
|
|
132
|
+
distribute_reads
|
|
133
|
+
|
|
134
|
+
def perform
|
|
135
|
+
# All reads go to replicas
|
|
136
|
+
end
|
|
137
|
+
end
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
### By default
|
|
141
|
+
|
|
142
|
+
When `config.by_default = true`, a Rack middleware automatically wraps
|
|
143
|
+
GET/HEAD requests so all reads hit replicas. After a write (POST/PUT/etc),
|
|
144
|
+
reads stay on primary for `primary_delay` seconds (read-your-own-write).
|
|
145
|
+
|
|
146
|
+
## Query Routing Log
|
|
147
|
+
|
|
148
|
+
When `config.log_query_routing = true` (the default), every routed query is logged with the connection it used:
|
|
149
|
+
|
|
150
|
+
```
|
|
151
|
+
[ResilientReads] → replica 'replica' | User Load | SELECT "users".* FROM "users" WHERE …
|
|
152
|
+
[ResilientReads] → replica 'replica2' | Order Load | SELECT "orders".* FROM "orders" …
|
|
153
|
+
[ResilientReads] → primary (write query) | User Update | UPDATE "users" SET "name" = …
|
|
154
|
+
[ResilientReads] → primary (no healthy replicas) | User Load | SELECT "users".* …
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
This makes it easy to verify that load balancing is working and which replica handled each query. Set `config.log_query_routing = false` to disable.
|
|
158
|
+
|
|
159
|
+
## Query Pattern Caching
|
|
160
|
+
|
|
161
|
+
When `config.query_cache_enabled = true` (the default), the gem caches the result of SQL pattern matching (whether a query is a read or write) in an in-memory LRU cache. This avoids running the regex on every identical query string.
|
|
162
|
+
|
|
163
|
+
```ruby
|
|
164
|
+
# View cache stats
|
|
165
|
+
ResilientReads.query_cache.stats # => { hits: 1234, misses: 56, size: 56 }
|
|
166
|
+
|
|
167
|
+
# Clear the cache manually
|
|
168
|
+
ResilientReads.bust_query_cache!
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
Disable with `config.query_cache_enabled = false`.
|
|
172
|
+
|
|
173
|
+
## Lag Check Caching
|
|
174
|
+
|
|
175
|
+
When `config.max_lag` is set, replication lag is checked for each replica. To avoid querying the replica for lag on **every single read**, the lag value is cached per-replica for `config.lag_check_interval` seconds (default 5). This means the actual lag query runs at most once per interval (every 5 seconds by default) per replica, not on every read.
|
|
176
|
+
|
|
177
|
+
```ruby
|
|
178
|
+
config.lag_check_interval = 10 # Cache lag result for 10 seconds
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
## How it works
|
|
182
|
+
|
|
183
|
+
1. **Adapter-level interception** — the gem prepends on the database adapter's
|
|
184
|
+
`raw_execute`. SELECT queries inside a `distribute_reads` block are routed to
|
|
185
|
+
a healthy replica connection; writes pass through to the primary. Supports
|
|
186
|
+
PostgreSQL, MySQL2, and Trilogy adapters.
|
|
187
|
+
|
|
188
|
+
2. **Separate connection pools** — each replica has its own ActiveRecord connection
|
|
189
|
+
pool (via a lightweight abstract class). Pools are lazy: no actual DB connection
|
|
190
|
+
until the first query, so replicas can be unavailable at boot without crashing.
|
|
191
|
+
|
|
192
|
+
3. **Health checking** — a background thread periodically runs `SELECT 1` against
|
|
193
|
+
each replica. Unhealthy replicas are removed from rotation and restored once
|
|
194
|
+
they recover.
|
|
26
195
|
|
|
27
|
-
|
|
196
|
+
4. **Load balancing** — round-robin (default) or random selection across healthy
|
|
197
|
+
replicas. Works with any number of replicas.
|
|
28
198
|
|
|
29
|
-
|
|
199
|
+
5. **Replication lag** — supports PostgreSQL WAL-based lag detection and MySQL
|
|
200
|
+
`SHOW REPLICA STATUS` / `SHOW SLAVE STATUS` lag checking (`Seconds_Behind_Source` or `Seconds_Behind_Master`). Lag values are
|
|
201
|
+
cached per-replica with a configurable TTL to avoid per-query overhead.
|
|
30
202
|
|
|
31
|
-
|
|
203
|
+
6. **Query pattern caching** — SQL read/write classification results are cached
|
|
204
|
+
in an LRU cache (configurable max size) to avoid repeated regex matching.
|
|
32
205
|
|
|
33
|
-
##
|
|
206
|
+
## Migrating from distribute_reads
|
|
34
207
|
|
|
35
|
-
|
|
208
|
+
1. Replace `gem "distribute_reads"` with `gem "resilient_reads"`
|
|
209
|
+
2. In `database.yml`, change `adapter: postgresql_proxy` to `adapter: postgresql`
|
|
210
|
+
3. Your existing initializer (`DistributeReads.by_default = true` etc.) will
|
|
211
|
+
continue to work via the backward-compatibility shim
|
|
212
|
+
4. Optionally convert to the new `ResilientReads.configure` block
|
|
213
|
+
5. Add extra replicas to `database.yml` — they're auto-detected
|
|
36
214
|
|
|
37
215
|
## License
|
|
38
216
|
|
|
39
|
-
|
|
217
|
+
MIT
|
|
@@ -13,6 +13,10 @@ module ResilientReads
|
|
|
13
13
|
# model loading and connection setup.
|
|
14
14
|
SKIP_NAMES = Set.new(%w[SCHEMA EXPLAIN]).freeze
|
|
15
15
|
|
|
16
|
+
# SQL clauses that acquire locks and must execute on the primary,
|
|
17
|
+
# even though the statement starts with SELECT.
|
|
18
|
+
LOCKING_CLAUSE_PATTERN = /\b(FOR\s+(UPDATE|NO\s+KEY\s+UPDATE|SHARE|KEY\s+SHARE)|LOCK\s+IN\s+SHARE\s+MODE)\b/i
|
|
19
|
+
|
|
16
20
|
def raw_execute(sql, *args, **kwargs)
|
|
17
21
|
ctx = Thread.current[:resilient_reads_context]
|
|
18
22
|
name = args.first
|
|
@@ -21,13 +25,27 @@ module ResilientReads
|
|
|
21
25
|
ctx[:distributing] &&
|
|
22
26
|
!ctx[:on_replica] &&
|
|
23
27
|
!ctx[:routing] &&
|
|
28
|
+
!ctx[:stick_to_primary] &&
|
|
24
29
|
!skip_replica_routing?(sql, name) &&
|
|
25
30
|
open_transactions.zero?
|
|
26
31
|
|
|
27
32
|
execute_on_replica(sql, ctx, *args, **kwargs)
|
|
28
33
|
else
|
|
29
|
-
if ctx && ctx[:distributing]
|
|
30
|
-
|
|
34
|
+
if ctx && ctx[:distributing]
|
|
35
|
+
if write_query?(sql)
|
|
36
|
+
# Sticky writes: after any write inside a distribute_reads
|
|
37
|
+
# block, all subsequent reads stay on primary for the rest of
|
|
38
|
+
# the block. This prevents stale-read → conflicting-write
|
|
39
|
+
# chains that cause MySQL/InnoDB deadlocks, especially with
|
|
40
|
+
# transactionless writes like update_column that don't bump
|
|
41
|
+
# open_transactions.
|
|
42
|
+
if ResilientReads.config.sticky_writes
|
|
43
|
+
ctx[:stick_to_primary] = true
|
|
44
|
+
end
|
|
45
|
+
ResilientReads.log_query("primary", sql, name, reason: "write query")
|
|
46
|
+
elsif ctx[:stick_to_primary]
|
|
47
|
+
ResilientReads.log_query("primary", sql, name, reason: "sticky write")
|
|
48
|
+
end
|
|
31
49
|
end
|
|
32
50
|
super(sql, *args, **kwargs)
|
|
33
51
|
end
|
|
@@ -41,17 +59,25 @@ module ResilientReads
|
|
|
41
59
|
return true if name.nil? || name == ""
|
|
42
60
|
return true if SKIP_NAMES.include?(name)
|
|
43
61
|
return true if write_query?(sql)
|
|
62
|
+
return true if locking_query?(sql)
|
|
44
63
|
false
|
|
45
64
|
end
|
|
46
65
|
|
|
66
|
+
# Detects SELECT statements that acquire row/table locks
|
|
67
|
+
# (e.g. SELECT ... FOR UPDATE, LOCK IN SHARE MODE). These must
|
|
68
|
+
# execute on the primary — a read-only replica cannot acquire locks.
|
|
69
|
+
def locking_query?(sql)
|
|
70
|
+
LOCKING_CLAUSE_PATTERN.match?(sql)
|
|
71
|
+
end
|
|
72
|
+
|
|
47
73
|
def execute_on_replica(sql, ctx, *args, **kwargs)
|
|
48
|
-
# Ensure the primary adapter is connected
|
|
49
|
-
#
|
|
50
|
-
#
|
|
51
|
-
#
|
|
52
|
-
# the primary connection
|
|
53
|
-
#
|
|
54
|
-
connect! unless
|
|
74
|
+
# Ensure the primary adapter is connected. If all prior reads
|
|
75
|
+
# were routed to replicas, the primary connection was never
|
|
76
|
+
# materialized. For PostgreSQL this avoids a nil @type_map
|
|
77
|
+
# (cast_result → get_oid_type → NoMethodError). For MySQL/Trilogy
|
|
78
|
+
# it keeps the primary connection alive so the first write after
|
|
79
|
+
# many reads doesn't hit a stale/timed-out socket.
|
|
80
|
+
connect! unless connected?
|
|
55
81
|
|
|
56
82
|
replica = ResilientReads.replica_pool.next_healthy
|
|
57
83
|
|
|
@@ -84,8 +110,8 @@ module ResilientReads
|
|
|
84
110
|
ctx[:routing] = false
|
|
85
111
|
result
|
|
86
112
|
rescue ActiveRecord::ConnectionNotEstablished,
|
|
87
|
-
|
|
88
|
-
|
|
113
|
+
ActiveRecord::StatementInvalid,
|
|
114
|
+
ActiveRecord::ConnectionFailed => e
|
|
89
115
|
ctx[:routing] = false
|
|
90
116
|
raise unless connection_level_error?(e)
|
|
91
117
|
|
|
@@ -136,6 +162,10 @@ module ResilientReads
|
|
|
136
162
|
cause = error.cause
|
|
137
163
|
cause.is_a?(PG::Error) ||
|
|
138
164
|
cause.is_a?(IOError) ||
|
|
165
|
+
cause.is_a?(Errno::ETIMEDOUT) ||
|
|
166
|
+
cause.is_a?(Errno::ECONNRESET) ||
|
|
167
|
+
cause.is_a?(Errno::EPIPE) ||
|
|
168
|
+
cause.is_a?(Errno::ECONNREFUSED) ||
|
|
139
169
|
(defined?(PG::ConnectionBad) && cause.is_a?(PG::ConnectionBad)) ||
|
|
140
170
|
(defined?(Trilogy::Error) && cause.is_a?(Trilogy::Error)) ||
|
|
141
171
|
(defined?(Mysql2::Error) && cause.is_a?(Mysql2::Error))
|
|
@@ -144,4 +174,4 @@ module ResilientReads
|
|
|
144
174
|
end
|
|
145
175
|
end
|
|
146
176
|
end
|
|
147
|
-
end
|
|
177
|
+
end
|
|
@@ -66,6 +66,12 @@ module ResilientReads
|
|
|
66
66
|
# Maximum number of entries in the SQL pattern cache.
|
|
67
67
|
attr_accessor :query_cache_max_size
|
|
68
68
|
|
|
69
|
+
# When true (default), a write inside a distribute_reads block causes
|
|
70
|
+
# all subsequent reads in the same block to go to primary. This
|
|
71
|
+
# prevents stale-read → conflicting-write chains that cause deadlocks,
|
|
72
|
+
# especially with transactionless writes like update_column.
|
|
73
|
+
attr_accessor :sticky_writes
|
|
74
|
+
|
|
69
75
|
def initialize
|
|
70
76
|
@by_default = false
|
|
71
77
|
@eager_load = false
|
|
@@ -85,6 +91,7 @@ module ResilientReads
|
|
|
85
91
|
@default_options = {}
|
|
86
92
|
@query_cache_enabled = true
|
|
87
93
|
@query_cache_max_size = 10_000
|
|
94
|
+
@sticky_writes = true
|
|
88
95
|
end
|
|
89
96
|
end
|
|
90
|
-
end
|
|
97
|
+
end
|
|
@@ -45,24 +45,29 @@ module ResilientReads
|
|
|
45
45
|
end
|
|
46
46
|
|
|
47
47
|
def self.lag_for_mysql(conn)
|
|
48
|
-
|
|
49
|
-
#
|
|
48
|
+
# Prefer SHOW REPLICA STATUS (MySQL 8.0.22+, MariaDB 10.5.1+)
|
|
49
|
+
# and fall back to the deprecated SHOW SLAVE STATUS.
|
|
50
|
+
result =
|
|
51
|
+
begin
|
|
52
|
+
conn.execute("SHOW REPLICA STATUS")
|
|
53
|
+
rescue ActiveRecord::StatementInvalid
|
|
54
|
+
conn.execute("SHOW SLAVE STATUS")
|
|
55
|
+
end
|
|
50
56
|
|
|
51
57
|
row = if result.respond_to?(:first)
|
|
52
58
|
result.first
|
|
53
|
-
|
|
59
|
+
elsif result.respond_to?(:to_a)
|
|
54
60
|
result.to_a.first
|
|
55
|
-
|
|
61
|
+
end
|
|
56
62
|
|
|
57
63
|
return nil unless row
|
|
58
64
|
|
|
59
|
-
#
|
|
60
|
-
lag =
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
end
|
|
65
|
+
# Seconds_Behind_Source (MySQL 8.0.22+) / Seconds_Behind_Master (legacy)
|
|
66
|
+
lag = if row.is_a?(Hash)
|
|
67
|
+
row["Seconds_Behind_Source"] || row["Seconds_Behind_Master"]
|
|
68
|
+
elsif row.respond_to?(:[])
|
|
69
|
+
row["Seconds_Behind_Source"] || row["Seconds_Behind_Master"]
|
|
70
|
+
end
|
|
66
71
|
|
|
67
72
|
lag&.to_f
|
|
68
73
|
rescue => e
|
|
@@ -70,4 +75,4 @@ module ResilientReads
|
|
|
70
75
|
nil
|
|
71
76
|
end
|
|
72
77
|
end
|
|
73
|
-
end
|
|
78
|
+
end
|
data/lib/resilient_reads.rb
CHANGED