active_retention 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +352 -0
- data/lib/active_retention/configuration.rb +9 -0
- data/lib/active_retention/errors.rb +3 -0
- data/lib/active_retention/model_extension.rb +221 -0
- data/lib/active_retention/purge_job.rb +43 -0
- data/lib/active_retention/version.rb +3 -0
- data/lib/active_retention.rb +28 -0
- data/lib/generators/active_retention/archive_generator.rb +22 -0
- data/lib/generators/active_retention/templates/archive_migration.rb.erb +13 -0
- metadata +142 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: c34ad200a2c7661837737e32419ab22ab39cb6952e4490a546bb46b7b4e7cb79
|
|
4
|
+
data.tar.gz: de5471837bdbc5f1288cd406cc7945622806d46ed2f6291715c9da39d69767ce
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 54347af50f9f13332ea1188767915aa2acefc45729e3fb084d6236b504742f0e01e6488f4ec3e4e01ff8ad8121b604db7287e51ad107d38d1daafafa59ffcd3a
|
|
7
|
+
data.tar.gz: 74bf0c70bb22406d5e8350ba3953424d24d0fc2b786114325f9ca0fb612f62db7ae16cd529deaedf432b2eab53960b941363f28a9b55e5abc57023749d70fdcf
|
data/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Ray West
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,352 @@
|
|
|
1
|
+
# ActiveRetention
|
|
2
|
+
|
|
3
|
+
Automatic data retention and purging for ActiveRecord models. Define how long records should live, choose what happens when they expire, and let ActiveRetention handle the cleanup.
|
|
4
|
+
|
|
5
|
+
Built for production use at any scale — includes batch limiting, advisory locking, transactional archiving, and automatic backlog processing.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
Add the gem to your Gemfile:
|
|
10
|
+
|
|
11
|
+
```ruby
|
|
12
|
+
gem 'active_retention'
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
Then run:
|
|
16
|
+
|
|
17
|
+
```sh
|
|
18
|
+
bundle install
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
ActiveRetention automatically integrates with Rails via a Railtie. No additional setup is needed. See [Configuration](#configuration) if you prefer opt-in inclusion.
|
|
22
|
+
|
|
23
|
+
## Quick Start
|
|
24
|
+
|
|
25
|
+
```ruby
|
|
26
|
+
class Notification < ApplicationRecord
|
|
27
|
+
has_retention_policy period: 30.days, strategy: :destroy
|
|
28
|
+
end
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
```ruby
|
|
32
|
+
# Remove all notifications older than 30 days
|
|
33
|
+
Notification.cleanup_retention!
|
|
34
|
+
# => { count: 42, failed: 0, remaining: false, dry_run: false }
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Defining a Retention Policy
|
|
38
|
+
|
|
39
|
+
Call `has_retention_policy` in any ActiveRecord model to configure how expired records are handled.
|
|
40
|
+
|
|
41
|
+
```ruby
|
|
42
|
+
has_retention_policy(
|
|
43
|
+
period:, # Required — how long records are kept (e.g. 30.days, 1.year)
|
|
44
|
+
strategy: :destroy, # Optional — :destroy, :delete_all, or :archive
|
|
45
|
+
column: :created_at, # Optional — timestamp column used to determine age
|
|
46
|
+
if: nil, # Optional — lambda to further filter which records are eligible
|
|
47
|
+
batch_limit: 10_000 # Optional — max records processed per cleanup call
|
|
48
|
+
)
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### Options
|
|
52
|
+
|
|
53
|
+
#### `period` (required)
|
|
54
|
+
|
|
55
|
+
An `ActiveSupport::Duration` representing the maximum age of a record. Records whose timestamp column is older than `period.ago` are considered expired.
|
|
56
|
+
|
|
57
|
+
The minimum allowed period is **1 hour** to prevent accidental mass deletion.
|
|
58
|
+
|
|
59
|
+
```ruby
|
|
60
|
+
has_retention_policy period: 90.days
|
|
61
|
+
has_retention_policy period: 1.year
|
|
62
|
+
has_retention_policy period: 6.hours
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
#### `strategy` (optional, default: `:destroy`)
|
|
66
|
+
|
|
67
|
+
Determines how expired records are removed.
|
|
68
|
+
|
|
69
|
+
| Strategy | Behavior | Callbacks | Speed |
|
|
70
|
+
|---------------|--------------------------------------------------------------------------|-----------|--------|
|
|
71
|
+
| `:destroy` | Loads each record and calls `destroy`, one at a time via `find_each` | Yes | Slow |
|
|
72
|
+
| `:delete_all` | Bulk deletes matching records by plucking IDs, then issuing `DELETE` | No | Fast |
|
|
73
|
+
| `:archive` | Copies records to an archive table in batches, then deletes the originals | No | Medium |
|
|
74
|
+
|
|
75
|
+
```ruby
|
|
76
|
+
# Triggers model callbacks and dependent: :destroy associations
|
|
77
|
+
has_retention_policy period: 30.days, strategy: :destroy
|
|
78
|
+
|
|
79
|
+
# Fast bulk delete, skips callbacks entirely
|
|
80
|
+
has_retention_policy period: 30.days, strategy: :delete_all
|
|
81
|
+
|
|
82
|
+
# Preserve historical data before removing from the primary table
|
|
83
|
+
has_retention_policy period: 30.days, strategy: :archive
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
**Note on `:destroy`**: If a `before_destroy` callback throws `:abort`, the record is preserved and counted as a failure in the return value. This means records protected by callbacks will never be silently deleted.
|
|
87
|
+
|
|
88
|
+
#### `column` (optional, default: `:created_at`)
|
|
89
|
+
|
|
90
|
+
The timestamp column used to determine whether a record has expired. Must be a valid column on the model's table. An `ArgumentError` is raised at boot time if the column does not exist.
|
|
91
|
+
|
|
92
|
+
```ruby
|
|
93
|
+
class Event < ApplicationRecord
|
|
94
|
+
has_retention_policy period: 90.days, column: :occurred_at
|
|
95
|
+
end
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
#### `if` (optional)
|
|
99
|
+
|
|
100
|
+
A lambda that returns an ActiveRecord scope. When provided, only expired records that also match this scope are eligible for cleanup.
|
|
101
|
+
|
|
102
|
+
The lambda is evaluated in the context of the model's scope, so you can call query methods like `where` directly.
|
|
103
|
+
|
|
104
|
+
```ruby
|
|
105
|
+
class Notification < ApplicationRecord
|
|
106
|
+
has_retention_policy period: 30.days, strategy: :destroy, if: -> { where(read: true) }
|
|
107
|
+
end
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
This will only clean up notifications that are both older than 30 days **and** marked as read.
|
|
111
|
+
|
|
112
|
+
#### `batch_limit` (optional, default: `10_000`)
|
|
113
|
+
|
|
114
|
+
The maximum number of records that will be processed in a single `cleanup_retention!` call. This prevents any single cleanup run from consuming unbounded time, memory, or database resources.
|
|
115
|
+
|
|
116
|
+
```ruby
|
|
117
|
+
# Process at most 5,000 records per run
|
|
118
|
+
has_retention_policy period: 30.days, strategy: :destroy, batch_limit: 5_000
|
|
119
|
+
|
|
120
|
+
# Large batch for fast delete_all on tables with many expired records
|
|
121
|
+
has_retention_policy period: 7.days, strategy: :delete_all, batch_limit: 50_000
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
When the limit is reached, the return value includes `remaining: true` so callers know there are more records to process. The `PurgeJob` automatically re-enqueues itself to handle this (see [Background Job](#background-job)).
|
|
125
|
+
|
|
126
|
+
## Running Cleanup
|
|
127
|
+
|
|
128
|
+
### Manual Cleanup
|
|
129
|
+
|
|
130
|
+
Call `cleanup_retention!` on any model with a retention policy:
|
|
131
|
+
|
|
132
|
+
```ruby
|
|
133
|
+
Notification.cleanup_retention!
|
|
134
|
+
# => { count: 42, failed: 0, remaining: false, dry_run: false }
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
The method returns a hash with:
|
|
138
|
+
|
|
139
|
+
| Key | Type | Description |
|
|
140
|
+
|-------------|---------|-------------|
|
|
141
|
+
| `count` | Integer | Records actually removed |
|
|
142
|
+
| `failed` | Integer | Records where `destroy` returned false (`:destroy` strategy only) |
|
|
143
|
+
| `remaining` | Boolean | `true` if more expired records exist beyond the `batch_limit` |
|
|
144
|
+
| `dry_run` | Boolean | Whether this was a dry run |
|
|
145
|
+
| `skipped` | Boolean | Present and `true` only if another process holds the cleanup lock |
|
|
146
|
+
| `reason` | Symbol | `:locked` when `skipped` is true |
|
|
147
|
+
|
|
148
|
+
### Dry Run
|
|
149
|
+
|
|
150
|
+
Preview how many expired records exist without deleting anything:
|
|
151
|
+
|
|
152
|
+
```ruby
|
|
153
|
+
Notification.cleanup_retention!(dry_run: true)
|
|
154
|
+
# => { count: 42, dry_run: true }
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
The `count` in dry run mode reflects the **total** number of expired records, regardless of `batch_limit`.
|
|
158
|
+
|
|
159
|
+
### Background Job
|
|
160
|
+
|
|
161
|
+
ActiveRetention ships with `ActiveRetention::PurgeJob`, an ActiveJob class that finds and cleans up all models with retention policies.
|
|
162
|
+
|
|
163
|
+
```ruby
|
|
164
|
+
ActiveRetention::PurgeJob.perform_later
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
The job:
|
|
168
|
+
- **Eager-loads** all application models (if not already loaded) to discover every class with a retention policy
|
|
169
|
+
- **Iterates** through each configured model and calls `cleanup_retention!`
|
|
170
|
+
- **Logs** progress, record counts, and any errors to `Rails.logger`
|
|
171
|
+
- **Rescues** errors per-model so that a failure in one model does not halt cleanup of others
|
|
172
|
+
- **Skips** models that are already locked by another process (see [Concurrency Safety](#concurrency-safety))
|
|
173
|
+
- **Auto-re-enqueues** when any model still has remaining expired records, up to 10 rounds per invocation
|
|
174
|
+
|
|
175
|
+
The re-enqueue behavior ensures that large backlogs are fully cleared without waiting for the next scheduled run. The 10-round cap prevents infinite loops if records are being created faster than they can be purged.
|
|
176
|
+
|
|
177
|
+
The job is queued as `:maintenance`. To run it on a recurring schedule, use a scheduler like [sidekiq-cron](https://github.com/sidekiq-cron/sidekiq-cron), [solid_queue](https://github.com/rails/solid_queue), or [whenever](https://github.com/javan/whenever):
|
|
178
|
+
|
|
179
|
+
```ruby
|
|
180
|
+
# Example with sidekiq-cron
|
|
181
|
+
Sidekiq::Cron::Job.create(
|
|
182
|
+
name: 'ActiveRetention purge - daily',
|
|
183
|
+
cron: '0 3 * * *',
|
|
184
|
+
class: 'ActiveRetention::PurgeJob'
|
|
185
|
+
)
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
## Concurrency Safety
|
|
189
|
+
|
|
190
|
+
ActiveRetention uses **database-level advisory locks** to prevent concurrent cleanup of the same model. This protects against:
|
|
191
|
+
- Duplicate archive rows from two processes archiving the same records
|
|
192
|
+
- Attempting to destroy already-deleted records
|
|
193
|
+
- Wasted work from overlapping cleanup runs
|
|
194
|
+
|
|
195
|
+
| Database | Lock Mechanism |
|
|
196
|
+
|------------|------------------------------------------|
|
|
197
|
+
| PostgreSQL | `pg_try_advisory_lock` / `pg_advisory_unlock` |
|
|
198
|
+
| MySQL | `GET_LOCK` / `RELEASE_LOCK` |
|
|
199
|
+
| SQLite (and any other adapter) | Ruby `Mutex` (in-process only) |
|
|
200
|
+
|
|
201
|
+
Locks are **non-blocking**: if a lock is already held, `cleanup_retention!` returns immediately with `{ skipped: true, reason: :locked }` instead of waiting. This means the `PurgeJob` never stalls — it simply skips the locked model and moves on.
|
|
202
|
+
|
|
203
|
+
## Scopes
|
|
204
|
+
|
|
205
|
+
Declaring a retention policy adds an `expired_records` scope to the model. You can use this scope independently of cleanup:
|
|
206
|
+
|
|
207
|
+
```ruby
|
|
208
|
+
Notification.expired_records
|
|
209
|
+
# => ActiveRecord::Relation of all notifications older than 30 days
|
|
210
|
+
|
|
211
|
+
Notification.expired_records.count
|
|
212
|
+
# => 42
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
## Archive Strategy
|
|
216
|
+
|
|
217
|
+
The `:archive` strategy copies expired records into a separate archive table before deleting the originals. This is useful when you need to enforce retention limits on your primary table but want to preserve historical data for auditing or analytics.
|
|
218
|
+
|
|
219
|
+
### How It Works
|
|
220
|
+
|
|
221
|
+
1. Before archiving begins, ActiveRetention verifies the archive table exists. If it doesn't, an `ActiveRetention::ArchiveTableMissing` error is raised with instructions to generate it — no records are touched.
|
|
222
|
+
2. Expired records are loaded in batches (up to 500 records or the `batch_limit`, whichever is smaller).
|
|
223
|
+
3. Within a database transaction for each batch:
|
|
224
|
+
- Record attributes (except `id`) are inserted into the archive table in sub-batches of 50 rows to avoid oversized SQL statements
|
|
225
|
+
- The original records are deleted from the primary table
|
|
226
|
+
- If either the insert or delete fails, the **entire batch is rolled back** — originals are never lost
|
|
227
|
+
4. The archive table automatically receives an `archived_at` timestamp.
|
|
228
|
+
|
|
229
|
+
### Generating the Archive Table
|
|
230
|
+
|
|
231
|
+
Use the built-in Rails generator to create a migration for the archive table:
|
|
232
|
+
|
|
233
|
+
```sh
|
|
234
|
+
rails generate active_retention:archive Notification
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
This generates a migration that creates a `notifications_archive` table mirroring all columns from `notifications` (except `id`), plus an `archived_at` timestamp column. The table uses `bigserial` for its own primary key and includes indexes on `created_at` and `archived_at`.
|
|
238
|
+
|
|
239
|
+
Then run the migration:
|
|
240
|
+
|
|
241
|
+
```sh
|
|
242
|
+
rails db:migrate
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
**Important**: You must run the generator and migration before using `strategy: :archive`. If the archive table is missing, `cleanup_retention!` will raise `ActiveRetention::ArchiveTableMissing` rather than silently failing.
|
|
246
|
+
|
|
247
|
+
### Archive Table Naming
|
|
248
|
+
|
|
249
|
+
The archive table name is derived automatically from the model's table name:
|
|
250
|
+
|
|
251
|
+
| Model Table | Archive Table |
|
|
252
|
+
|-----------------|--------------------------|
|
|
253
|
+
| `notifications` | `notifications_archive` |
|
|
254
|
+
| `events` | `events_archive` |
|
|
255
|
+
| `audit_logs` | `audit_logs_archive` |
|
|
256
|
+
|
|
257
|
+
## Validation
|
|
258
|
+
|
|
259
|
+
ActiveRetention validates all configuration at definition time (when your Rails app boots), not at cleanup time. Invalid configuration raises `ArgumentError` immediately:
|
|
260
|
+
|
|
261
|
+
```ruby
|
|
262
|
+
# Raises ArgumentError — column doesn't exist on the table
|
|
263
|
+
has_retention_policy period: 30.days, column: :nonexistent
|
|
264
|
+
|
|
265
|
+
# Raises ArgumentError — unknown strategy
|
|
266
|
+
has_retention_policy period: 30.days, strategy: :soft_delete
|
|
267
|
+
|
|
268
|
+
# Raises ArgumentError — period too short (minimum is 1 hour)
|
|
269
|
+
has_retention_policy period: 30.seconds
|
|
270
|
+
|
|
271
|
+
# Raises ArgumentError — batch_limit must be a positive integer
|
|
272
|
+
has_retention_policy period: 30.days, batch_limit: -1
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
## STI Support
|
|
276
|
+
|
|
277
|
+
Retention configs are defined using `class_attribute`, which means they are inherited by subclasses. If you use Single Table Inheritance, the parent's retention policy applies to all subclasses unless explicitly overridden:
|
|
278
|
+
|
|
279
|
+
```ruby
|
|
280
|
+
class Notification < ApplicationRecord
|
|
281
|
+
has_retention_policy period: 30.days, strategy: :destroy
|
|
282
|
+
end
|
|
283
|
+
|
|
284
|
+
class AdminNotification < Notification
|
|
285
|
+
# Inherits the 30-day destroy policy from Notification
|
|
286
|
+
end
|
|
287
|
+
|
|
288
|
+
class SystemAlert < Notification
|
|
289
|
+
# Override with a longer retention period
|
|
290
|
+
has_retention_policy period: 1.year, strategy: :archive
|
|
291
|
+
end
|
|
292
|
+
```
|
|
293
|
+
|
|
294
|
+
## Configuration
|
|
295
|
+
|
|
296
|
+
### Auto-Include (default: enabled)
|
|
297
|
+
|
|
298
|
+
By default, ActiveRetention automatically includes itself into all ActiveRecord models via a Railtie. This adds a single `class_attribute` (`retention_config`, defaulting to `nil`) and makes `has_retention_policy` available everywhere. No cleanup runs unless you explicitly call `has_retention_policy` on a model.
|
|
299
|
+
|
|
300
|
+
If you prefer to opt in per model, disable auto-include in an initializer:
|
|
301
|
+
|
|
302
|
+
```ruby
|
|
303
|
+
# config/initializers/active_retention.rb
|
|
304
|
+
ActiveRetention.configure do |config|
|
|
305
|
+
config.auto_include = false
|
|
306
|
+
end
|
|
307
|
+
```
|
|
308
|
+
|
|
309
|
+
Then include the module explicitly on each model that needs it:
|
|
310
|
+
|
|
311
|
+
```ruby
|
|
312
|
+
class Notification < ApplicationRecord
|
|
313
|
+
include ActiveRetention::ModelExtension
|
|
314
|
+
has_retention_policy period: 30.days, strategy: :destroy
|
|
315
|
+
end
|
|
316
|
+
```
|
|
317
|
+
|
|
318
|
+
## Full Example
|
|
319
|
+
|
|
320
|
+
```ruby
|
|
321
|
+
class AuditLog < ApplicationRecord
|
|
322
|
+
has_retention_policy period: 1.year,
|
|
323
|
+
strategy: :archive,
|
|
324
|
+
column: :created_at,
|
|
325
|
+
batch_limit: 25_000
|
|
326
|
+
end
|
|
327
|
+
|
|
328
|
+
class Session < ApplicationRecord
|
|
329
|
+
has_retention_policy period: 24.hours,
|
|
330
|
+
strategy: :delete_all,
|
|
331
|
+
batch_limit: 50_000
|
|
332
|
+
end
|
|
333
|
+
|
|
334
|
+
class Notification < ApplicationRecord
|
|
335
|
+
has_retention_policy period: 30.days,
|
|
336
|
+
strategy: :destroy,
|
|
337
|
+
if: -> { where(read: true) }
|
|
338
|
+
end
|
|
339
|
+
```
|
|
340
|
+
|
|
341
|
+
```ruby
|
|
342
|
+
# Preview what would be cleaned up (reports total, ignores batch_limit)
|
|
343
|
+
AuditLog.cleanup_retention!(dry_run: true)
|
|
344
|
+
# => { count: 150_204, dry_run: true }
|
|
345
|
+
|
|
346
|
+
# Run cleanup (processes up to 25,000 per call)
|
|
347
|
+
AuditLog.cleanup_retention!
|
|
348
|
+
# => { count: 25_000, remaining: true, dry_run: false }
|
|
349
|
+
|
|
350
|
+
# Or clean up everything via the background job (auto-re-enqueues until clear)
|
|
351
|
+
ActiveRetention::PurgeJob.perform_later
|
|
352
|
+
```
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
require 'active_support/concern'
|
|
2
|
+
require 'active_support/core_ext/numeric/time'
|
|
3
|
+
require 'zlib'
|
|
4
|
+
require 'active_retention/errors'
|
|
5
|
+
|
|
6
|
+
module ActiveRetention
|
|
7
|
+
module ModelExtension
|
|
8
|
+
extend ActiveSupport::Concern
|
|
9
|
+
|
|
10
|
+
MINIMUM_RETENTION_PERIOD = 1.hour
|
|
11
|
+
DEFAULT_BATCH_LIMIT = 10_000
|
|
12
|
+
ARCHIVE_INSERT_BATCH_SIZE = 50
|
|
13
|
+
|
|
14
|
+
class_methods do
|
|
15
|
+
# Declares a retention policy for the model and defines the `expired_records` scope.
#
# All options are validated immediately, so a misconfigured policy fails when the
# class is loaded rather than when cleanup runs.
#
# @param period [ActiveSupport::Duration] maximum record age; must respond to `ago`
#   and be at least MINIMUM_RETENTION_PERIOD
# @param strategy [Symbol] :destroy, :delete_all or :archive
# @param options [Hash] optional settings:
#   - :column [Symbol, String] timestamp column compared against `period.ago` (default :created_at)
#   - :if [#to_proc] callable evaluated on the expired scope to narrow it further
#   - :batch_limit [Integer] maximum records processed per cleanup call (default DEFAULT_BATCH_LIMIT)
# @raise [ArgumentError] if any option is invalid
def has_retention_policy(period:, strategy: :destroy, **options)
  column = (options[:column] || :created_at).to_s
  batch_limit = options.fetch(:batch_limit, DEFAULT_BATCH_LIMIT)
  filter = options[:if]

  unless column_names.include?(column)
    raise ArgumentError, "Unknown column '#{column}' for #{table_name}"
  end

  unless %i[destroy delete_all archive].include?(strategy)
    raise ArgumentError, "Unknown strategy '#{strategy}'. Must be :destroy, :delete_all, or :archive"
  end

  # Without this guard a nil or non-duration period surfaces as a NoMethodError,
  # either in the comparison below or later when the scope calls `period.ago`.
  unless period.respond_to?(:ago)
    raise ArgumentError, "period must be an ActiveSupport::Duration (e.g. 30.days), got #{period.inspect}"
  end

  if period < MINIMUM_RETENTION_PERIOD
    raise ArgumentError,
          "Retention period must be at least #{MINIMUM_RETENTION_PERIOD.inspect}. " \
          "A very short period risks accidental mass deletion."
  end

  unless batch_limit.is_a?(Integer) && batch_limit > 0
    raise ArgumentError, "batch_limit must be a positive integer, got #{batch_limit.inspect}"
  end

  # The filter is later passed with `&`, so anything convertible to a proc is acceptable.
  unless filter.nil? || filter.respond_to?(:to_proc)
    raise ArgumentError, "if: must be a lambda or proc returning a scope, got #{filter.inspect}"
  end

  self.retention_config = {
    period: period,
    strategy: strategy,
    filter: filter,
    column: column,
    batch_limit: batch_limit
  }

  # The column is table-qualified and quoted so the scope stays unambiguous in joins.
  scope :expired_records, -> {
    quoted_column = "#{connection.quote_table_name(table_name)}.#{connection.quote_column_name(retention_config[:column])}"
    where("#{quoted_column} < ?", retention_config[:period].ago)
  }
end
|
|
50
|
+
|
|
51
|
+
# Runs one cleanup pass for this model while holding the retention lock.
#
# @param dry_run [Boolean] when true, only report how many records are expired
# @return [Hash, nil] the result hash from the cleanup (or the "skipped" hash when
#   the lock is unavailable); nil when the model has no retention policy
def cleanup_retention!(dry_run: false)
  if retention_config
    with_retention_lock { perform_cleanup!(dry_run: dry_run) }
  end
end
|
|
58
|
+
|
|
59
|
+
private
|
|
60
|
+
|
|
61
|
+
# Builds the expired scope, dispatches to the configured strategy and reports the outcome.
#
# @param dry_run [Boolean] when true, return the total expired count without removing anything
# @return [Hash] `{ count:, dry_run: true }` for a dry run; otherwise the strategy's
#   result hash plus `remaining:`, which is true only when expired records exist
#   beyond the ones this call processed
def perform_cleanup!(dry_run:)
  scope = expired_records
  scope = scope.merge(scope.instance_exec(&retention_config[:filter])) if retention_config[:filter]

  total_expired = scope.count
  return { count: total_expired, dry_run: true } if dry_run

  batch_limit = retention_config[:batch_limit]

  result = case retention_config[:strategy]
           when :destroy
             perform_destroy_cleanup(scope, batch_limit)
           when :delete_all
             perform_delete_all_cleanup(scope, batch_limit)
           when :archive
             validate_archive_table!
             perform_archive_cleanup(scope, batch_limit)
           end

  # Records whose destroy failed were still processed. Counting only successful
  # removals would report `remaining: true` whenever a record is protected by a
  # callback, even though no further records are waiting.
  processed = result[:count] + result.fetch(:failed, 0)
  result.merge(remaining: total_expired > processed)
end
|
|
82
|
+
|
|
83
|
+
# Destroys expired records one at a time so callbacks run.
#
# @param scope [#find_each] relation of expired records
# @param batch_limit [Integer] maximum number of records to attempt in this call
# @return [Hash] `{ count:, failed:, dry_run: false }` where `failed` counts records
#   whose `destroy` returned a falsy value
def perform_destroy_cleanup(scope, batch_limit)
  destroyed = 0
  failed = 0

  # Cap the fetch size so a small batch_limit does not load a full default-sized
  # batch of 1000 records; this mirrors the cap used by the archive strategy.
  scope.find_each(batch_size: [1000, batch_limit].min) do |record|
    break if destroyed + failed >= batch_limit

    if record.destroy
      destroyed += 1
    else
      failed += 1
    end
  end

  { count: destroyed, failed: failed, dry_run: false }
end
|
|
99
|
+
|
|
100
|
+
# Bulk-deletes up to `batch_limit` expired records without instantiating them.
#
# Keys are plucked first and deleted by key, so the limit is honoured without
# relying on a `DELETE ... LIMIT` statement.
#
# @param scope [#limit] relation of expired records
# @param batch_limit [Integer] maximum number of records to delete in this call
# @return [Hash] `{ count:, dry_run: false }`
def perform_delete_all_cleanup(scope, batch_limit)
  # Use the model's declared primary key rather than assuming a column named `id`.
  # NOTE(review): assumes a single-column primary key; composite keys are not handled here.
  key = primary_key
  ids = scope.limit(batch_limit).pluck(key)
  deleted = ids.empty? ? 0 : where(key => ids).delete_all

  { count: deleted, dry_run: false }
end
|
|
106
|
+
|
|
107
|
+
# Archives expired records and wraps the archived count in the standard result hash.
#
# @param scope [Object] relation of expired records, passed through to `archive_retention!`
# @param batch_limit [Integer] maximum number of records to archive in this call
# @return [Hash] `{ count:, dry_run: false }`
def perform_archive_cleanup(scope, batch_limit)
  { count: archive_retention!(scope, batch_limit: batch_limit), dry_run: false }
end
|
|
112
|
+
|
|
113
|
+
# Ensures the `<table_name>_archive` table exists before any record is touched.
#
# @return [nil] when the archive table exists
# @raise [ActiveRetention::ArchiveTableMissing] when the archive table is absent
def validate_archive_table!
  archive_table = "#{table_name}_archive"
  return if connection.table_exists?(archive_table)

  message = "Archive table '#{archive_table}' does not exist. " \
            "Run `rails generate active_retention:archive #{name}` to create it."
  raise ActiveRetention::ArchiveTableMissing, message
end
|
|
121
|
+
|
|
122
|
+
# Copies expired records into `<table_name>_archive` and deletes the originals.
#
# Each fetched batch is handled inside its own transaction: rows are inserted in
# slices of ARCHIVE_INSERT_BATCH_SIZE and the originals are deleted afterwards, so
# an error during insert or delete propagates out of the transaction block.
#
# @param scope [#find_in_batches] relation of expired records
# @param batch_limit [Integer] maximum number of records to archive in this call
# @return [Integer] number of records archived
def archive_retention!(scope, batch_limit:)
  archive_table = connection.quote_table_name("#{table_name}_archive")
  # Use the declared primary key instead of assuming a column literally named `id`.
  # NOTE(review): assumes a single-column primary key.
  key = primary_key
  total_archived = 0

  scope.find_in_batches(batch_size: [500, batch_limit].min) do |batch|
    remaining = batch_limit - total_archived
    batch = batch.first(remaining) if batch.size > remaining

    transaction do
      # Quote the values as loaded from the database rather than the type-cast
      # attributes: type-cast values (enum labels, deserialized hashes/arrays) are
      # not what the column stores and may not be quotable at all.
      rows = batch.map { |record| record.attributes_before_type_cast.except(key) }

      unless rows.empty?
        columns = rows.first.keys
        quoted_columns = columns.map { |c| connection.quote_column_name(c) }.join(', ')

        rows.each_slice(ARCHIVE_INSERT_BATCH_SIZE) do |chunk|
          values_sql = chunk.map do |row|
            "(#{columns.map { |c| connection.quote(row[c]) }.join(', ')})"
          end.join(', ')

          connection.execute("INSERT INTO #{archive_table} (#{quoted_columns}) VALUES #{values_sql}")
        end
      end

      where(key => batch.map(&:id)).delete_all
      total_archived += batch.size
    end

    break if total_archived >= batch_limit
  end

  total_archived
end
|
|
156
|
+
|
|
157
|
+
# --- Advisory Locking ---
|
|
158
|
+
|
|
159
|
+
# Runs the given block under the locking mechanism that matches the current adapter.
#
# The adapter name is matched case-insensitively: names containing "postgres" use a
# PostgreSQL advisory lock, names containing "mysql" use a MySQL named lock, and
# every other adapter falls back to an in-process Mutex.
#
# @return [Object] whatever the selected lock helper returns
def with_retention_lock
  adapter_name = connection.adapter_name.downcase

  if /postgres/.match?(adapter_name)
    with_pg_advisory_lock { yield }
  elsif /mysql/.match?(adapter_name)
    with_mysql_lock { yield }
  else
    with_mutex_lock { yield }
  end
end
|
|
171
|
+
|
|
172
|
+
# Derives a numeric lock key from the table name.
#
# @return [Integer] CRC32 of "active_retention:<table_name>" masked to 31 bits,
#   so the value is always non-negative
def retention_lock_key
  checksum = Zlib.crc32("active_retention:#{table_name}")
  checksum & 0x7FFFFFFF
end
|
|
175
|
+
|
|
176
|
+
# Runs the block while holding a PostgreSQL advisory lock keyed by `retention_lock_key`.
#
# The lock is requested with `pg_try_advisory_lock`, so this never waits.
#
# @return [Object] the block's value, or `skipped_result` when the lock was not acquired
def with_pg_advisory_lock
  lock_key = retention_lock_key
  acquired = connection.select_value("SELECT pg_try_advisory_lock(#{lock_key})")

  if acquired
    begin
      yield
    ensure
      # Always release, even when the block raises.
      connection.execute("SELECT pg_advisory_unlock(#{lock_key})")
    end
  else
    skipped_result
  end
end
|
|
186
|
+
|
|
187
|
+
# Runs the block while holding a MySQL named lock for this table.
#
# The lock is requested with a zero timeout, so this never waits.
#
# @return [Object] the block's value, or `skipped_result` when the lock was not acquired
def with_mysql_lock
  lock_name = "active_retention_#{table_name}"
  # MySQL limits user-level lock names to 64 characters; for long table names fall
  # back to the numeric key so GET_LOCK does not reject the name.
  lock_name = "active_retention_#{retention_lock_key}" if lock_name.length > 64
  # Quote through the connection instead of interpolating the name between raw quotes.
  quoted_name = connection.quote(lock_name)

  acquired = connection.select_value("SELECT GET_LOCK(#{quoted_name}, 0)")
  return skipped_result unless acquired == 1

  begin
    yield
  ensure
    # Always release, even when the block raises.
    connection.execute("SELECT RELEASE_LOCK(#{quoted_name})")
  end
end
|
|
197
|
+
|
|
198
|
+
# Runs the block while holding a lazily created Mutex stored on the receiver.
#
# The lock is attempted with `try_lock`, so this never waits.
#
# @return [Object] the block's value, or `skipped_result` when the mutex is already held
def with_mutex_lock
  mutex = (@_retention_mutex ||= Mutex.new)
  return skipped_result unless mutex.try_lock

  begin
    yield
  ensure
    mutex.unlock
  end
end
|
|
211
|
+
|
|
212
|
+
# Result hash returned when the cleanup lock could not be acquired.
#
# @return [Hash] zero count with `skipped: true` and `reason: :locked`
def skipped_result
  {
    count: 0,
    skipped: true,
    reason: :locked,
    dry_run: false
  }
end
|
|
215
|
+
end
|
|
216
|
+
|
|
217
|
+
included do
|
|
218
|
+
class_attribute :retention_config, instance_writer: false, default: nil
|
|
219
|
+
end
|
|
220
|
+
end
|
|
221
|
+
end
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
module ActiveRetention
  # Background job that runs `cleanup_retention!` on every model with a retention
  # policy and re-enqueues itself while any model reports remaining records.
  class PurgeJob < ActiveJob::Base
    queue_as :maintenance

    # Upper bound on self re-enqueues, so a backlog that never drains cannot loop forever.
    MAX_REENQUEUE_ROUNDS = 10

    # @param round [Integer] 1-based counter of how many times this purge has been enqueued
    def perform(round: 1)
      Rails.application.eager_load! unless Rails.application.config.eager_load

      # `map` before `any?` so every model is purged even after one reports a backlog.
      has_remaining = models_with_retention.map { |model| purge_model(model, round) }.any?

      if has_remaining && round < MAX_REENQUEUE_ROUNDS
        Rails.logger.info "[ActiveRetention] Re-enqueueing (round #{round + 1}/#{MAX_REENQUEUE_ROUNDS}) — models still have expired records."
        self.class.perform_later(round: round + 1)
      elsif has_remaining
        Rails.logger.warn "[ActiveRetention] Reached maximum of #{MAX_REENQUEUE_ROUNDS} rounds. Some expired records remain."
      end
    end

    private

    # Purges one model, logging the outcome. Errors are rescued and logged so that a
    # failure in one model does not stop the others.
    #
    # @return [Boolean] true when the model reports more expired records to process
    def purge_model(model, round)
      Rails.logger.info "[ActiveRetention] Purging #{model.name} (round #{round})..."

      result = model.cleanup_retention!
      # A nil result means the model has no policy; there is nothing to report.
      return false if result.nil?

      if result[:skipped]
        Rails.logger.info "[ActiveRetention] Skipped #{model.name} (already locked by another process)."
        false
      else
        Rails.logger.info "[ActiveRetention] Cleaned up #{result[:count]} #{model.name} records."
        result[:remaining] ? true : false
      end
    rescue StandardError => e
      Rails.logger.error "[ActiveRetention] Failed to purge #{model.name}: #{e.message}"
      false
    end

    # Models that declare a policy, excluding subclasses that merely inherit their
    # parent's policy on the same table.
    def models_with_retention
      ActiveRecord::Base.descendants.select do |model|
        model.respond_to?(:retention_config) &&
          model.retention_config.present? &&
          !inherits_policy_on_same_table?(model)
      end
    end

    # True when `model` shares its parent's table and the very same config object,
    # i.e. it never called `has_retention_policy` itself. Purging the parent already
    # covers those rows, so visiting the subclass again would only repeat the work.
    def inherits_policy_on_same_table?(model)
      parent = model.superclass

      parent.respond_to?(:retention_config) &&
        parent.retention_config.equal?(model.retention_config) &&
        parent.table_name == model.table_name
    end
  end
end
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
require "active_retention/version"
|
|
2
|
+
require "active_retention/errors"
|
|
3
|
+
require "active_retention/configuration"
|
|
4
|
+
require "active_retention/model_extension"
|
|
5
|
+
require "active_retention/purge_job"
|
|
6
|
+
|
|
7
|
+
# Top-level namespace: holds the global configuration and, when Rails is present,
# the Railtie that mixes ModelExtension into ActiveRecord.
module ActiveRetention
  class << self
    # @return [Configuration] the global configuration object
    attr_accessor :configuration
  end

  # Yields the global configuration for modification, creating it if needed.
  #
  # @yieldparam configuration [Configuration]
  def self.configure
    self.configuration ||= Configuration.new
    yield(configuration)
  end

  self.configuration = Configuration.new

  # The gem does not depend on railties, so only define the Railtie when Rails is
  # actually loaded; otherwise requiring the gem would raise a NameError.
  if defined?(Rails::Railtie)
    class Railtie < Rails::Railtie
      initializer "active_retention.model_extension" do
        ActiveSupport.on_load(:active_record) do
          # Read `auto_include` when the hook fires rather than when this initializer
          # runs, so a value set in the app's config/initializers can take effect.
          # NOTE(review): assumes ActiveRecord::Base is loaded after the app's
          # initializers have run — confirm for apps that load it earlier.
          include ActiveRetention::ModelExtension if ActiveRetention.configuration.auto_include
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
require 'rails/generators/active_record'
|
|
2
|
+
|
|
3
|
+
module ActiveRetention
  module Generators
    # Rails generator that creates the migration for a model's archive table.
    #
    # Usage: rails generate active_retention:archive ModelName
    class ArchiveGenerator < ActiveRecord::Generators::Base
      source_root File.expand_path('templates', __dir__)

      # Resolves the model and renders the archive migration from the template.
      def create_migration_file
        @model_name = name.camelize
        # Ask the model for its table name instead of deriving it from the class
        # name: the cleanup code looks for "#{table_name}_archive", so the two must
        # agree for namespaced models and models with a custom table name.
        @table_name = @model_name.constantize.table_name
        @archive_table_name = "#{@table_name}_archive"
        migration_template "archive_migration.rb.erb", "db/migrate/create_#{@archive_table_name}.rb"
      end

      private

      # Columns to mirror into the archive table; the archive table gets its own `id`.
      def model_columns
        @model_name.constantize.columns.reject { |c| c.name == 'id' }
      end
    end
  end
end
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Creates the archive table used by ActiveRetention's :archive strategy.
class Create<%= @archive_table_name.camelize %> < ActiveRecord::Migration[<%= ActiveRecord::Migration.current_version %>]
  def change
    # `id: :bigint` is used instead of the PostgreSQL-only `:bigserial` so the
    # migration is not tied to one database adapter.
    create_table :<%= @archive_table_name %>, id: :bigint do |t|
<% model_columns.each do |column| -%>
      t.<%= column.type %> :<%= column.name %><%= ", limit: #{column.limit.inspect}" unless column.limit.nil? %><%= ", precision: #{column.precision.inspect}" unless column.precision.nil? %><%= ", scale: #{column.scale.inspect}" unless column.scale.nil? %><%= ", null: #{column.null}" unless column.null %>
<% end -%>
      t.datetime :archived_at, null: false, default: -> { 'CURRENT_TIMESTAMP' }
    end

<% if model_columns.any? { |column| column.name == 'created_at' } -%>
    add_index :<%= @archive_table_name %>, :created_at
<% end -%>
    add_index :<%= @archive_table_name %>, :archived_at
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: active_retention
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Ray West
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2026-02-07 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: activerecord
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - ">="
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '6.1'
|
|
20
|
+
type: :runtime
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - ">="
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '6.1'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: activesupport
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - ">="
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '6.1'
|
|
34
|
+
type: :runtime
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - ">="
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '6.1'
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: activejob
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - ">="
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: '6.1'
|
|
48
|
+
type: :runtime
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - ">="
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: '6.1'
|
|
55
|
+
- !ruby/object:Gem::Dependency
|
|
56
|
+
name: rspec
|
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
|
58
|
+
requirements:
|
|
59
|
+
- - "~>"
|
|
60
|
+
- !ruby/object:Gem::Version
|
|
61
|
+
version: '3.12'
|
|
62
|
+
type: :development
|
|
63
|
+
prerelease: false
|
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
65
|
+
requirements:
|
|
66
|
+
- - "~>"
|
|
67
|
+
- !ruby/object:Gem::Version
|
|
68
|
+
version: '3.12'
|
|
69
|
+
- !ruby/object:Gem::Dependency
|
|
70
|
+
name: sqlite3
|
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
|
72
|
+
requirements:
|
|
73
|
+
- - ">="
|
|
74
|
+
- !ruby/object:Gem::Version
|
|
75
|
+
version: '0'
|
|
76
|
+
type: :development
|
|
77
|
+
prerelease: false
|
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
79
|
+
requirements:
|
|
80
|
+
- - ">="
|
|
81
|
+
- !ruby/object:Gem::Version
|
|
82
|
+
version: '0'
|
|
83
|
+
- !ruby/object:Gem::Dependency
|
|
84
|
+
name: rake
|
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
|
86
|
+
requirements:
|
|
87
|
+
- - "~>"
|
|
88
|
+
- !ruby/object:Gem::Version
|
|
89
|
+
version: '13.0'
|
|
90
|
+
type: :development
|
|
91
|
+
prerelease: false
|
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
93
|
+
requirements:
|
|
94
|
+
- - "~>"
|
|
95
|
+
- !ruby/object:Gem::Version
|
|
96
|
+
version: '13.0'
|
|
97
|
+
description: Define retention policies on ActiveRecord models to automatically destroy,
|
|
98
|
+
delete, or archive expired records. Includes batch limiting, advisory locking, transactional
|
|
99
|
+
archiving, and background job support.
|
|
100
|
+
email:
|
|
101
|
+
- ray@example.com
|
|
102
|
+
executables: []
|
|
103
|
+
extensions: []
|
|
104
|
+
extra_rdoc_files: []
|
|
105
|
+
files:
|
|
106
|
+
- LICENSE
|
|
107
|
+
- README.md
|
|
108
|
+
- lib/active_retention.rb
|
|
109
|
+
- lib/active_retention/configuration.rb
|
|
110
|
+
- lib/active_retention/errors.rb
|
|
111
|
+
- lib/active_retention/model_extension.rb
|
|
112
|
+
- lib/active_retention/purge_job.rb
|
|
113
|
+
- lib/active_retention/version.rb
|
|
114
|
+
- lib/generators/active_retention/archive_generator.rb
|
|
115
|
+
- lib/generators/active_retention/templates/archive_migration.rb.erb
|
|
116
|
+
homepage: https://github.com/raywest/active_retention
|
|
117
|
+
licenses:
|
|
118
|
+
- MIT
|
|
119
|
+
metadata:
|
|
120
|
+
homepage_uri: https://github.com/raywest/active_retention
|
|
121
|
+
source_code_uri: https://github.com/raywest/active_retention
|
|
122
|
+
changelog_uri: https://github.com/raywest/active_retention/blob/main/CHANGELOG.md
|
|
123
|
+
post_install_message:
|
|
124
|
+
rdoc_options: []
|
|
125
|
+
require_paths:
|
|
126
|
+
- lib
|
|
127
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
128
|
+
requirements:
|
|
129
|
+
- - ">="
|
|
130
|
+
- !ruby/object:Gem::Version
|
|
131
|
+
version: 3.0.0
|
|
132
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
133
|
+
requirements:
|
|
134
|
+
- - ">="
|
|
135
|
+
- !ruby/object:Gem::Version
|
|
136
|
+
version: '0'
|
|
137
|
+
requirements: []
|
|
138
|
+
rubygems_version: 3.5.3
|
|
139
|
+
signing_key:
|
|
140
|
+
specification_version: 4
|
|
141
|
+
summary: Automatic data retention and purging for ActiveRecord models.
|
|
142
|
+
test_files: []
|