historiographer 4.1.2 → 4.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +168 -1
- data/lib/historiographer/history.rb +24 -3
- data/lib/historiographer/relation.rb +4 -15
- data/lib/historiographer/version.rb +1 -1
- data/lib/historiographer.rb +62 -30
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ec4560a7984db99bb67a91f6ed46ffd5e0e0b68b208b9f5c56079d5d0c967c06
|
4
|
+
data.tar.gz: 558f4f69363da666c55ad58bdf4aa1108e405c96d54e2ae4d96f4d7baa87e4a0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 803285240c47f720ae89ba027d1143e7b0e63ed765bb26399b7d126e2c04449664604a7488553f8996f89c3736d86909ed52702fcc5fb4bf7df2cb2fa5de4a44
|
7
|
+
data.tar.gz: 263c7cd005f3cd3589a1ed47c62d7127844686c5b945e1835995b99151b0bbceb6b72c86bac80e09b17efde796e3eeef2b34a0d9f33fb99945915e14b1c9aef8
|
data/README.md
CHANGED
@@ -130,7 +130,174 @@ This can be useful when:
|
|
130
130
|
- You're versioning training data for machine learning models
|
131
131
|
- You need to maintain immutable audit trails at specific checkpoints
|
132
132
|
|
133
|
-
|
133
|
+
## Single Table Inheritance (STI)
|
134
|
+
|
135
|
+
Historiographer fully supports Single Table Inheritance, both with the default `type` column and with custom inheritance columns.
|
136
|
+
|
137
|
+
### Default STI with `type` column
|
138
|
+
|
139
|
+
```ruby
|
140
|
+
class Post < ApplicationRecord
|
141
|
+
include Historiographer
|
142
|
+
end
|
143
|
+
|
144
|
+
class PrivatePost < Post
|
145
|
+
end
|
146
|
+
|
147
|
+
# The history classes follow the same inheritance pattern:
|
148
|
+
class PostHistory < ApplicationRecord
|
149
|
+
include Historiographer::History
|
150
|
+
end
|
151
|
+
|
152
|
+
class PrivatePostHistory < PostHistory
|
153
|
+
end
|
154
|
+
```
|
155
|
+
|
156
|
+
History records automatically maintain the correct STI type:
|
157
|
+
|
158
|
+
```ruby
|
159
|
+
private_post = PrivatePost.create(title: "Secret", history_user_id: current_user.id)
|
160
|
+
private_post.snapshot
|
161
|
+
|
162
|
+
# History records are the correct subclass
|
163
|
+
history = PostHistory.last
|
164
|
+
history.is_a?(PrivatePostHistory) #=> true
|
165
|
+
history.type #=> "PrivatePostHistory"
|
166
|
+
```
|
167
|
+
|
168
|
+
### Custom Inheritance Columns
|
169
|
+
|
170
|
+
You can also use a custom column for STI instead of the default `type`:
|
171
|
+
|
172
|
+
```ruby
|
173
|
+
class MLModel < ApplicationRecord
|
174
|
+
include Historiographer
|
175
|
+
self.inheritance_column = :model_type
|
176
|
+
end
|
177
|
+
|
178
|
+
class XGBoost < MLModel
|
179
|
+
self.table_name = "ml_models"
|
180
|
+
end
|
181
|
+
|
182
|
+
# History classes use the same custom column
|
183
|
+
class MLModelHistory < MLModel
|
184
|
+
self.inheritance_column = :model_type
|
185
|
+
self.table_name = "ml_model_histories"
|
186
|
+
end
|
187
|
+
|
188
|
+
class XGBoostHistory < MLModelHistory
|
189
|
+
end
|
190
|
+
```
|
191
|
+
|
192
|
+
Migration for custom inheritance column:
|
193
|
+
|
194
|
+
```ruby
|
195
|
+
create_table :ml_models do |t|
|
196
|
+
t.string :name
|
197
|
+
t.string :model_type # Custom inheritance column
|
198
|
+
t.jsonb :parameters
|
199
|
+
t.timestamps
|
200
|
+
|
201
|
+
t.index :model_type
|
202
|
+
end
|
203
|
+
|
204
|
+
create_table :ml_model_histories do |t|
|
205
|
+
t.histories # Includes all columns from parent table
|
206
|
+
end
|
207
|
+
```
|
208
|
+
|
209
|
+
The custom inheritance column works just like the default `type`:
|
210
|
+
|
211
|
+
```ruby
|
212
|
+
model = XGBoost.create(name: "My Model", history_user_id: current_user.id)
|
213
|
+
model.snapshot
|
214
|
+
|
215
|
+
# History records maintain the correct subclass
|
216
|
+
history = MLModelHistory.last
|
217
|
+
history.is_a?(XGBoostHistory) #=> true
|
218
|
+
history.model_type #=> "XGBoostHistory"
|
219
|
+
```
|
220
|
+
|
221
|
+
### STI and Snapshots: Perfect for Model Versioning
|
222
|
+
|
223
|
+
Single Table Inheritance combined with Historiographer's snapshot feature is particularly powerful for versioning machine learning models and other complex systems that need immutable historical records. Here's why:
|
224
|
+
|
225
|
+
1. **Type-Safe History**: When you snapshot an ML model, both the model and its parameters are preserved with their exact implementation type. This ensures that when you retrieve historical versions, you get back exactly the right subclass with its specific behavior:
|
226
|
+
|
227
|
+
```ruby
|
228
|
+
# Create and configure an XGBoost model
|
229
|
+
model = XGBoost.create(
|
230
|
+
name: "Customer Churn Predictor v1",
|
231
|
+
parameters: { max_depth: 3, eta: 0.1 },
|
232
|
+
history_user_id: current_user.id
|
233
|
+
)
|
234
|
+
|
235
|
+
# Take a snapshot before training
|
236
|
+
model.snapshot
|
237
|
+
|
238
|
+
# Update the model after training
|
239
|
+
model.update(
|
240
|
+
name: "Customer Churn Predictor v2",
|
241
|
+
parameters: { max_depth: 5, eta: 0.2 },
|
242
|
+
history_user_id: current_user.id
|
243
|
+
)
|
244
|
+
|
245
|
+
# Later, retrieve the exact pre-training version
|
246
|
+
historical_model = MLModel.latest_snapshot
|
247
|
+
historical_model.is_a?(XGBoostHistory) #=> true
|
248
|
+
historical_model.parameters #=> { max_depth: 3, eta: 0.1 }
|
249
|
+
```
|
250
|
+
|
251
|
+
2. **Implementation Versioning**: Different model types often have different parameters, preprocessing steps, or scoring methods. STI ensures these differences are preserved in history:
|
252
|
+
|
253
|
+
```ruby
|
254
|
+
class XGBoost < MLModel
|
255
|
+
def predict(data)
|
256
|
+
# XGBoost-specific prediction logic
|
257
|
+
end
|
258
|
+
end
|
259
|
+
|
260
|
+
class RandomForest < MLModel
|
261
|
+
def predict(data)
|
262
|
+
# RandomForest-specific prediction logic
|
263
|
+
end
|
264
|
+
end
|
265
|
+
|
266
|
+
# Your historical records maintain these implementation differences
|
267
|
+
old_model = MLModel.latest_snapshot
|
268
|
+
old_model.predict(data) # Runs the predict implementation of the model type recorded in the snapshot
|
269
|
+
```
|
270
|
+
|
271
|
+
3. **Reproducibility**: Essential for ML workflows where you need to reproduce results or audit model behavior:
|
272
|
+
|
273
|
+
```ruby
|
274
|
+
# Create model and snapshot at each significant stage
|
275
|
+
model = XGBoost.create(name: "Risk Scorer v1", history_user_id: current_user.id)
|
276
|
+
|
277
|
+
# Snapshot after initial configuration
|
278
|
+
model.snapshot(metadata: { stage: "configuration" })
|
279
|
+
|
280
|
+
# Snapshot after training
|
281
|
+
model.update(parameters: trained_parameters)
|
282
|
+
model.snapshot(metadata: { stage: "post_training" })
|
283
|
+
|
284
|
+
# Snapshot after validation
|
285
|
+
model.update(parameters: validated_parameters)
|
286
|
+
model.snapshot(metadata: { stage: "validated" })
|
287
|
+
|
288
|
+
# Later, you can retrieve any version to reproduce results
|
289
|
+
initial_version = model.histories.find_by(metadata: { stage: "configuration" })
|
290
|
+
trained_version = model.histories.find_by(metadata: { stage: "post_training" })
|
291
|
+
```
|
292
|
+
|
293
|
+
This combination of STI and snapshots is particularly valuable for:
|
294
|
+
- Model governance and compliance
|
295
|
+
- A/B testing different model types
|
296
|
+
- Debugging model behavior
|
297
|
+
- Reproducing historical predictions
|
298
|
+
- Maintaining audit trails for regulatory requirements
|
299
|
+
|
300
|
+
## Getting Started
|
134
301
|
|
135
302
|
Whenever you include the `Historiographer` gem in your ActiveRecord model, it allows you to insert, update, or delete data as you normally would.
|
136
303
|
|
data/lib/historiographer/history.rb
CHANGED
@@ -88,6 +88,7 @@ module Historiographer
|
|
88
88
|
# "RetailerProductHistory."
|
89
89
|
#
|
90
90
|
foreign_class_name = base.name.gsub(/History$/) {} # e.g. "RetailerProductHistory" => "RetailerProduct"
|
91
|
+
foreign_class = foreign_class_name.constantize
|
91
92
|
association_name = foreign_class_name.split("::").last.underscore.to_sym # e.g. "RetailerProduct" => :retailer_product
|
92
93
|
|
93
94
|
#
|
@@ -115,6 +116,14 @@ module Historiographer
|
|
115
116
|
belongs_to association_name, class_name: foreign_class_name
|
116
117
|
end
|
117
118
|
|
119
|
+
# Enable STI for history classes
|
120
|
+
if foreign_class.sti_enabled?
|
121
|
+
self.inheritance_column = 'type'
|
122
|
+
end
|
123
|
+
|
124
|
+
# Ensure we can't destroy history records
|
125
|
+
before_destroy { |record| raise "Cannot destroy history records" }
|
126
|
+
|
118
127
|
#
|
119
128
|
# A History record should never be destroyed.
|
120
129
|
#
|
@@ -158,19 +167,31 @@ module Historiographer
|
|
158
167
|
# Orders by history_started_at and id to handle cases where multiple records
|
159
168
|
# have the same history_started_at timestamp
|
160
169
|
scope :latest_snapshot, -> {
|
161
|
-
where.not(snapshot_id: nil).order('id DESC').limit(1)&.first
|
170
|
+
where.not(snapshot_id: nil).order('id DESC').limit(1)&.first || none
|
162
171
|
}
|
163
172
|
end
|
164
173
|
|
165
174
|
class_methods do
|
166
|
-
|
167
175
|
#
|
168
176
|
# The foreign key to the primary class.
|
169
177
|
#
|
170
178
|
# E.g. PostHistory.history_foreign_key => post_id
|
171
179
|
#
|
172
180
|
def history_foreign_key
|
173
|
-
|
181
|
+
return @history_foreign_key if @history_foreign_key
|
182
|
+
|
183
|
+
@history_foreign_key = sti_base_class.table_name.singularize.foreign_key
|
184
|
+
end
|
185
|
+
|
186
|
+
def sti_base_class
|
187
|
+
return @sti_base_class if @sti_base_class
|
188
|
+
|
189
|
+
base_name = name.gsub(/History$/, '')
|
190
|
+
base_class = base_name.constantize
|
191
|
+
while base_class.superclass != ActiveRecord::Base
|
192
|
+
base_class = base_class.superclass
|
193
|
+
end
|
194
|
+
@sti_base_class = base_class
|
174
195
|
end
|
175
196
|
end
|
176
197
|
end
|
data/lib/historiographer/relation.rb
CHANGED
@@ -36,13 +36,7 @@ module Historiographer
|
|
36
36
|
history_user_id = updates[:history_user_id]
|
37
37
|
|
38
38
|
new_histories = records.map do |record|
|
39
|
-
attrs
|
40
|
-
foreign_key = history_class.history_foreign_key
|
41
|
-
|
42
|
-
attrs.merge!(foreign_key => attrs["id"], history_started_at: now, history_user_id: history_user_id)
|
43
|
-
|
44
|
-
attrs = attrs.except("id")
|
45
|
-
|
39
|
+
attrs = record.history_attrs(now: now)
|
46
40
|
record.histories.build(attrs)
|
47
41
|
end
|
48
42
|
|
@@ -72,14 +66,9 @@ module Historiographer
|
|
72
66
|
|
73
67
|
if records.first.respond_to?(:paranoia_destroy)
|
74
68
|
new_histories = records.map do |record|
|
75
|
-
attrs
|
76
|
-
|
77
|
-
|
78
|
-
now = UTC.now
|
79
|
-
attrs.merge!(foreign_key => attrs["id"], history_started_at: now, history_user_id: history_user_id, deleted_at: now)
|
80
|
-
|
81
|
-
attrs = attrs.except("id")
|
82
|
-
|
69
|
+
attrs = record.history_attrs(now: now)
|
70
|
+
attrs[:history_user_id] = history_user_id
|
71
|
+
attrs[:deleted_at] = now
|
83
72
|
record.histories.build(attrs)
|
84
73
|
end
|
85
74
|
history_class.import new_histories
|
data/lib/historiographer.rb
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
require 'active_support/all'
|
4
4
|
require 'securerandom'
|
5
|
+
require_relative './historiographer/configuration'
|
5
6
|
require_relative './historiographer/history'
|
6
7
|
require_relative './historiographer/postgres_migration'
|
7
8
|
require_relative './historiographer/safe'
|
@@ -84,10 +85,6 @@ module Historiographer
|
|
84
85
|
after_save :record_history, if: :should_record_history?
|
85
86
|
validate :validate_history_user_id_present, if: :should_validate_history_user_id_present?
|
86
87
|
|
87
|
-
# Add scope to fetch latest histories
|
88
|
-
scope :latest_snapshot, -> {
|
89
|
-
history_class.latest_snapshot
|
90
|
-
}
|
91
88
|
|
92
89
|
def should_alert_history_user_id_present?
|
93
90
|
!snapshot_mode? && !is_history_class? && Thread.current[:skip_history_user_id_validation] != true
|
@@ -188,12 +185,30 @@ module Historiographer
|
|
188
185
|
begin
|
189
186
|
class_name.constantize
|
190
187
|
rescue StandardError
|
188
|
+
# Get the base table name without _histories suffix
|
189
|
+
base_table = base.table_name.sub(/_histories$/, '')
|
190
|
+
|
191
191
|
history_class_initializer = Class.new(base) do
|
192
|
-
self.table_name = "#{
|
193
|
-
|
192
|
+
self.table_name = "#{base_table}_histories"
|
193
|
+
|
194
|
+
# Handle STI properly
|
195
|
+
self.inheritance_column = base.inheritance_column if base.respond_to?(:inheritance_column)
|
194
196
|
end
|
195
197
|
|
196
|
-
|
198
|
+
# Split the class name into module parts and the actual class name
|
199
|
+
module_parts = class_name.split('::')
|
200
|
+
final_class_name = module_parts.pop
|
201
|
+
|
202
|
+
# Navigate through module hierarchy
|
203
|
+
target_module = module_parts.inject(Object) do |mod, module_name|
|
204
|
+
mod.const_defined?(module_name) ? mod.const_get(module_name) : mod.const_set(module_name, Module.new)
|
205
|
+
end
|
206
|
+
|
207
|
+
# Set the constant in the correct module
|
208
|
+
history_class = target_module.const_set(final_class_name, history_class_initializer)
|
209
|
+
|
210
|
+
# Now that the class is named, include the History module and extend class methods
|
211
|
+
history_class.send(:include, Historiographer::History)
|
197
212
|
end
|
198
213
|
|
199
214
|
klass = class_name.constantize
|
@@ -262,21 +277,13 @@ module Historiographer
|
|
262
277
|
end
|
263
278
|
end)
|
264
279
|
|
265
|
-
|
266
|
-
|
267
|
-
else
|
268
|
-
opts = { class_name: class_name }
|
269
|
-
opts[:foreign_key] = klass.history_foreign_key if klass.respond_to?(:history_foreign_key)
|
270
|
-
if RUBY_VERSION.to_i >= 3
|
271
|
-
has_many :histories, **opts
|
272
|
-
has_one :current_history, -> { current }, **opts
|
273
|
-
else
|
274
|
-
has_many :histories, opts
|
275
|
-
has_one :current_history, -> { current }, opts
|
276
|
-
end
|
280
|
+
def histories
|
281
|
+
history_class.where(history_class.history_foreign_key => self.send(self.class.primary_key))
|
277
282
|
end
|
278
283
|
|
279
|
-
|
284
|
+
def current_history
|
285
|
+
history_class.where(history_class.history_foreign_key => self.send(self.class.primary_key)).current&.first
|
286
|
+
end
|
280
287
|
|
281
288
|
#
|
282
289
|
# The acts_as_paranoid gem, which we tend to use with our History classes,
|
@@ -352,6 +359,30 @@ module Historiographer
|
|
352
359
|
end
|
353
360
|
end
|
354
361
|
|
362
|
+
def history_class
|
363
|
+
self.class.history_class
|
364
|
+
end
|
365
|
+
|
366
|
+
def history_attrs(snapshot_id: nil, now: nil)
|
367
|
+
attrs = attributes.clone
|
368
|
+
history_class = self.class.history_class
|
369
|
+
foreign_key = history_class.history_foreign_key
|
370
|
+
|
371
|
+
now ||= UTC.now
|
372
|
+
attrs.merge!(foreign_key => attrs['id'], history_started_at: now, history_user_id: history_user_id)
|
373
|
+
attrs.merge!(snapshot_id: snapshot_id) if snapshot_id.present?
|
374
|
+
|
375
|
+
# For STI, ensure we use the correct history class type
|
376
|
+
if self.class.sti_enabled?
|
377
|
+
type_column = self.class.inheritance_column
|
378
|
+
attrs[type_column] = "#{self.class.name}History"
|
379
|
+
end
|
380
|
+
|
381
|
+
attrs = attrs.except('id')
|
382
|
+
|
383
|
+
attrs
|
384
|
+
end
|
385
|
+
|
355
386
|
private
|
356
387
|
|
357
388
|
def history_user_absent_action
|
@@ -367,19 +398,11 @@ module Historiographer
|
|
367
398
|
def record_history(snapshot_id: nil)
|
368
399
|
history_user_absent_action if history_user_id.nil? && should_alert_history_user_id_present?
|
369
400
|
|
370
|
-
attrs = attributes.clone
|
371
|
-
history_class = self.class.history_class
|
372
|
-
foreign_key = history_class.history_foreign_key
|
373
|
-
|
374
401
|
now = UTC.now
|
375
|
-
attrs
|
376
|
-
attrs.merge!(snapshot_id: snapshot_id) if snapshot_id.present?
|
377
|
-
|
378
|
-
attrs = attrs.except('id')
|
379
|
-
|
402
|
+
attrs = history_attrs(snapshot_id: snapshot_id, now: now)
|
380
403
|
current_history = histories.where(history_ended_at: nil).order('id desc').limit(1).last
|
381
404
|
|
382
|
-
if
|
405
|
+
if history_class.history_foreign_key.present? && history_class.present?
|
383
406
|
history_class.create!(attrs).tap do |history|
|
384
407
|
current_history.update!(history_ended_at: now) if current_history.present?
|
385
408
|
end
|
@@ -397,6 +420,11 @@ module Historiographer
|
|
397
420
|
end
|
398
421
|
|
399
422
|
class_methods do
|
423
|
+
def latest_snapshot
|
424
|
+
instance = history_class.latest_snapshot
|
425
|
+
instance.is_a?(history_class) ? instance : nil
|
426
|
+
end
|
427
|
+
|
400
428
|
def is_history_class?
|
401
429
|
name.match?(/History$/)
|
402
430
|
end
|
@@ -422,6 +450,10 @@ module Historiographer
|
|
422
450
|
def get_historiographer_mode
|
423
451
|
@historiographer_mode || Historiographer::Configuration.mode
|
424
452
|
end
|
453
|
+
|
454
|
+
def sti_enabled?
|
455
|
+
columns.map(&:name).include?(inheritance_column)
|
456
|
+
end
|
425
457
|
end
|
426
458
|
|
427
459
|
def is_history_class?
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: historiographer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.1.2
|
4
|
+
version: 4.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- brettshollenberger
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-11-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -220,7 +220,7 @@ dependencies:
|
|
220
220
|
- - ">="
|
221
221
|
- !ruby/object:Gem::Version
|
222
222
|
version: '0'
|
223
|
-
description:
|
223
|
+
description: Append-only histories + chained snapshots of your ActiveRecord tables
|
224
224
|
email: brett.shollenberger@gmail.com
|
225
225
|
executables: []
|
226
226
|
extensions: []
|