historiographer 4.1.1 → 4.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +168 -1
- data/lib/historiographer/history.rb +24 -3
- data/lib/historiographer/history_migration.rb +1 -0
- data/lib/historiographer/relation.rb +4 -15
- data/lib/historiographer/version.rb +2 -2
- data/lib/historiographer.rb +35 -14
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 378dc2b078459333c3c0510904c394f27aab51358bec13097cdefdf84627e618
|
4
|
+
data.tar.gz: cc27a0afe8050aef01bbada81657f481277b21fcdc94fa5f32976d3f06c01969
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 28489a6de7ec69e1856f45d8336b0ce4f3de6ccdfcf6f9d3fa1e974cfc85dfbf5badc25a06fd121889c363d466dac8cb11ce2c2e836660ff629d594fe01ccb7c
|
7
|
+
data.tar.gz: 60082b9549d0b682e06760eb1af6491c14b0f6081ffbf92586ec70a030edc6021276344eef1ca66917f8e20fcddb50d3dbd67ff74bfc6f275041af001f20a184
|
data/README.md
CHANGED
@@ -130,7 +130,174 @@ This can be useful when:
|
|
130
130
|
- You're versioning training data for machine learning models
|
131
131
|
- You need to maintain immutable audit trails at specific checkpoints
|
132
132
|
|
133
|
-
|
133
|
+
## Single Table Inheritance (STI)
|
134
|
+
|
135
|
+
Historiographer fully supports Single Table Inheritance, both with the default `type` column and with custom inheritance columns.
|
136
|
+
|
137
|
+
### Default STI with `type` column
|
138
|
+
|
139
|
+
```ruby
|
140
|
+
class Post < ApplicationRecord
|
141
|
+
include Historiographer
|
142
|
+
end
|
143
|
+
|
144
|
+
class PrivatePost < Post
|
145
|
+
end
|
146
|
+
|
147
|
+
# The history classes follow the same inheritance pattern:
|
148
|
+
class PostHistory < ApplicationRecord
|
149
|
+
include Historiographer::History
|
150
|
+
end
|
151
|
+
|
152
|
+
class PrivatePostHistory < PostHistory
|
153
|
+
end
|
154
|
+
```
|
155
|
+
|
156
|
+
History records automatically maintain the correct STI type:
|
157
|
+
|
158
|
+
```ruby
|
159
|
+
private_post = PrivatePost.create(title: "Secret", history_user_id: current_user.id)
|
160
|
+
private_post.snapshot
|
161
|
+
|
162
|
+
# History records are the correct subclass
|
163
|
+
history = PostHistory.last
|
164
|
+
history.is_a?(PrivatePostHistory) #=> true
|
165
|
+
history.type #=> "PrivatePostHistory"
|
166
|
+
```
|
167
|
+
|
168
|
+
### Custom Inheritance Columns
|
169
|
+
|
170
|
+
You can also use a custom column for STI instead of the default `type`:
|
171
|
+
|
172
|
+
```ruby
|
173
|
+
class MLModel < ApplicationRecord
|
174
|
+
include Historiographer
|
175
|
+
self.inheritance_column = :model_type
|
176
|
+
end
|
177
|
+
|
178
|
+
class XGBoost < MLModel
|
179
|
+
self.table_name = "ml_models"
|
180
|
+
end
|
181
|
+
|
182
|
+
# History classes use the same custom column
|
183
|
+
class MLModelHistory < MLModel
|
184
|
+
self.inheritance_column = :model_type
|
185
|
+
self.table_name = "ml_model_histories"
|
186
|
+
end
|
187
|
+
|
188
|
+
class XGBoostHistory < MLModelHistory
|
189
|
+
end
|
190
|
+
```
|
191
|
+
|
192
|
+
Migration for custom inheritance column:
|
193
|
+
|
194
|
+
```ruby
|
195
|
+
create_table :ml_models do |t|
|
196
|
+
t.string :name
|
197
|
+
t.string :model_type # Custom inheritance column
|
198
|
+
t.jsonb :parameters
|
199
|
+
t.timestamps
|
200
|
+
|
201
|
+
t.index :model_type
|
202
|
+
end
|
203
|
+
|
204
|
+
create_table :ml_model_histories do |t|
|
205
|
+
t.histories # Includes all columns from parent table
|
206
|
+
end
|
207
|
+
```
|
208
|
+
|
209
|
+
The custom inheritance column works just like the default `type`:
|
210
|
+
|
211
|
+
```ruby
|
212
|
+
model = XGBoost.create(name: "My Model", history_user_id: current_user.id)
|
213
|
+
model.snapshot
|
214
|
+
|
215
|
+
# History records maintain the correct subclass
|
216
|
+
history = MLModelHistory.last
|
217
|
+
history.is_a?(XGBoostHistory) #=> true
|
218
|
+
history.model_type #=> "XGBoostHistory"
|
219
|
+
```
|
220
|
+
|
221
|
+
### STI and Snapshots: Perfect for Model Versioning
|
222
|
+
|
223
|
+
Single Table Inheritance combined with Historiographer's snapshot feature is particularly powerful for versioning machine learning models and other complex systems that need immutable historical records. Here's why:
|
224
|
+
|
225
|
+
1. **Type-Safe History**: When you snapshot an ML model, both the model and its parameters are preserved with their exact implementation type. This ensures that when you retrieve historical versions, you get back exactly the right subclass with its specific behavior:
|
226
|
+
|
227
|
+
```ruby
|
228
|
+
# Create and configure an XGBoost model
|
229
|
+
model = XGBoost.create(
|
230
|
+
name: "Customer Churn Predictor v1",
|
231
|
+
parameters: { max_depth: 3, eta: 0.1 },
|
232
|
+
history_user_id: current_user.id
|
233
|
+
)
|
234
|
+
|
235
|
+
# Take a snapshot before training
|
236
|
+
model.snapshot
|
237
|
+
|
238
|
+
# Update the model after training
|
239
|
+
model.update(
|
240
|
+
name: "Customer Churn Predictor v2",
|
241
|
+
parameters: { max_depth: 5, eta: 0.2 },
|
242
|
+
history_user_id: current_user.id
|
243
|
+
)
|
244
|
+
|
245
|
+
# Later, retrieve the exact pre-training version
|
246
|
+
historical_model = MLModel.latest_snapshot
|
247
|
+
historical_model.is_a?(XGBoostHistory) #=> true
|
248
|
+
historical_model.parameters #=> { max_depth: 3, eta: 0.1 }
|
249
|
+
```
|
250
|
+
|
251
|
+
2. **Implementation Versioning**: Different model types often have different parameters, preprocessing steps, or scoring methods. STI ensures these differences are preserved in history:
|
252
|
+
|
253
|
+
```ruby
|
254
|
+
class XGBoost < MLModel
|
255
|
+
def predict(data)
|
256
|
+
# XGBoost-specific prediction logic
|
257
|
+
end
|
258
|
+
end
|
259
|
+
|
260
|
+
class RandomForest < MLModel
|
261
|
+
def predict(data)
|
262
|
+
# RandomForest-specific prediction logic
|
263
|
+
end
|
264
|
+
end
|
265
|
+
|
266
|
+
# Your historical records maintain these implementation differences
|
267
|
+
old_model = MLModel.latest_snapshot
|
268
|
+
old_model.predict(data) # Uses the exact prediction logic from that point in time
|
269
|
+
```
|
270
|
+
|
271
|
+
3. **Reproducibility**: Essential for ML workflows where you need to reproduce results or audit model behavior:
|
272
|
+
|
273
|
+
```ruby
|
274
|
+
# Create model and snapshot at each significant stage
|
275
|
+
model = XGBoost.create(name: "Risk Scorer v1", history_user_id: current_user.id)
|
276
|
+
|
277
|
+
# Snapshot after initial configuration
|
278
|
+
model.snapshot(metadata: { stage: "configuration" })
|
279
|
+
|
280
|
+
# Snapshot after training
|
281
|
+
model.update(parameters: trained_parameters)
|
282
|
+
model.snapshot(metadata: { stage: "post_training" })
|
283
|
+
|
284
|
+
# Snapshot after validation
|
285
|
+
model.update(parameters: validated_parameters)
|
286
|
+
model.snapshot(metadata: { stage: "validated" })
|
287
|
+
|
288
|
+
# Later, you can retrieve any version to reproduce results
|
289
|
+
initial_version = model.histories.find_by(metadata: { stage: "configuration" })
|
290
|
+
trained_version = model.histories.find_by(metadata: { stage: "post_training" })
|
291
|
+
```
|
292
|
+
|
293
|
+
This combination of STI and snapshots is particularly valuable for:
|
294
|
+
- Model governance and compliance
|
295
|
+
- A/B testing different model types
|
296
|
+
- Debugging model behavior
|
297
|
+
- Reproducing historical predictions
|
298
|
+
- Maintaining audit trails for regulatory requirements
|
299
|
+
|
300
|
+
## Getting Started
|
134
301
|
|
135
302
|
Whenever you include the `Historiographer` gem in your ActiveRecord model, it allows you to insert, update, or delete data as you normally would.
|
136
303
|
|
data/lib/historiographer/history.rb
CHANGED
@@ -88,6 +88,7 @@ module Historiographer
|
|
88
88
|
# "RetailerProductHistory."
|
89
89
|
#
|
90
90
|
foreign_class_name = base.name.gsub(/History$/) {} # e.g. "RetailerProductHistory" => "RetailerProduct"
|
91
|
+
foreign_class = foreign_class_name.constantize
|
91
92
|
association_name = foreign_class_name.split("::").last.underscore.to_sym # e.g. "RetailerProduct" => :retailer_product
|
92
93
|
|
93
94
|
#
|
@@ -115,6 +116,14 @@ module Historiographer
|
|
115
116
|
belongs_to association_name, class_name: foreign_class_name
|
116
117
|
end
|
117
118
|
|
119
|
+
# Enable STI for history classes
|
120
|
+
if foreign_class.sti_enabled?
|
121
|
+
self.inheritance_column = 'type'
|
122
|
+
end
|
123
|
+
|
124
|
+
# Ensure we can't destroy history records
|
125
|
+
before_destroy { |record| raise "Cannot destroy history records" }
|
126
|
+
|
118
127
|
#
|
119
128
|
# A History record should never be destroyed.
|
120
129
|
#
|
@@ -158,19 +167,31 @@ module Historiographer
|
|
158
167
|
# Orders by history_started_at and id to handle cases where multiple records
|
159
168
|
# have the same history_started_at timestamp
|
160
169
|
scope :latest_snapshot, -> {
|
161
|
-
where.not(snapshot_id: nil).order('id DESC').limit(1)&.first
|
170
|
+
where.not(snapshot_id: nil).order('id DESC').limit(1)&.first || none
|
162
171
|
}
|
163
172
|
end
|
164
173
|
|
165
174
|
class_methods do
|
166
|
-
|
167
175
|
#
|
168
176
|
# The foreign key to the primary class.
|
169
177
|
#
|
170
178
|
# E.g. PostHistory.history_foreign_key => post_id
|
171
179
|
#
|
172
180
|
def history_foreign_key
|
173
|
-
|
181
|
+
return @history_foreign_key if @history_foreign_key
|
182
|
+
|
183
|
+
@history_foreign_key = sti_base_class.name.underscore.foreign_key
|
184
|
+
end
|
185
|
+
|
186
|
+
def sti_base_class
|
187
|
+
return @sti_base_class if @sti_base_class
|
188
|
+
|
189
|
+
base_name = name.gsub(/History$/, '')
|
190
|
+
base_class = base_name.constantize
|
191
|
+
while base_class.superclass != ActiveRecord::Base
|
192
|
+
base_class = base_class.superclass
|
193
|
+
end
|
194
|
+
@sti_base_class = base_class
|
174
195
|
end
|
175
196
|
end
|
176
197
|
end
|
data/lib/historiographer/history_migration.rb
CHANGED
@@ -26,6 +26,7 @@ module Historiographer
|
|
26
26
|
|
27
27
|
class_name = original_table_name.classify
|
28
28
|
klass = Object.const_set(class_name, class_definer)
|
29
|
+
klass.send("table_name=", original_table_name)
|
29
30
|
original_columns = klass.columns.reject { |c| c.name == "id" || except.include?(c.name) || (only.any? && only.exclude?(c.name)) || no_business_columns }
|
30
31
|
|
31
32
|
integer foreign_key.to_sym, null: false
|
data/lib/historiographer/relation.rb
CHANGED
@@ -36,13 +36,7 @@ module Historiographer
|
|
36
36
|
history_user_id = updates[:history_user_id]
|
37
37
|
|
38
38
|
new_histories = records.map do |record|
|
39
|
-
attrs
|
40
|
-
foreign_key = history_class.history_foreign_key
|
41
|
-
|
42
|
-
attrs.merge!(foreign_key => attrs["id"], history_started_at: now, history_user_id: history_user_id)
|
43
|
-
|
44
|
-
attrs = attrs.except("id")
|
45
|
-
|
39
|
+
attrs = record.history_attrs(now: now)
|
46
40
|
record.histories.build(attrs)
|
47
41
|
end
|
48
42
|
|
@@ -72,14 +66,9 @@ module Historiographer
|
|
72
66
|
|
73
67
|
if records.first.respond_to?(:paranoia_destroy)
|
74
68
|
new_histories = records.map do |record|
|
75
|
-
attrs
|
76
|
-
|
77
|
-
|
78
|
-
now = UTC.now
|
79
|
-
attrs.merge!(foreign_key => attrs["id"], history_started_at: now, history_user_id: history_user_id, deleted_at: now)
|
80
|
-
|
81
|
-
attrs = attrs.except("id")
|
82
|
-
|
69
|
+
attrs = record.history_attrs(now: now)
|
70
|
+
attrs[:history_user_id] = history_user_id
|
71
|
+
attrs[:deleted_at] = now
|
83
72
|
record.histories.build(attrs)
|
84
73
|
end
|
85
74
|
history_class.import new_histories
|
data/lib/historiographer/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
module Historiographer
|
2
|
-
VERSION = "4.1.1"
|
3
|
-
end
|
2
|
+
VERSION = "4.1.3"
|
3
|
+
end
|
data/lib/historiographer.rb
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
require 'active_support/all'
|
4
4
|
require 'securerandom'
|
5
|
+
require_relative './historiographer/configuration'
|
5
6
|
require_relative './historiographer/history'
|
6
7
|
require_relative './historiographer/postgres_migration'
|
7
8
|
require_relative './historiographer/safe'
|
@@ -84,10 +85,6 @@ module Historiographer
|
|
84
85
|
after_save :record_history, if: :should_record_history?
|
85
86
|
validate :validate_history_user_id_present, if: :should_validate_history_user_id_present?
|
86
87
|
|
87
|
-
# Add scope to fetch latest histories
|
88
|
-
scope :latest_snapshot, -> {
|
89
|
-
history_class.latest_snapshot
|
90
|
-
}
|
91
88
|
|
92
89
|
def should_alert_history_user_id_present?
|
93
90
|
!snapshot_mode? && !is_history_class? && Thread.current[:skip_history_user_id_validation] != true
|
@@ -352,6 +349,29 @@ module Historiographer
|
|
352
349
|
end
|
353
350
|
end
|
354
351
|
|
352
|
+
def history_class
|
353
|
+
self.class.history_class
|
354
|
+
end
|
355
|
+
|
356
|
+
def history_attrs(snapshot_id: nil, now: nil)
|
357
|
+
attrs = attributes.clone
|
358
|
+
history_class = self.class.history_class
|
359
|
+
foreign_key = history_class.history_foreign_key
|
360
|
+
|
361
|
+
now ||= UTC.now
|
362
|
+
attrs.merge!(foreign_key => attrs['id'], history_started_at: now, history_user_id: history_user_id)
|
363
|
+
attrs.merge!(snapshot_id: snapshot_id) if snapshot_id.present?
|
364
|
+
|
365
|
+
# For STI, ensure we use the correct history class type
|
366
|
+
if self.class.sti_enabled?
|
367
|
+
attrs[self.class.inheritance_column] = history_class.name
|
368
|
+
end
|
369
|
+
|
370
|
+
attrs = attrs.except('id')
|
371
|
+
|
372
|
+
attrs
|
373
|
+
end
|
374
|
+
|
355
375
|
private
|
356
376
|
|
357
377
|
def history_user_absent_action
|
@@ -367,19 +387,11 @@ module Historiographer
|
|
367
387
|
def record_history(snapshot_id: nil)
|
368
388
|
history_user_absent_action if history_user_id.nil? && should_alert_history_user_id_present?
|
369
389
|
|
370
|
-
attrs = attributes.clone
|
371
|
-
history_class = self.class.history_class
|
372
|
-
foreign_key = history_class.history_foreign_key
|
373
|
-
|
374
390
|
now = UTC.now
|
375
|
-
attrs
|
376
|
-
attrs.merge!(snapshot_id: snapshot_id) if snapshot_id.present?
|
377
|
-
|
378
|
-
attrs = attrs.except('id')
|
379
|
-
|
391
|
+
attrs = history_attrs(snapshot_id: snapshot_id, now: now)
|
380
392
|
current_history = histories.where(history_ended_at: nil).order('id desc').limit(1).last
|
381
393
|
|
382
|
-
if
|
394
|
+
if history_class.history_foreign_key.present? && history_class.present?
|
383
395
|
history_class.create!(attrs).tap do |history|
|
384
396
|
current_history.update!(history_ended_at: now) if current_history.present?
|
385
397
|
end
|
@@ -397,6 +409,11 @@ module Historiographer
|
|
397
409
|
end
|
398
410
|
|
399
411
|
class_methods do
|
412
|
+
def latest_snapshot
|
413
|
+
instance = history_class.latest_snapshot
|
414
|
+
instance.is_a?(history_class) ? instance : nil
|
415
|
+
end
|
416
|
+
|
400
417
|
def is_history_class?
|
401
418
|
name.match?(/History$/)
|
402
419
|
end
|
@@ -422,6 +439,10 @@ module Historiographer
|
|
422
439
|
def get_historiographer_mode
|
423
440
|
@historiographer_mode || Historiographer::Configuration.mode
|
424
441
|
end
|
442
|
+
|
443
|
+
def sti_enabled?
|
444
|
+
columns.map(&:name).include?(inheritance_column)
|
445
|
+
end
|
425
446
|
end
|
426
447
|
|
427
448
|
def is_history_class?
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: historiographer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.1.1
|
4
|
+
version: 4.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- brettshollenberger
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-11-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -220,7 +220,7 @@ dependencies:
|
|
220
220
|
- - ">="
|
221
221
|
- !ruby/object:Gem::Version
|
222
222
|
version: '0'
|
223
|
-
description:
|
223
|
+
description: Append-only histories + chained snapshots of your ActiveRecord tables
|
224
224
|
email: brett.shollenberger@gmail.com
|
225
225
|
executables: []
|
226
226
|
extensions: []
|