historiographer 4.1.16 → 4.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +0 -168
- data/VERSION +1 -1
- data/historiographer.gemspec +8 -16
- data/lib/historiographer/history.rb +1 -14
- data/lib/historiographer.rb +27 -14
- data/spec/db/migrate/2025082100000_create_projects.rb +14 -0
- data/spec/db/migrate/2025082100001_create_project_files.rb +18 -0
- data/spec/db/schema.rb +44 -0
- data/spec/historiographer_spec.rb +122 -120
- data/spec/models/easy_ml/column.rb +0 -1
- data/spec/models/project.rb +4 -0
- data/spec/models/project_file.rb +5 -0
- data/spec/models/project_file_history.rb +4 -0
- data/spec/models/project_history.rb +4 -0
- metadata +7 -15
- data/spec/db/migrate/20241118000000_add_type_to_posts.rb +0 -6
- data/spec/db/migrate/20241118000001_add_type_to_post_histories.rb +0 -5
- data/spec/db/migrate/20241118000002_create_ml_models.rb +0 -19
- data/spec/db/migrate/20241118000003_create_easy_ml_columns.rb +0 -17
- data/spec/models/dataset.rb +0 -6
- data/spec/models/dataset_history.rb +0 -4
- data/spec/models/easy_ml/encrypted_column.rb +0 -10
- data/spec/models/easy_ml/encrypted_column_history.rb +0 -6
- data/spec/models/ml_model.rb +0 -6
- data/spec/models/ml_model_history.rb +0 -4
- data/spec/models/private_post.rb +0 -12
- data/spec/models/private_post_history.rb +0 -4
- data/spec/models/xgboost.rb +0 -10
- data/spec/models/xgboost_history.rb +0 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: df0f832698c8177c8785d913caa4c26e2374e10ea813896190481b091a3176a6
|
4
|
+
data.tar.gz: 3bcf25861fed71c432c47e97489b2ffae7a42d70960e6d321ce8b4b24c8a5c89
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: df1430488c6120b9126aff4a526fb2aba8f84a6aad8690592b977a45b5031674ee3722818c8679881c96cd950b50a81089d4a363948cddcf65e253051792a524
|
7
|
+
data.tar.gz: 4bb03df9ecd8998fb1866bc2d5ada28b509d1122787e6ef7b47375d0a3dc18bd8bd14f8e846d1b6e5702c9a2f5a53366680d6855126c742c48526d9048d211d4
|
data/README.md
CHANGED
@@ -130,174 +130,6 @@ This can be useful when:
|
|
130
130
|
- You're versioning training data for machine learning models
|
131
131
|
- You need to maintain immutable audit trails at specific checkpoints
|
132
132
|
|
133
|
-
## Single Table Inheritance (STI)
|
134
|
-
|
135
|
-
Historiographer fully supports Single Table Inheritance, both with the default `type` column and with custom inheritance columns.
|
136
|
-
|
137
|
-
### Default STI with `type` column
|
138
|
-
|
139
|
-
```ruby
|
140
|
-
class Post < ActiveRecord::Base
|
141
|
-
include Historiographer
|
142
|
-
end
|
143
|
-
|
144
|
-
class PrivatePost < Post
|
145
|
-
end
|
146
|
-
|
147
|
-
# The history classes follow the same inheritance pattern:
|
148
|
-
class PostHistory < ActiveRecord::Base
|
149
|
-
include Historiographer::History
|
150
|
-
end
|
151
|
-
|
152
|
-
class PrivatePostHistory < PostHistory
|
153
|
-
end
|
154
|
-
```
|
155
|
-
|
156
|
-
History records automatically maintain the correct STI type:
|
157
|
-
|
158
|
-
```ruby
|
159
|
-
private_post = PrivatePost.create(title: "Secret", history_user_id: current_user.id)
|
160
|
-
private_post.snapshot
|
161
|
-
|
162
|
-
# History records are the correct subclass
|
163
|
-
history = PostHistory.last
|
164
|
-
history.is_a?(PrivatePostHistory) #=> true
|
165
|
-
history.type #=> "PrivatePostHistory"
|
166
|
-
```
|
167
|
-
|
168
|
-
### Custom Inheritance Columns
|
169
|
-
|
170
|
-
You can also use a custom column for STI instead of the default `type`:
|
171
|
-
|
172
|
-
```ruby
|
173
|
-
class MLModel < ActiveRecord::Base
|
174
|
-
self.inheritance_column = :model_type
|
175
|
-
include Historiographer
|
176
|
-
end
|
177
|
-
|
178
|
-
class XGBoost < MLModel
|
179
|
-
self.table_name = "ml_models"
|
180
|
-
end
|
181
|
-
|
182
|
-
# History classes use the same custom column
|
183
|
-
class MLModelHistory < MLModel
|
184
|
-
self.inheritance_column = :model_type
|
185
|
-
self.table_name = "ml_model_histories"
|
186
|
-
end
|
187
|
-
|
188
|
-
class XGBoostHistory < MLModelHistory
|
189
|
-
end
|
190
|
-
```
|
191
|
-
|
192
|
-
Migration for custom inheritance column:
|
193
|
-
|
194
|
-
```ruby
|
195
|
-
create_table :ml_models do |t|
|
196
|
-
t.string :name
|
197
|
-
t.string :model_type # Custom inheritance column
|
198
|
-
t.jsonb :parameters
|
199
|
-
t.timestamps
|
200
|
-
|
201
|
-
t.index :model_type
|
202
|
-
end
|
203
|
-
|
204
|
-
create_table :ml_model_histories do |t|
|
205
|
-
t.histories # Includes all columns from parent table
|
206
|
-
end
|
207
|
-
```
|
208
|
-
|
209
|
-
The custom inheritance column works just like the default `type`:
|
210
|
-
|
211
|
-
```ruby
|
212
|
-
model = XGBoost.create(name: "My Model", history_user_id: current_user.id)
|
213
|
-
model.snapshot
|
214
|
-
|
215
|
-
# History records maintain the correct subclass
|
216
|
-
history = MLModelHistory.last
|
217
|
-
history.is_a?(XGBoostHistory) #=> true
|
218
|
-
history.model_type #=> "XGBoostHistory"
|
219
|
-
```
|
220
|
-
|
221
|
-
### STI and Snapshots: Perfect for Model Versioning
|
222
|
-
|
223
|
-
Single Table Inheritance combined with Historiographer's snapshot feature is particularly powerful for versioning machine learning models and other complex systems that need immutable historical records. Here's why:
|
224
|
-
|
225
|
-
1. **Type-Safe History**: When you snapshot an ML model, both the model and its parameters are preserved with their exact implementation type. This ensures that when you retrieve historical versions, you get back exactly the right subclass with its specific behavior:
|
226
|
-
|
227
|
-
```ruby
|
228
|
-
# Create and configure an XGBoost model
|
229
|
-
model = XGBoost.create(
|
230
|
-
name: "Customer Churn Predictor v1",
|
231
|
-
parameters: { max_depth: 3, eta: 0.1 },
|
232
|
-
history_user_id: current_user.id
|
233
|
-
)
|
234
|
-
|
235
|
-
# Take a snapshot before training
|
236
|
-
model.snapshot
|
237
|
-
|
238
|
-
# Update the model after training
|
239
|
-
model.update(
|
240
|
-
name: "Customer Churn Predictor v2",
|
241
|
-
parameters: { max_depth: 5, eta: 0.2 },
|
242
|
-
history_user_id: current_user.id
|
243
|
-
)
|
244
|
-
|
245
|
-
# Later, retrieve the exact pre-training version
|
246
|
-
historical_model = MLModel.latest_snapshot
|
247
|
-
historical_model.is_a?(XGBoostHistory) #=> true
|
248
|
-
historical_model.parameters #=> { max_depth: 3, eta: 0.1 }
|
249
|
-
```
|
250
|
-
|
251
|
-
2. **Implementation Versioning**: Different model types often have different parameters, preprocessing steps, or scoring methods. STI ensures these differences are preserved in history:
|
252
|
-
|
253
|
-
```ruby
|
254
|
-
class XGBoost < MLModel
|
255
|
-
def predict(data)
|
256
|
-
# XGBoost-specific prediction logic
|
257
|
-
end
|
258
|
-
end
|
259
|
-
|
260
|
-
class RandomForest < MLModel
|
261
|
-
def predict(data)
|
262
|
-
# RandomForest-specific prediction logic
|
263
|
-
end
|
264
|
-
end
|
265
|
-
|
266
|
-
# Your historical records maintain these implementation differences
|
267
|
-
old_model = MLModel.latest_snapshot
|
268
|
-
old_model.predict(data) # Uses the exact prediction logic from that point in time
|
269
|
-
```
|
270
|
-
|
271
|
-
3. **Reproducibility**: Essential for ML workflows where you need to reproduce results or audit model behavior:
|
272
|
-
|
273
|
-
```ruby
|
274
|
-
# Create model and snapshot at each significant stage
|
275
|
-
model = XGBoost.create(name: "Risk Scorer v1", history_user_id: current_user.id)
|
276
|
-
|
277
|
-
# Snapshot after initial configuration
|
278
|
-
model.snapshot(metadata: { stage: "configuration" })
|
279
|
-
|
280
|
-
# Snapshot after training
|
281
|
-
model.update(parameters: trained_parameters)
|
282
|
-
model.snapshot(metadata: { stage: "post_training" })
|
283
|
-
|
284
|
-
# Snapshot after validation
|
285
|
-
model.update(parameters: validated_parameters)
|
286
|
-
model.snapshot(metadata: { stage: "validated" })
|
287
|
-
|
288
|
-
# Later, you can retrieve any version to reproduce results
|
289
|
-
initial_version = model.histories.find_by(metadata: { stage: "configuration" })
|
290
|
-
trained_version = model.histories.find_by(metadata: { stage: "post_training" })
|
291
|
-
```
|
292
|
-
|
293
|
-
This combination of STI and snapshots is particularly valuable for:
|
294
|
-
|
295
|
-
- Model governance and compliance
|
296
|
-
- A/B testing different model types
|
297
|
-
- Debugging model behavior
|
298
|
-
- Reproducing historical predictions
|
299
|
-
- Maintaining audit trails for regulatory requirements
|
300
|
-
|
301
133
|
## Namespaced Models
|
302
134
|
|
303
135
|
When using namespaced models, Rails handles foreign key naming differently than with non-namespaced models. For example, if you have a model namespaced like this:
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
4.1.16
|
1
|
+
4.3.0
|
data/historiographer.gemspec
CHANGED
@@ -2,11 +2,11 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: historiographer 4.1.16 ruby lib
|
5
|
+
# stub: historiographer 4.3.0 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "historiographer".freeze
|
9
|
-
s.version = "4.1.16"
|
9
|
+
s.version = "4.3.0"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib".freeze]
|
@@ -63,11 +63,9 @@ Gem::Specification.new do |s|
|
|
63
63
|
"spec/db/migrate/20221018204255_create_silent_post_histories.rb",
|
64
64
|
"spec/db/migrate/20241109182017_create_comments.rb",
|
65
65
|
"spec/db/migrate/20241109182020_create_comment_histories.rb",
|
66
|
-
"spec/db/migrate/20241118000000_add_type_to_posts.rb",
|
67
|
-
"spec/db/migrate/20241118000001_add_type_to_post_histories.rb",
|
68
|
-
"spec/db/migrate/20241118000002_create_ml_models.rb",
|
69
|
-
"spec/db/migrate/20241118000003_create_easy_ml_columns.rb",
|
70
66
|
"spec/db/migrate/20241119000000_create_datasets.rb",
|
67
|
+
"spec/db/migrate/2025082100000_create_projects.rb",
|
68
|
+
"spec/db/migrate/2025082100001_create_project_files.rb",
|
71
69
|
"spec/db/schema.rb",
|
72
70
|
"spec/factories/post.rb",
|
73
71
|
"spec/historiographer_spec.rb",
|
@@ -76,18 +74,14 @@ Gem::Specification.new do |s|
|
|
76
74
|
"spec/models/author_history.rb",
|
77
75
|
"spec/models/comment.rb",
|
78
76
|
"spec/models/comment_history.rb",
|
79
|
-
"spec/models/dataset.rb",
|
80
|
-
"spec/models/dataset_history.rb",
|
81
77
|
"spec/models/easy_ml/column.rb",
|
82
78
|
"spec/models/easy_ml/column_history.rb",
|
83
|
-
"spec/models/easy_ml/encrypted_column.rb",
|
84
|
-
"spec/models/easy_ml/encrypted_column_history.rb",
|
85
|
-
"spec/models/ml_model.rb",
|
86
|
-
"spec/models/ml_model_history.rb",
|
87
79
|
"spec/models/post.rb",
|
88
80
|
"spec/models/post_history.rb",
|
89
|
-
"spec/models/
|
90
|
-
"spec/models/
|
81
|
+
"spec/models/project.rb",
|
82
|
+
"spec/models/project_file.rb",
|
83
|
+
"spec/models/project_file_history.rb",
|
84
|
+
"spec/models/project_history.rb",
|
91
85
|
"spec/models/safe_post.rb",
|
92
86
|
"spec/models/safe_post_history.rb",
|
93
87
|
"spec/models/silent_post.rb",
|
@@ -96,8 +90,6 @@ Gem::Specification.new do |s|
|
|
96
90
|
"spec/models/thing_with_compound_index_history.rb",
|
97
91
|
"spec/models/thing_without_history.rb",
|
98
92
|
"spec/models/user.rb",
|
99
|
-
"spec/models/xgboost.rb",
|
100
|
-
"spec/models/xgboost_history.rb",
|
101
93
|
"spec/spec_helper.rb"
|
102
94
|
]
|
103
95
|
s.homepage = "http://github.com/brettshollenberger/historiographer".freeze
|
@@ -179,11 +179,6 @@ module Historiographer
|
|
179
179
|
belongs_to association_name, class_name: foreign_class_name
|
180
180
|
end
|
181
181
|
|
182
|
-
# Enable STI for history classes
|
183
|
-
if foreign_class.sti_enabled?
|
184
|
-
self.inheritance_column = 'type'
|
185
|
-
end
|
186
|
-
|
187
182
|
# Ensure we can't destroy history records
|
188
183
|
before_destroy { |record| raise "Cannot destroy history records" }
|
189
184
|
|
@@ -312,13 +307,9 @@ module Historiographer
|
|
312
307
|
return @history_foreign_key if @history_foreign_key
|
313
308
|
|
314
309
|
# CAN THIS BE TABLE OR MODEL?
|
315
|
-
@history_foreign_key =
|
310
|
+
@history_foreign_key = original_class.base_class.name.singularize.foreign_key
|
316
311
|
end
|
317
312
|
|
318
|
-
def sti_base_class
|
319
|
-
return @sti_base_class if @sti_base_class
|
320
|
-
@sti_base_class = original_class.base_class
|
321
|
-
end
|
322
313
|
end
|
323
314
|
|
324
315
|
def original_class
|
@@ -337,10 +328,6 @@ module Historiographer
|
|
337
328
|
attrs = attributes.clone
|
338
329
|
# attrs[original_class.primary_key] = attrs[self.class.history_foreign_key]
|
339
330
|
|
340
|
-
if original_class.sti_enabled?
|
341
|
-
# Remove History suffix from type if present
|
342
|
-
attrs[original_class.inheritance_column] = attrs[original_class.inheritance_column]&.gsub(/History$/, '')
|
343
|
-
end
|
344
331
|
|
345
332
|
# Manually handle creating instance WITHOUT running find or initialize callbacks
|
346
333
|
# We will manually run callbacks below
|
data/lib/historiographer.rb
CHANGED
@@ -78,6 +78,7 @@ module Historiographer
|
|
78
78
|
extend ActiveSupport::Concern
|
79
79
|
|
80
80
|
class HistoryUserIdMissingError < StandardError; end
|
81
|
+
class HistoryInsertionError < StandardError; end
|
81
82
|
|
82
83
|
UTC = Time.now.in_time_zone('UTC').time_zone
|
83
84
|
|
@@ -190,9 +191,6 @@ module Historiographer
|
|
190
191
|
|
191
192
|
history_class_initializer = Class.new(ActiveRecord::Base) do
|
192
193
|
self.table_name = "#{base_table}_histories"
|
193
|
-
|
194
|
-
# Handle STI properly
|
195
|
-
self.inheritance_column = base.inheritance_column if base.sti_enabled?
|
196
194
|
end
|
197
195
|
|
198
196
|
# Split the class name into module parts and the actual class name
|
@@ -295,10 +293,11 @@ module Historiographer
|
|
295
293
|
existing_snapshot = history_class.where(foreign_key => attrs[primary_key], snapshot_id: snapshot_id)
|
296
294
|
return if existing_snapshot.present?
|
297
295
|
|
298
|
-
null_snapshot = history_class.where(foreign_key => attrs[primary_key], snapshot_id: nil)
|
296
|
+
null_snapshot = history_class.where(foreign_key => attrs[primary_key], snapshot_id: nil).first
|
299
297
|
snapshot = nil
|
300
298
|
if null_snapshot.present?
|
301
|
-
|
299
|
+
null_snapshot.update(snapshot_id: snapshot_id)
|
300
|
+
snapshot = null_snapshot
|
302
301
|
else
|
303
302
|
snapshot = record_history(snapshot_id: snapshot_id)
|
304
303
|
end
|
@@ -344,12 +343,6 @@ module Historiographer
|
|
344
343
|
attrs.merge!(foreign_key => attrs['id'], history_started_at: now, history_user_id: history_user_id)
|
345
344
|
attrs.merge!(snapshot_id: snapshot_id) if snapshot_id.present?
|
346
345
|
|
347
|
-
# For STI, ensure we use the correct history class type
|
348
|
-
if self.class.sti_enabled?
|
349
|
-
type_column = self.class.inheritance_column
|
350
|
-
attrs[type_column] = "#{self.class.name}History"
|
351
|
-
end
|
352
|
-
|
353
346
|
attrs = attrs.except('id')
|
354
347
|
attrs.stringify_keys!
|
355
348
|
|
@@ -385,6 +378,29 @@ module Historiographer
|
|
385
378
|
|
386
379
|
if history_class.history_foreign_key.present? && history_class.present?
|
387
380
|
result = history_class.insert_all([attrs])
|
381
|
+
|
382
|
+
# Check if the insertion was successful
|
383
|
+
if result.rows.empty?
|
384
|
+
# insert_all returned empty rows, likely due to a duplicate/conflict
|
385
|
+
# Try to find the existing record that prevented insertion
|
386
|
+
foreign_key = history_class.history_foreign_key
|
387
|
+
existing_history = history_class.where(
|
388
|
+
foreign_key => attrs[foreign_key],
|
389
|
+
history_started_at: attrs['history_started_at']
|
390
|
+
).first
|
391
|
+
|
392
|
+
if existing_history
|
393
|
+
# A duplicate history already exists (race condition or retry)
|
394
|
+
# This is acceptable - return the existing history
|
395
|
+
Rails.logger.warn("Duplicate history detected for #{self.class.name} ##{id} at #{attrs['history_started_at']}. Using existing history record ##{existing_history.id}.") if Rails.logger
|
396
|
+
current_history.update_columns(history_ended_at: now) if current_history.present?
|
397
|
+
return existing_history
|
398
|
+
else
|
399
|
+
# No rows inserted and can't find an existing record - this is unexpected
|
400
|
+
raise HistoryInsertionError, "Failed to insert history record for #{self.class.name} ##{id}, and no existing history was found. This may indicate a database constraint preventing insertion."
|
401
|
+
end
|
402
|
+
end
|
403
|
+
|
388
404
|
inserted_id = result.rows.first.first if history_class.primary_key == 'id'
|
389
405
|
instance = history_class.find(inserted_id)
|
390
406
|
current_history.update_columns(history_ended_at: now) if current_history.present?
|
@@ -434,9 +450,6 @@ module Historiographer
|
|
434
450
|
@historiographer_mode || Historiographer::Configuration.mode
|
435
451
|
end
|
436
452
|
|
437
|
-
def sti_enabled?
|
438
|
-
columns.map(&:name).include?(inheritance_column)
|
439
|
-
end
|
440
453
|
end
|
441
454
|
|
442
455
|
def is_history_class?
|
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'historiographer/postgres_migration'
|
2
|
+
|
3
|
+
class CreateProjects < ActiveRecord::Migration[7.1]
|
4
|
+
def change
|
5
|
+
create_table :projects do |t|
|
6
|
+
t.string :name, null: false
|
7
|
+
t.timestamps
|
8
|
+
end
|
9
|
+
|
10
|
+
create_table :project_histories do |t|
|
11
|
+
t.histories
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
|
2
|
+
require 'historiographer/postgres_migration'
|
3
|
+
|
4
|
+
class CreateProjectFiles < ActiveRecord::Migration[7.1]
|
5
|
+
def change
|
6
|
+
create_table :project_files do |t|
|
7
|
+
t.bigint :project_id
|
8
|
+
t.string :name, null: false
|
9
|
+
t.string :content
|
10
|
+
t.timestamps
|
11
|
+
t.index :project_id
|
12
|
+
end
|
13
|
+
|
14
|
+
create_table :project_file_histories do |t|
|
15
|
+
t.histories
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
data/spec/db/schema.rb
CHANGED
@@ -195,6 +195,50 @@ ActiveRecord::Schema[7.1].define(version: 2024_11_19_000000) do
|
|
195
195
|
t.index ["type"], name: "index_posts_on_type"
|
196
196
|
end
|
197
197
|
|
198
|
+
create_table "project_file_histories", force: :cascade do |t|
|
199
|
+
t.integer "project_file_id", null: false
|
200
|
+
t.string "name", null: false
|
201
|
+
t.datetime "created_at", null: false
|
202
|
+
t.datetime "updated_at", null: false
|
203
|
+
t.datetime "history_started_at", null: false
|
204
|
+
t.datetime "history_ended_at"
|
205
|
+
t.integer "history_user_id"
|
206
|
+
t.string "snapshot_id"
|
207
|
+
t.index ["history_ended_at"], name: "index_project_file_histories_on_history_ended_at"
|
208
|
+
t.index ["history_started_at"], name: "index_project_file_histories_on_history_started_at"
|
209
|
+
t.index ["history_user_id"], name: "index_project_file_histories_on_history_user_id"
|
210
|
+
t.index ["project_file_id"], name: "index_project_file_histories_on_project_file_id"
|
211
|
+
t.index ["snapshot_id"], name: "index_project_file_histories_on_snapshot_id"
|
212
|
+
end
|
213
|
+
|
214
|
+
create_table "project_files", force: :cascade do |t|
|
215
|
+
t.string "name", null: false
|
216
|
+
t.datetime "created_at", null: false
|
217
|
+
t.datetime "updated_at", null: false
|
218
|
+
end
|
219
|
+
|
220
|
+
create_table "project_histories", force: :cascade do |t|
|
221
|
+
t.integer "project_id", null: false
|
222
|
+
t.string "name", null: false
|
223
|
+
t.datetime "created_at", null: false
|
224
|
+
t.datetime "updated_at", null: false
|
225
|
+
t.datetime "history_started_at", null: false
|
226
|
+
t.datetime "history_ended_at"
|
227
|
+
t.integer "history_user_id"
|
228
|
+
t.string "snapshot_id"
|
229
|
+
t.index ["history_ended_at"], name: "index_project_histories_on_history_ended_at"
|
230
|
+
t.index ["history_started_at"], name: "index_project_histories_on_history_started_at"
|
231
|
+
t.index ["history_user_id"], name: "index_project_histories_on_history_user_id"
|
232
|
+
t.index ["project_id"], name: "index_project_histories_on_project_id"
|
233
|
+
t.index ["snapshot_id"], name: "index_project_histories_on_snapshot_id"
|
234
|
+
end
|
235
|
+
|
236
|
+
create_table "projects", force: :cascade do |t|
|
237
|
+
t.string "name", null: false
|
238
|
+
t.datetime "created_at", null: false
|
239
|
+
t.datetime "updated_at", null: false
|
240
|
+
end
|
241
|
+
|
198
242
|
create_table "safe_post_histories", force: :cascade do |t|
|
199
243
|
t.integer "safe_post_id", null: false
|
200
244
|
t.string "title", null: false
|
@@ -397,20 +397,6 @@ describe Historiographer do
|
|
397
397
|
end
|
398
398
|
end
|
399
399
|
|
400
|
-
describe 'Method stubbing' do
|
401
|
-
it 'handles adding method appropriately' do
|
402
|
-
post = PrivatePost.create(title: 'Post 1', body: "Hello", author_id: 1, history_user_id: 1)
|
403
|
-
expect(post.formatted_title).to eq("Private — You cannot see!")
|
404
|
-
|
405
|
-
allow_any_instance_of(PrivatePost).to receive(:formatted_title).and_return("New Title")
|
406
|
-
expect(post.formatted_title).to eq("New Title")
|
407
|
-
|
408
|
-
# Ensure history still works
|
409
|
-
post.update(title: 'Updated Title', history_user_id: user.id)
|
410
|
-
expect(post.histories.count).to eq(2)
|
411
|
-
expect(post.histories.first.class).to eq(PrivatePostHistory) # Verify correct history class
|
412
|
-
end
|
413
|
-
end
|
414
400
|
|
415
401
|
describe 'Scopes' do
|
416
402
|
it 'finds current histories' do
|
@@ -517,6 +503,118 @@ describe Historiographer do
|
|
517
503
|
end
|
518
504
|
end
|
519
505
|
|
506
|
+
describe 'Empty insertion handling' do
|
507
|
+
it 'handles duplicate history gracefully by returning existing record' do
|
508
|
+
# Create post without history tracking to avoid initial history
|
509
|
+
post = Post.new(
|
510
|
+
title: 'Post 1',
|
511
|
+
body: 'Great post',
|
512
|
+
author_id: 1,
|
513
|
+
history_user_id: user.id
|
514
|
+
)
|
515
|
+
post.save_without_history
|
516
|
+
|
517
|
+
# Freeze time to ensure same timestamp
|
518
|
+
Timecop.freeze do
|
519
|
+
# Create a history record with current timestamp
|
520
|
+
now = Historiographer::UTC.now
|
521
|
+
attrs = post.send(:history_attrs, now: now)
|
522
|
+
existing_history = PostHistory.create!(attrs)
|
523
|
+
|
524
|
+
# Mock insert_all to return empty result (simulating duplicate constraint)
|
525
|
+
empty_result = double('result')
|
526
|
+
allow(empty_result).to receive(:rows).and_return([])
|
527
|
+
|
528
|
+
allow(PostHistory).to receive(:insert_all).and_return(empty_result)
|
529
|
+
|
530
|
+
# The method should find and return the existing history
|
531
|
+
allow(Rails.logger).to receive(:warn).with(/Duplicate history detected/) if Rails.logger
|
532
|
+
result = post.send(:record_history)
|
533
|
+
expect(result.id).to eq(existing_history.id)
|
534
|
+
expect(result.post_id).to eq(post.id)
|
535
|
+
end
|
536
|
+
end
|
537
|
+
|
538
|
+
it 'raises error when insert fails and no existing record found' do
|
539
|
+
post = create_post
|
540
|
+
|
541
|
+
# Mock insert_all to return an empty result
|
542
|
+
empty_result = double('result')
|
543
|
+
allow(empty_result).to receive(:rows).and_return([])
|
544
|
+
|
545
|
+
allow(PostHistory).to receive(:insert_all).and_return(empty_result)
|
546
|
+
|
547
|
+
# Mock the where clause for finding existing history to return nothing
|
548
|
+
# We need to be specific about the where clause we're mocking
|
549
|
+
original_where = PostHistory.method(:where)
|
550
|
+
allow(PostHistory).to receive(:where) do |*args|
|
551
|
+
# Check if this is the specific query for finding duplicates
|
552
|
+
# The foreign key is "post_id" (string) and we're checking for history_started_at
|
553
|
+
if args.first.is_a?(Hash) && args.first.keys.include?("post_id") && args.first.keys.include?(:history_started_at)
|
554
|
+
# Return a double that returns nil when .first is called
|
555
|
+
double('where').tap { |d| allow(d).to receive(:first).and_return(nil) }
|
556
|
+
else
|
557
|
+
# For all other queries, use the original behavior
|
558
|
+
original_where.call(*args)
|
559
|
+
end
|
560
|
+
end
|
561
|
+
|
562
|
+
# This should raise a meaningful error
|
563
|
+
expect {
|
564
|
+
post.send(:record_history)
|
565
|
+
}.to raise_error(Historiographer::HistoryInsertionError, /Failed to insert history record.*no existing history was found/)
|
566
|
+
end
|
567
|
+
|
568
|
+
it 'provides meaningful error when insertion fails' do
|
569
|
+
post = create_post
|
570
|
+
|
571
|
+
# Mock insert_all to simulate a database-level failure
|
572
|
+
# This could happen due to various reasons:
|
573
|
+
# - Database is read-only
|
574
|
+
# - Connection issues
|
575
|
+
# - Constraint violations that prevent insertion
|
576
|
+
allow(PostHistory).to receive(:insert_all).and_raise(ActiveRecord::StatementInvalid, "PG::ReadOnlySqlTransaction: ERROR: cannot execute INSERT in a read-only transaction")
|
577
|
+
|
578
|
+
expect {
|
579
|
+
post.send(:record_history)
|
580
|
+
}.to raise_error(ActiveRecord::StatementInvalid)
|
581
|
+
end
|
582
|
+
|
583
|
+
it 'successfully inserts history when everything is valid' do
|
584
|
+
post = create_post
|
585
|
+
|
586
|
+
# Clear existing histories
|
587
|
+
PostHistory.where(post_id: post.id).destroy_all
|
588
|
+
|
589
|
+
# Record a new history
|
590
|
+
history = post.send(:record_history)
|
591
|
+
|
592
|
+
expect(history).to be_a(PostHistory)
|
593
|
+
expect(history).to be_persisted
|
594
|
+
expect(history.post_id).to eq(post.id)
|
595
|
+
expect(history.title).to eq(post.title)
|
596
|
+
expect(history.body).to eq(post.body)
|
597
|
+
end
|
598
|
+
|
599
|
+
it 'handles race conditions by returning existing history' do
|
600
|
+
post = create_post
|
601
|
+
|
602
|
+
# Simulate a race condition where the same history_started_at timestamp is used
|
603
|
+
now = Time.now
|
604
|
+
allow(Historiographer::UTC).to receive(:now).and_return(now)
|
605
|
+
|
606
|
+
# First process creates history
|
607
|
+
history1 = post.histories.last
|
608
|
+
|
609
|
+
# Second process tries to create history with same timestamp
|
610
|
+
# This would normally cause insert_all to return empty rows
|
611
|
+
history2 = post.send(:record_history)
|
612
|
+
|
613
|
+
# Should handle gracefully
|
614
|
+
expect(history2).to be_a(PostHistory)
|
615
|
+
end
|
616
|
+
end
|
617
|
+
|
520
618
|
describe 'Scopes' do
|
521
619
|
it 'finds current' do
|
522
620
|
post = create_post
|
@@ -724,102 +822,24 @@ describe Historiographer do
|
|
724
822
|
expect(post.comment_count).to eq 2
|
725
823
|
expect(post.latest_snapshot.comment_count).to eq 1
|
726
824
|
end
|
727
|
-
end
|
728
825
|
|
729
|
-
|
730
|
-
|
731
|
-
|
732
|
-
PrivatePost.create(
|
733
|
-
title: 'Private Post',
|
734
|
-
body: 'Test',
|
735
|
-
history_user_id: user.id,
|
736
|
-
author_id: 1
|
737
|
-
)
|
738
|
-
end
|
826
|
+
it "doesn't explode" do
|
827
|
+
project = Project.create(name: "test_project")
|
828
|
+
project_file = ProjectFile.create(project: project, name: "test_file", content: "Hello world")
|
739
829
|
|
740
|
-
|
741
|
-
post_history = private_post.histories.first
|
742
|
-
expect(post_history.original_class).to eq(PrivatePost)
|
743
|
-
end
|
830
|
+
original_snapshot = project.snapshot
|
744
831
|
|
745
|
-
|
746
|
-
|
747
|
-
expect(post_history.original_class).to eq(PrivatePost)
|
748
|
-
expect(post_history.title).to eq('Private — You cannot see!')
|
749
|
-
end
|
832
|
+
project_file.update(content: "Goodnight moon")
|
833
|
+
new_snapshot = project.snapshot
|
750
834
|
|
751
|
-
|
752
|
-
|
753
|
-
new_history = private_post.histories.current&.first
|
754
|
-
expect(new_history.original_class).to eq(PrivatePost)
|
755
|
-
expect(new_history.title).to eq('Private — You cannot see!')
|
756
|
-
end
|
835
|
+
expect(original_snapshot.files.map(&:class)).to eq [ProjectFileHistory]
|
836
|
+
expect(new_snapshot.files.map(&:class)).to eq [ProjectFileHistory]
|
757
837
|
|
758
|
-
|
759
|
-
|
760
|
-
old_history = private_post.histories.first
|
761
|
-
reified = old_history
|
762
|
-
expect(reified.title).to eq('Private — You cannot see!')
|
763
|
-
expect(reified.original_class).to eq(PrivatePost)
|
838
|
+
expect(new_snapshot.files.first.content).to eq "Goodnight moon"
|
839
|
+
expect(original_snapshot.files.first.content).to eq "Hello world"
|
764
840
|
end
|
765
841
|
end
|
766
842
|
|
767
|
-
describe 'Single Table Inheritance with Associations' do
|
768
|
-
let(:user) { User.create(name: 'Test User') }
|
769
|
-
|
770
|
-
it 'inherits associations in history classes' do
|
771
|
-
dataset = Dataset.create(name: "test_dataset", history_user_id: user.id)
|
772
|
-
model = XGBoost.create(name: "test_model", dataset: dataset, history_user_id: user.id)
|
773
|
-
model.snapshot
|
774
|
-
|
775
|
-
dataset.update(name: "new_dataset", history_user_id: user.id)
|
776
|
-
|
777
|
-
expect(dataset.ml_model).to eq model # This is still a live model
|
778
|
-
expect(model.dataset).to eq(dataset)
|
779
|
-
expect(model.histories.first).to respond_to(:dataset)
|
780
|
-
expect(model.histories.first.dataset).to be_a(DatasetHistory)
|
781
|
-
|
782
|
-
model_history = model.latest_snapshot
|
783
|
-
expect(model_history.dataset.name).to eq "test_dataset"
|
784
|
-
end
|
785
|
-
end
|
786
|
-
|
787
|
-
describe 'Single Table Inheritance with custom inheritance column' do
|
788
|
-
let(:user) { User.create(name: 'Test User') }
|
789
|
-
let(:xgboost) do
|
790
|
-
XGBoost.create(
|
791
|
-
name: 'My XGBoost Model',
|
792
|
-
parameters: { max_depth: 3, eta: 0.1 },
|
793
|
-
history_user_id: user.id
|
794
|
-
)
|
795
|
-
end
|
796
|
-
|
797
|
-
it 'creates history records with correct inheritance' do
|
798
|
-
model = xgboost
|
799
|
-
expect(model.model_name).to eq('XGBoost')
|
800
|
-
expect(model.current_history).to be_a(XGBoostHistory)
|
801
|
-
expect(model.current_history.model_name).to eq('XGBoostHistory')
|
802
|
-
end
|
803
|
-
|
804
|
-
it 'maintains inheritance through updates' do
|
805
|
-
model = xgboost
|
806
|
-
model.update(name: 'Updated XGBoost Model', history_user_id: user.id)
|
807
|
-
|
808
|
-
expect(model.histories.count).to eq(2)
|
809
|
-
expect(model.histories.all? { |h| h.is_a?(XGBoostHistory) }).to be true
|
810
|
-
end
|
811
|
-
|
812
|
-
it 'reifies with correct class' do
|
813
|
-
model = xgboost
|
814
|
-
original_name = model.name
|
815
|
-
model.update(name: 'Updated XGBoost Model', history_user_id: user.id)
|
816
|
-
model.snapshot
|
817
|
-
|
818
|
-
reified = model.latest_snapshot
|
819
|
-
expect(reified).to be_a(XGBoostHistory)
|
820
|
-
expect(reified.name).to eq("Updated XGBoost Model")
|
821
|
-
end
|
822
|
-
end
|
823
843
|
|
824
844
|
describe 'Class-level mode setting' do
|
825
845
|
before(:each) do
|
@@ -882,24 +902,6 @@ describe Historiographer do
|
|
882
902
|
expect(col_history).to be_a(EasyML::ColumnHistory)
|
883
903
|
end
|
884
904
|
|
885
|
-
it 'establishes correct associations for child classes' do
|
886
|
-
encrypted_col = EasyML::Column.create(
|
887
|
-
name: 'secret_feature',
|
888
|
-
data_type: 'numeric',
|
889
|
-
history_user_id: user.id,
|
890
|
-
column_type: "EasyML::EncryptedColumn"
|
891
|
-
)
|
892
|
-
|
893
|
-
# Verify the base record
|
894
|
-
expect(encrypted_col).to be_a(EasyML::EncryptedColumn)
|
895
|
-
expect(encrypted_col.encrypted?).to be true
|
896
|
-
|
897
|
-
# Verify history record
|
898
|
-
col_history = encrypted_col.histories.last
|
899
|
-
expect(col_history).to be_a(EasyML::EncryptedColumnHistory)
|
900
|
-
expect(col_history.class.history_foreign_key).to eq('column_id')
|
901
|
-
expect(col_history.encrypted?).to be true
|
902
|
-
end
|
903
905
|
|
904
906
|
it 'uses correct table names' do
|
905
907
|
expect(EasyML::Column.table_name).to eq('easy_ml_columns')
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: historiographer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.1.16
|
4
|
+
version: 4.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- brettshollenberger
|
@@ -258,11 +258,9 @@ files:
|
|
258
258
|
- spec/db/migrate/20221018204255_create_silent_post_histories.rb
|
259
259
|
- spec/db/migrate/20241109182017_create_comments.rb
|
260
260
|
- spec/db/migrate/20241109182020_create_comment_histories.rb
|
261
|
-
- spec/db/migrate/20241118000000_add_type_to_posts.rb
|
262
|
-
- spec/db/migrate/20241118000001_add_type_to_post_histories.rb
|
263
|
-
- spec/db/migrate/20241118000002_create_ml_models.rb
|
264
|
-
- spec/db/migrate/20241118000003_create_easy_ml_columns.rb
|
265
261
|
- spec/db/migrate/20241119000000_create_datasets.rb
|
262
|
+
- spec/db/migrate/2025082100000_create_projects.rb
|
263
|
+
- spec/db/migrate/2025082100001_create_project_files.rb
|
266
264
|
- spec/db/schema.rb
|
267
265
|
- spec/factories/post.rb
|
268
266
|
- spec/historiographer_spec.rb
|
@@ -271,18 +269,14 @@ files:
|
|
271
269
|
- spec/models/author_history.rb
|
272
270
|
- spec/models/comment.rb
|
273
271
|
- spec/models/comment_history.rb
|
274
|
-
- spec/models/dataset.rb
|
275
|
-
- spec/models/dataset_history.rb
|
276
272
|
- spec/models/easy_ml/column.rb
|
277
273
|
- spec/models/easy_ml/column_history.rb
|
278
|
-
- spec/models/easy_ml/encrypted_column.rb
|
279
|
-
- spec/models/easy_ml/encrypted_column_history.rb
|
280
|
-
- spec/models/ml_model.rb
|
281
|
-
- spec/models/ml_model_history.rb
|
282
274
|
- spec/models/post.rb
|
283
275
|
- spec/models/post_history.rb
|
284
|
-
- spec/models/private_post.rb
|
285
|
-
- spec/models/private_post_history.rb
|
276
|
+
- spec/models/project.rb
|
277
|
+
- spec/models/project_file.rb
|
278
|
+
- spec/models/project_file_history.rb
|
279
|
+
- spec/models/project_history.rb
|
286
280
|
- spec/models/safe_post.rb
|
287
281
|
- spec/models/safe_post_history.rb
|
288
282
|
- spec/models/silent_post.rb
|
@@ -291,8 +285,6 @@ files:
|
|
291
285
|
- spec/models/thing_with_compound_index_history.rb
|
292
286
|
- spec/models/thing_without_history.rb
|
293
287
|
- spec/models/user.rb
|
294
|
-
- spec/models/xgboost.rb
|
295
|
-
- spec/models/xgboost_history.rb
|
296
288
|
- spec/spec_helper.rb
|
297
289
|
homepage: http://github.com/brettshollenberger/historiographer
|
298
290
|
licenses:
|
@@ -1,19 +0,0 @@
|
|
1
|
-
require "historiographer/postgres_migration"
|
2
|
-
require "historiographer/mysql_migration"
|
3
|
-
|
4
|
-
class CreateMlModels < ActiveRecord::Migration[7.0]
|
5
|
-
def change
|
6
|
-
create_table :ml_models do |t|
|
7
|
-
t.string :name
|
8
|
-
t.string :model_type
|
9
|
-
t.jsonb :parameters
|
10
|
-
t.timestamps
|
11
|
-
|
12
|
-
t.index :model_type
|
13
|
-
end
|
14
|
-
|
15
|
-
create_table :ml_model_histories do |t|
|
16
|
-
t.histories
|
17
|
-
end
|
18
|
-
end
|
19
|
-
end
|
@@ -1,17 +0,0 @@
|
|
1
|
-
require "historiographer/postgres_migration"
|
2
|
-
require "historiographer/mysql_migration"
|
3
|
-
|
4
|
-
class CreateEasyMlColumns < ActiveRecord::Migration[7.1]
|
5
|
-
def change
|
6
|
-
create_table :easy_ml_columns do |t|
|
7
|
-
t.string :name, null: false
|
8
|
-
t.string :data_type, null: false
|
9
|
-
t.string :column_type
|
10
|
-
t.timestamps
|
11
|
-
end
|
12
|
-
|
13
|
-
create_table :easy_ml_column_histories do |t|
|
14
|
-
t.histories(foreign_key: :column_id)
|
15
|
-
end
|
16
|
-
end
|
17
|
-
end
|
data/spec/models/dataset.rb
DELETED
data/spec/models/ml_model.rb
DELETED
data/spec/models/private_post.rb
DELETED
data/spec/models/xgboost.rb
DELETED