low_card_tables 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +18 -0
  3. data/.travis.yml +59 -0
  4. data/Gemfile +17 -0
  5. data/LICENSE +21 -0
  6. data/README.md +75 -0
  7. data/Rakefile +6 -0
  8. data/lib/low_card_tables.rb +72 -0
  9. data/lib/low_card_tables/active_record/base.rb +55 -0
  10. data/lib/low_card_tables/active_record/migrations.rb +223 -0
  11. data/lib/low_card_tables/active_record/relation.rb +35 -0
  12. data/lib/low_card_tables/active_record/scoping.rb +87 -0
  13. data/lib/low_card_tables/errors.rb +74 -0
  14. data/lib/low_card_tables/has_low_card_table/base.rb +114 -0
  15. data/lib/low_card_tables/has_low_card_table/low_card_association.rb +273 -0
  16. data/lib/low_card_tables/has_low_card_table/low_card_associations_manager.rb +143 -0
  17. data/lib/low_card_tables/has_low_card_table/low_card_dynamic_method_manager.rb +224 -0
  18. data/lib/low_card_tables/has_low_card_table/low_card_objects_manager.rb +80 -0
  19. data/lib/low_card_tables/low_card_table/base.rb +184 -0
  20. data/lib/low_card_tables/low_card_table/cache.rb +214 -0
  21. data/lib/low_card_tables/low_card_table/cache_expiration/exponential_cache_expiration_policy.rb +151 -0
  22. data/lib/low_card_tables/low_card_table/cache_expiration/fixed_cache_expiration_policy.rb +23 -0
  23. data/lib/low_card_tables/low_card_table/cache_expiration/has_cache_expiration.rb +100 -0
  24. data/lib/low_card_tables/low_card_table/cache_expiration/no_caching_expiration_policy.rb +13 -0
  25. data/lib/low_card_tables/low_card_table/cache_expiration/unlimited_cache_expiration_policy.rb +13 -0
  26. data/lib/low_card_tables/low_card_table/row_collapser.rb +175 -0
  27. data/lib/low_card_tables/low_card_table/row_manager.rb +681 -0
  28. data/lib/low_card_tables/low_card_table/table_unique_index.rb +134 -0
  29. data/lib/low_card_tables/version.rb +4 -0
  30. data/lib/low_card_tables/version_support.rb +52 -0
  31. data/low_card_tables.gemspec +69 -0
  32. data/spec/low_card_tables/helpers/database_helper.rb +148 -0
  33. data/spec/low_card_tables/helpers/query_spy_helper.rb +47 -0
  34. data/spec/low_card_tables/helpers/system_helpers.rb +63 -0
  35. data/spec/low_card_tables/system/basic_system_spec.rb +254 -0
  36. data/spec/low_card_tables/system/bulk_system_spec.rb +334 -0
  37. data/spec/low_card_tables/system/caching_system_spec.rb +531 -0
  38. data/spec/low_card_tables/system/migrations_system_spec.rb +747 -0
  39. data/spec/low_card_tables/system/options_system_spec.rb +581 -0
  40. data/spec/low_card_tables/system/queries_system_spec.rb +142 -0
  41. data/spec/low_card_tables/system/validations_system_spec.rb +88 -0
  42. data/spec/low_card_tables/unit/active_record/base_spec.rb +53 -0
  43. data/spec/low_card_tables/unit/active_record/migrations_spec.rb +207 -0
  44. data/spec/low_card_tables/unit/active_record/relation_spec.rb +47 -0
  45. data/spec/low_card_tables/unit/active_record/scoping_spec.rb +101 -0
  46. data/spec/low_card_tables/unit/has_low_card_table/base_spec.rb +79 -0
  47. data/spec/low_card_tables/unit/has_low_card_table/low_card_association_spec.rb +287 -0
  48. data/spec/low_card_tables/unit/has_low_card_table/low_card_associations_manager_spec.rb +190 -0
  49. data/spec/low_card_tables/unit/has_low_card_table/low_card_dynamic_method_manager_spec.rb +234 -0
  50. data/spec/low_card_tables/unit/has_low_card_table/low_card_objects_manager_spec.rb +70 -0
  51. data/spec/low_card_tables/unit/low_card_table/base_spec.rb +207 -0
  52. data/spec/low_card_tables/unit/low_card_table/cache_expiration/exponential_cache_expiration_policy_spec.rb +128 -0
  53. data/spec/low_card_tables/unit/low_card_table/cache_expiration/fixed_cache_expiration_policy_spec.rb +25 -0
  54. data/spec/low_card_tables/unit/low_card_table/cache_expiration/has_cache_expiration_policy_spec.rb +100 -0
  55. data/spec/low_card_tables/unit/low_card_table/cache_expiration/no_caching_expiration_policy_spec.rb +14 -0
  56. data/spec/low_card_tables/unit/low_card_table/cache_expiration/unlimited_cache_expiration_policy_spec.rb +14 -0
  57. data/spec/low_card_tables/unit/low_card_table/cache_spec.rb +282 -0
  58. data/spec/low_card_tables/unit/low_card_table/row_collapser_spec.rb +109 -0
  59. data/spec/low_card_tables/unit/low_card_table/row_manager_spec.rb +918 -0
  60. data/spec/low_card_tables/unit/low_card_table/table_unique_index_spec.rb +117 -0
  61. metadata +206 -0
@@ -0,0 +1,13 @@
1
module LowCardTables
  module LowCardTable
    module CacheExpiration
      # A cache-expiration policy that effectively turns caching off: every check reports the cache as
      # stale, regardless of when it was loaded.
      class NoCachingExpirationPolicy
        # Always returns +true+; neither +cache_time+ nor +current_time+ is consulted.
        def stale?(cache_time, current_time)
          true
        end
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
module LowCardTables
  module LowCardTable
    module CacheExpiration
      # A cache-expiration policy under which the cache never expires on its own: every check reports
      # the cache as fresh, regardless of when it was loaded.
      class UnlimitedCacheExpirationPolicy
        # Always returns +false+; neither +cache_time+ nor +current_time+ is consulted.
        def stale?(cache_time, current_time)
          false
        end
      end
    end
  end
end
@@ -0,0 +1,175 @@
1
module LowCardTables
  module LowCardTable
    # The RowCollapser is an object that exists solely to contain the code required to collapse rows when someone
    # removes a column from a low-card table in a migration. It's not a particularly well-defined object and resulted
    # from an extraction from RowManager; however, it's still nicer to have this code in a separate object rather than
    # making the RowManager even bigger than it already is.
    #
    # What are we trying to accomplish here? Well, imagine you have this:
    #
    #   user_statuses
    #   id  deleted  donation_level  gender
    #   1   false    3               female
    #   2   false    5               female
    #   3   false    7               female
    #   4   false    3               male
    #   5   false    5               male
    #   6   false    7               male
    #
    # ...and now imagine we decide to remove the +donation_level+ column. If we do nothing, we'll end up with this:
    #
    #   user_statuses
    #   id  deleted  gender
    #   1   false    female
    #   2   false    female
    #   3   false    female
    #   4   false    male
    #   5   false    male
    #   6   false    male
    #
    # ...but this violates the principle of low-card tables that they have only one row for each unique combination of
    # values. What we need to do is reduce it to this...
    #
    #   user_statuses
    #   id  deleted  gender
    #   1   false    female
    #   4   false    male
    #
    # ...and then update all columns in all tables that have a +user_status_id+ like so:
    #
    #   UPDATE users SET user_status_id = 1 WHERE user_status_id IN (2, 3)
    #   UPDATE users SET user_status_id = 4 WHERE user_status_id IN (5, 6)
    #
    # That's the job of this class. LowCardTables::HasLowCardTable::LowCardAssociation is responsible for updating the
    # referring tables themselves; however, this class is responsible for the fundamental operation.
    #
    # In this class, we often refer to the "collapse map"; in the above example, this would be:
    #
    #   #<UserStatus id: 1> => [ #<UserStatus id: 2>, #<UserStatus id: 3> ]
    #   #<UserStatus id: 4> => [ #<UserStatus id: 5>, #<UserStatus id: 6> ]
    #
    # The keys are the rows of the table that have been collapsed _to_; the values are arrays of rows that have been
    # collapsed _from_.
    class RowCollapser
      # Creates a new instance. +low_card_model+ is the ActiveRecord model class of the low-card table itself;
      # +low_card_options+ is the set of options passed to whatever migration method (e.g., +remove_column+) was
      # invoked to cause the need for a collapse; +nil+ is treated as an empty set of options. Options that we pay
      # attention to are:
      #
      # [:low_card_collapse_rows] If present but +false+ or +nil+, then no row collapsing will happen due to the
      #                           migration command; you'll be left with an invalid low-card table with no unique
      #                           index, and will need to fix this problem yourself before you can use the table.
      # [:low_card_referrers] Adds one or more models as "referring models" that will have any references to this
      #                       model updated when the collapsing is done. May be a single model or an Array of models.
      #                       Generally speaking, it should not be necessary to do this -- this code is aggressive
      #                       about eagerly loading all models, and ensuring that any that refer to this table are
      #                       used. But this is available in case you need it.
      # [:low_card_update_referring_models] If present but +false+ or +nil+, then row collapsing will occur as normal,
      #                                     but no referring columns will be updated. You'll thus have dangling foreign
      #                                     keys in any referring models; you'll have to update them yourself.
      #
      # Raises ArgumentError if +low_card_model+ is not a low-card model.
      def initialize(low_card_model, low_card_options)
        unless low_card_model.respond_to?(:is_low_card_table?) && low_card_model.is_low_card_table?
          raise ArgumentError, "You must supply a low-card AR model class, not: #{low_card_model.inspect}"
        end

        @low_card_model = low_card_model
        # Tolerate a nil options argument, so #collapse! can always treat the options as a Hash.
        @low_card_options = low_card_options || { }
      end

      # This should be called after any migration operation on the table that may have caused it to now have
      # duplicate rows. This method looks at the table, detects duplicate rows, picks out winners (and the
      # corresponding losers), and updates rows and referring rows, contingent upon the +low_card_options+ passed
      # in the constructor.
      #
      # Notably, you don't need to tell this method _what_ you did to the table; it simply looks at the current state
      # of the table and deals with duplicate rows. It also means this method is perfectly safe to call on a table that
      # has had no changes, or a table that has had migrations performed on it that don't result in duplicate rows;
      # it will simply see that there are no duplicate rows in the table, and do nothing.
      #
      # This method returns the "collapse map"; see the comment on this class overall for more information. It returns
      # an empty Hash if there were no duplicate rows, and +nil+ if collapsing was disabled via
      # <tt>:low_card_collapse_rows</tt>. Normally, you don't _have_ to do anything with the map and can ignore it,
      # but it can be useful if you pass <tt>:low_card_update_referring_models => false</tt> in the +low_card_options+.
      def collapse!
        # :low_card_collapse_rows tells this method to do nothing at all.
        return if option_disabled?(:low_card_collapse_rows)

        # Group every row by its combination of value-column attributes. In a healthy table each group has exactly
        # one row; after a migration, a group may hold several. Rows are sorted by ID first, so that the first row
        # of each group is the one with the lowest ID.
        rows_by_attributes = low_card_model.all.sort_by(&:id).group_by { |row| value_attributes(row) }

        # Build the collapse map: for each group of duplicates, the lowest-ID row wins and the rest lose. We slice
        # rather than shift, so the grouped arrays are left untouched.
        collapse_map = { }
        rows_by_attributes.each_value do |rows|
          next unless rows.length > 1
          collapse_map[rows.first] = rows[1..-1]
        end

        return { } if collapse_map.empty?

        # Figure out which rows we need to delete; this is just all the losers.
        # NOTE(review): this delete runs before, and outside of, the transactions opened below.
        ids_to_delete = collapse_map.values.map { |losers| losers.map(&:id) }.flatten.sort
        low_card_model.delete_all([ "id IN (:ids)", { :ids => ids_to_delete } ])

        # Figure out what referring models we need to update.
        all_referring_models = low_card_model.low_card_referring_models | additional_referring_models

        unless option_disabled?(:low_card_update_referring_models)
          # Run transactions on all of these, plus the low-card model as well.
          #
          # Why do we do this? Isn't just one transaction enough? Well, in default Rails configuration, yes, because
          # all models live on the same database. However, it's so common to use gems (for example, +db_charmer+)
          # that allow different models to live on different databases that we make sure to run transactions on all
          # of them; running nested transactions on the same database is harmless.
          transactions_on(all_referring_models + [ low_card_model ]) do
            all_referring_models.each do |referring_model|
              referring_model._low_card_update_collapsed_rows(low_card_model, collapse_map)
            end
          end
        end

        collapse_map
      end

      private
      attr_reader :low_card_options, :low_card_model

      # Is the given option present in the options, but set to +false+ or +nil+? (An absent option is not disabled.)
      def option_disabled?(name)
        low_card_options.has_key?(name) && (! low_card_options[name])
      end

      # Returns the models passed via <tt>:low_card_referrers</tt>, always as an Array -- whether the caller passed
      # nothing, a single model, or an Array of models.
      def additional_referring_models
        extra = low_card_options[:low_card_referrers]
        return [ ] unless extra
        extra.kind_of?(Array) ? extra : [ extra ]
      end

      # Given a model object, extracts a Hash that maps each of the value-column names to the value this model object
      # has for that value column.
      def value_attributes(row)
        attributes = row.attributes
        out = { }
        low_card_model.low_card_value_column_names.each { |n| out[n] = attributes[n] }
        out
      end

      # Runs transactions on all of the specified models, then calls the block inside the innermost one. Because of
      # ActiveRecord's semantics for transactions (which for almost all other use cases are excellent), this has to
      # be a recursive call. The array passed in is not modified.
      def transactions_on(transaction_models, &block)
        return block.call if transaction_models.empty?

        model, *remaining = transaction_models
        model.transaction { transactions_on(remaining, &block) }
      end
    end
  end
end
@@ -0,0 +1,681 @@
1
+ require 'active_support'
2
+ require 'activerecord-import'
3
+ require 'low_card_tables/low_card_table/cache'
4
+ require 'low_card_tables/low_card_table/table_unique_index'
5
+ require 'low_card_tables/low_card_table/row_collapser'
6
+
7
+ module LowCardTables
8
+ module LowCardTable
9
+ # In many ways, the RowManager is the beating heart of +low_card_tables+. It is responsible for finding and
10
+ # creating rows in low-card tables, as well as maintaining the unique index across all columns in the table and
11
+ # dealing with any needs from migrations.
12
+ #
13
+ # Because this class is quite complex, some pieces of functionality have been broken out into other classes.
14
+ # The TableUniqueIndex is responsible for maintaining the unique index across all columns in the table, and
15
+ # the RowCollapser handles the case where rows need to be collapsed (unified) because a column was removed from
16
+ # the low-card table.
17
+ #
18
+ # === Cache Notifications
19
+ #
20
+ # This class uses the ActiveSupport::Notifications interface to notify anyone who's interested of cache-related
21
+ # events. In particular, it fires the following events with the following payloads:
22
+ #
23
+ # [low_card_tables.cache_load] <tt>{ :low_card_model => <ActiveRecord model class> }</tt>; this is fired when
24
+ # the cache is loaded from the database, whether that's the first time after startup
25
+ # or after a cache flush.
26
+ # [low_card_tables.cache_flush] <tt>{ :low_card_model => <ActiveRecord model class>, :reason => <some reason> }</tt>;
27
+ # this is fired when there's a cache that is flushed. Additional payload depends on
28
+ # the +:reason+.
29
+ #
30
+ # Reasons for +low_card_tables.cache_flush+ include:
31
+ #
32
+ # [:manually_requested] You called +low_card_flush_cache!+ on the low-card model.
33
+ # [:id_not_found] You requested a low-card row by ID, and we didn't find that ID in the cache. We assume that the ID
34
+ # is likely valid and that it's simply been created since we retrieved the cache from the database,
35
+ # so we flush the cache and try again. +:ids+ is present in the payload, mapping to an array of
36
+ # one or more IDs -- the ID or IDs that weren't found in the cache.
37
+ # [:collapse_rows_and_update_referrers] The low-card table has been migrated and has had a column removed; we've
38
+ # collapsed any now-duplicate rows properly. As such, we need to flush the
39
+ # cache.
40
+ # [:schema_change] We have detected that the schema of the low-card table has changed, and need to flush the cache.
41
+ # [:creating_rows] We're about to create one or more new rows in the low-card table, because a set of attributes
42
+ # that has never been seen before was asked for. Before we actually go try to create them, we
43
+ # lock the table and flush the cache, so that, in the case where some other process has already
44
+ # created them, we simply pick them up now. Then, after we create them, we flush the cache again
45
+ # to pick up the newly-created rows. +:context+ is present in the payload, mapped to either
46
+ # +:before_import+ or +:after_import+ (corresponding to the two situations above). +:new_rows+ is
47
+ # also present in the payload, mapped to an array of one or more Hashes, each of which represents
48
+ # a unique combination of attributes to be created.
49
+ # [:stale] By far the most common case -- the cache is simply stale based upon the current cache-expiration policy,
50
+ # and needs to be reloaded. The payload will contain +:loaded+, which is the time that the cache was
51
+ # loaded, and +:now+, which is the time at which the cache was checked for validity. (+:now+ will always
52
+ # be very close to, but not after, the current time; any delay is just due to the time it took to
53
+ # receive the notification via ActiveSupport::Notifications.)
54
+ class RowManager
55
+ attr_reader :low_card_model
56
+
57
+ # Creates a new instance for the given low-card model.
58
def initialize(low_card_model)
  # Only classes that identify themselves as low-card tables are accepted.
  is_low_card = low_card_model.respond_to?(:is_low_card_table?) && low_card_model.is_low_card_table?
  raise ArgumentError, "You must supply a low-card AR model class, not: #{low_card_model.inspect}" unless is_low_card

  @low_card_model = low_card_model
  @table_unique_index = LowCardTables::LowCardTable::TableUniqueIndex.new(low_card_model)
  # Filled in later, via #referred_to_by.
  @referring_models = [ ]
end
67
+
68
+ attr_reader :referring_models
69
+
70
+ # Tells us that the low-card model we're operating on behalf of is referenced by the given +referring_model_class+.
71
+ # This +referring_model_class+ should be an ActiveRecord class that has declared 'has_low_card_table' on this
72
+ # low-card table.
73
+ #
74
+ # We keep track of this and expose it for a few reasons:
75
+ #
76
+ # * If we need to collapse the rows in this low-card table because a column has been removed, we use this list of
77
+ # referring models to know which columns have a foreign key to this table;
78
+ # * When someone calls #reset_column_information on the low-card table, we re-compute (and re-install) the set of
79
+ # delegated methods from all models that refer to this low-card table.
80
def referred_to_by(referring_model_class)
  # Set-union, so registering the same class twice leaves only one entry.
  @referring_models = @referring_models | [ referring_model_class ]
end
83
+
84
+ # Tells us that someone called #reset_column_information on the low-card table; we'll inform all referring models
85
+ # of that fact.
86
def column_information_reset!
  @referring_models.each do |referring_model|
    manager = referring_model._low_card_associations_manager
    manager.low_card_column_information_reset!(@low_card_model)
  end
end
89
+
90
+ # Returns all rows in the low-card table. This behaves semantically identically to simply calling ActiveRecord's
91
+ # #all method on the low-card table itself, but it returns the data from cache.
92
def all_rows
  # Served by the cache object rather than by querying the model directly.
  cache.all_rows
end
95
+
96
+ # Flushes the cache immediately (assuming we have any cached data at all).
97
def flush_cache!
  # :manually_requested is the reason passed along to #flush!.
  flush!(:manually_requested)
end
100
+
101
+ # Given a single primary-key ID of a low-card row, returns the row for that ID. Given an Array of one or more
102
+ # primary-key IDs, returns a Hash mapping each of those IDs to the corresponding row. Properly flushes the cache
103
+ # and tries again if given an ID that doesn't exist in cache.
104
def rows_for_ids(id_or_ids)
  cache.rows_for_ids(id_or_ids)
rescue LowCardTables::Errors::LowCardIdNotFoundError => not_found
  # Flush the cache and retry exactly once; a second failure propagates to the caller.
  flush!(:id_not_found, :ids => not_found.ids)
  cache.rows_for_ids(id_or_ids)
end
112
+
113
+ # A synonym for #rows_for_ids.
114
def row_for_id(id)
  # Pure alias: everything is delegated to #rows_for_ids.
  rows_for_ids(id)
end
117
+
118
+ # Given a single Hash specifying zero or more constraints for low-card rows (i.e., mapping zero or more columns
119
+ # of the low-card table to specific values for those columns), returns a (possibly empty) Array of IDs of
120
+ # low-card rows that match those constraints.
121
+ #
122
+ # Given an array of one or more Hashes, each of which specify zero or more constraints for low-card rows, returns
123
+ # a Hash mapping each of those Hashes to a (possibly empty) Array of IDs of low-card rows that match each
124
+ # Hash.
125
+ #
126
+ # Given a block (in which case no hashes may be passed), returns an Array of IDs of low-card rows that match the
127
+ # block. The block is passed an instance of the low-card model class, and the return value of the block (truthy
128
+ # or falsy) determines whether the ID of that row is included in the return value or not.
129
def ids_matching(hash_or_hashes = nil, &block)
  # The shared logic lives in #do_matching; the symbol names the cache method it should invoke.
  do_matching(hash_or_hashes, block, :ids_matching)
end
132
+
133
+ # Given a single Hash specifying zero or more constraints for low-card rows (i.e., mapping zero or more columns
134
+ # of the low-card table to specific values for those columns), returns a (possibly empty) Array of
135
+ # low-card rows that match those constraints.
136
+ #
137
+ # Given an array of one or more Hashes, each of which specify zero or more constraints for low-card rows, returns
138
+ # a Hash mapping each of those Hashes to a (possibly empty) Array of low-card rows that match each
139
+ # Hash.
140
+ #
141
+ # Given a block (in which case no hashes may be passed), returns an Array of low-card rows that match the
142
+ # block. The block is passed an instance of the low-card model class, and the return value of the block (truthy
143
+ # or falsy) determines whether that row is included in the return value or not.
144
def rows_matching(hash_or_hashes = nil, &block)
  # The shared logic lives in #do_matching; the symbol names the cache method it should invoke.
  do_matching(hash_or_hashes, block, :rows_matching)
end
147
+
148
+ # Given a single Hash specifying values for every column in the low-card table, returns an instance of the
149
+ # low-card table, already existing in the database, for that combination of values.
150
+ #
151
+ # Given an array of Hashes, each specifying values for every column in the low-card table, returns a Hash
152
+ # mapping each of those Hashes to an instance of the low-card table, already existing in the database, for that
153
+ # combination of values.
154
+ #
155
+ # If you request an instance for a combination of values that doesn't exist in the table, it will simply be
156
+ # mapped to +nil+. Under no circumstances will rows be added to the database.
157
def find_rows_for(hash_hashes_object_or_objects)
  # +false+: look up only, never create.
  do_find_or_create(hash_hashes_object_or_objects, false)
end
160
+
161
+ # Given a single Hash specifying values for every column in the low-card table, returns an instance of the
162
+ # low-card table for that combination of values. The row in question will be created if it doesn't already
163
+ # exist.
164
+ #
165
+ # Given an array of Hashes, each specifying values for every column in the low-card table, returns a Hash
166
+ # mapping each of those Hashes to an instance of the low-card table for that combination of values. Rows for
167
+ # any missing combinations of values will be created. (Creation is done in bulk, using +activerecord-import+,
168
+ # so this method will be fast no matter how many rows need to be created.)
169
def find_or_create_rows_for(hash_hashes_object_or_objects)
  # +true+: create any combinations that are missing.
  do_find_or_create(hash_hashes_object_or_objects, true)
end
172
+
173
+ # Behaves identically to #find_rows_for, except that it returns IDs instead of rows.
174
def find_ids_for(hash_hashes_object_or_objects)
  found_rows = find_rows_for(hash_hashes_object_or_objects)
  row_map_to_id_map(found_rows)
end
177
+
178
+ # Behaves identically to #find_or_create_rows_for, except that it returns IDs instead of rows.
179
def find_or_create_ids_for(hash_hashes_object_or_objects)
  rows = find_or_create_rows_for(hash_hashes_object_or_objects)
  row_map_to_id_map(rows)
end
182
+
183
+ # Returns the set of columns on the low-card table that we should consider "value columns" -- i.e., those that
184
+ # contain data values, rather than metadata, like the primary key, created_at/updated_at, and so on.
185
+ #
186
+ # Columns that are excluded:
187
+ #
188
+ # * The primary key
189
+ # * created_at and updated_at
190
+ # * Any additional columns specified using the +:exclude_column_names+ option when declaring +is_low_card_table+.
191
def value_column_names
  value_columns.map { |column| column.name }
end
194
+
195
+ # Iterates through this table, finding duplicate rows and collapsing them. See RowCollapser for far more
196
+ # information.
197
def collapse_rows_and_update_referrers!(low_card_options = { })
  row_collapser = LowCardTables::LowCardTable::RowCollapser.new(@low_card_model, low_card_options)

  # Flush the cache once the collapse has run, then hand back whatever the collapser returned.
  row_collapser.collapse!.tap { flush!(:collapse_rows_and_update_referrers) }
end
204
+
205
+ # If this table already has the correct unique index across all value columns, does nothing.
206
+ #
207
+ # If this table does not have the correct unique index, and +create_if_needed+ is truthy, then creates the index.
208
+ # If this table does not have the correct unique index, and +create_if_needed+ is falsy, then raises
209
+ # LowCardTables::Errors::LowCardNoUniqueIndexError.
210
def ensure_has_unique_index!(create_if_needed = false)
  # All of the work is delegated to the TableUniqueIndex built in the constructor.
  @table_unique_index.ensure_present!(create_if_needed)
end
213
+
214
+ # If this table currently has a unique index across all value columns, removes it.
215
def remove_unique_index!
  # All of the work is delegated to the TableUniqueIndex built in the constructor.
  @table_unique_index.remove!
end
218
+
219
+
220
+ private
221
+ # Given a Hash that maps keys to instances of the low-card class, returns a Hash that is identical in every way
222
+ # except that rows are replaced with their IDs. This is what we use to implement, for example,
223
+ # #find_or_create_ids_for on top of #find_or_create_rows_for, trivially.
224
def row_map_to_id_map(m)
  # A bare row (or nil) rather than a map: return its ID, or nil.
  return (m ? m.id : nil) unless m.kind_of?(Hash)

  # Same keys, but each row is swapped for its ID; missing rows stay nil.
  id_map = { }
  m.each_pair { |key, row| id_map[key] = row ? row.id : nil }
  id_map
end
239
+
240
+ # This is used to implement #rows_matching and #ids_matching on top of the Cache. +hash_or_hashes+ is a single
241
+ # Hash or an array of Hashes, +block+ is a callable block (and you're only allowed to pass +hash_or_hashes+ _or_
242
+ # +block+, not both), and +method_name+ is the name of the method on Cache that we should call to implement this
243
+ # method.
244
+ #
245
+ # Since Cache does most of the work for us, this method is basically responsible for sanitizing input/output, and
246
+ # detecting schema-change issues (as evidenced by LowCardColumnNotPresentError) and retrying, once. (This is so
247
+ # that if code starts using a column name that was not present on the table at the last time the cache was read,
248
+ # but since has been migrated in, we'll detect the change and react correctly.)
249
def do_matching(hash_or_hashes, block, method_name)
  # One attempt: normalize the input, then dispatch to the named method on the cache.
  attempt = lambda do
    partial_hashes = to_array_of_partial_hashes(hash_or_hashes)
    cache.send(method_name, partial_hashes, &block)
  end

  matches = begin
    attempt.call
  rescue LowCardTables::Errors::LowCardColumnNotPresentError
    # Flush the cache and retry exactly once; a second failure propagates to the caller.
    flush!(:schema_change)
    attempt.call
  end

  # Array in => Hash out, untouched.
  return matches if hash_or_hashes.kind_of?(Array)

  # Single input => unwrap the (at most one) entry in the result.
  unless matches.kind_of?(Hash) && matches.size <= 1
    raise "We passed in #{hash_or_hashes.inspect}, but got back #{matches.inspect}?"
  end

  matches.size > 0 ? matches.values[0] : nil
end
266
+
267
+ # This is used to implement #find_rows_for and #find_or_create_rows_for; the two methods are very similar except
268
+ # in how they handle nonexistent rows, so we use this method to deal with both of them.
269
def do_find_or_create(hash_hashes_object_or_objects, do_create)
  # Input manipulation: map each piece of input to its complete attribute Hash.
  input_to_complete_hash_map = map_input_to_complete_hashes(hash_hashes_object_or_objects)
  complete_hashes = input_to_complete_hash_map.values

  # Do the actual lookup in the cache.
  existing = rows_matching(complete_hashes)
  not_found = complete_hashes.reject { |h| existing[h].length > 0 }

  # See if there's something we still don't have and if we need to create it.
  if not_found.length > 0 && do_create
    # We actually pass in _all_ the rows we want here, rather than just the ones that aren't found yet. Why?
    # Under the covers, #flush_lock_and_create_rows_for! is at the heart of our transactional core -- it locks
    # the table and checks again for which rows are present. We want to give it all of the data required, so that,
    # once it acquires the exclusive table-level lock, it knows exactly which data it needs to ensure is present
    # in the table.
    #
    # Passing data acquired outside a transaction into a transaction that's supposed to act on it is just asking
    # for trouble.
    existing = flush_lock_and_create_rows_for!(complete_hashes)
  end

  # Output manipulation and validation.
  out = { }
  input_to_complete_hash_map.each do |input, complete_hash|
    values = existing[complete_hash]

    if values.length == 0 && do_create
      # The hash we report is +complete_hash+ -- the key we actually looked up. (This message previously
      # interpolated an undefined local, so this path raised NameError instead of this diagnostic.)
      raise %{Whoa: we asked for a row for this hash: #{complete_hash.inspect};
since this has been asserted to be a complete key, we should only ever get back a single row,
and we should always get back one row since we will have created the row if necessary,
but we got back these rows:

#{values.inspect}}
    end

    # When we're not creating, a missing combination is simply mapped to nil.
    out[input] = values[0]
  end

  # Array in => Hash out; single input => the single row (or nil).
  if hash_hashes_object_or_objects.kind_of?(Array)
    out
  else
    out[out.keys.first]
  end
end
314
+
315
+ # Returns all of the ::ActiveRecord::ConnectionAdapters::Column objects for all of the value columns in this
316
+ # table.
317
+ #
318
+ # If this table doesn't exist yet, we return an empty set. This is important: this allows your Rails app to still
319
+ # pass through the boot phase if you have a model for a low-card model whose underlying database table hasn't
320
+ # actually been created yet. (If we didn't do this, then the traditional pattern of adding a migration and a model
321
+ # in the same commit would fail -- other developers would get the model but not have the table, and Rails wouldn't
322
+ # even be able to boot to migrate the table in.)
323
def value_columns
  # No table yet => no value columns.
  return [ ] unless @low_card_model.table_exists?

  @low_card_model.columns.reject do |column|
    # Names are compared stripped and lowercased against the skip list.
    normalized_name = column.name.to_s.strip.downcase
    column_names_to_skip.include?(normalized_name) || column.primary
  end
end
335
+
336
+ # This simply raises an exception when we can't create new rows in the low-card table for some reason. We want
337
+ # to get a very nice, detailed message in return, so we have a method that composes something telling us exactly
338
+ # what happened.
339
+ #
340
+ # +exception+ is the exception we got upon creation, if any. +keys+ is the set of keys (names of columns) we
341
+ # passed to the #import call, and +failed_instances+ is the set of instances that +activerecord_import+ reported
342
+ # as failing.
343
+ #
344
+ # This method eventually raises a LowCardInvalidLowCardRowsError.
345
def could_not_create_new_rows!(exception, keys, failed_instances)
  # Builds a detailed, human-readable explanation of why new low-card rows could not be created, and
  # always raises LowCardTables::Errors::LowCardInvalidLowCardRowsError with it.
  #
  # +exception+        - the exception raised by the import, or nil if there was none.
  # +keys+             - the column names that were passed to the import.
  # +failed_instances+ - the instances (or raw value rows) that could not be imported; may be nil.
  attempted = failed_instances || [ ]

  message = %{The low_card_tables gem was trying to create one or more new rows in
the low-card table '#{@low_card_model.table_name}', but, when we went to create those rows...

}

  if exception
    # An exception that was never raised has no backtrace at all; treat that as an empty one.
    backtrace = (exception.backtrace || [ ]).join("\n ")

    message << %{- The database refused to create them. This is usually because one or more of these rows
violates a database constraint -- like a NOT NULL or CHECK constraint.

The exception we got was:

(#{exception.class.name}) #{exception.message}
#{backtrace}}
  elsif failed_instances
    message << "- They failed validation."
  end

  if attempted.length > 0
    # Keep the list of attempted rows visually separate from whatever explanation precedes it.
    message << "\n\n" unless message.end_with?("\n\n")

    message << %{Here's what we tried to import:

Keys: #{keys.inspect}
Values:

}

    attempted.each do |failed_instance|
      line = " #{failed_instance.inspect}"

      # Raw value arrays (passed along after a database exception) carry no validation errors.
      if failed_instance.respond_to?(:errors)
        line << " ERRORS: #{failed_instance.errors.full_messages.join("; ")}"
      end

      message << "#{line}\n"
    end
  end

  raise LowCardTables::Errors::LowCardInvalidLowCardRowsError, message
end
385
+
386
+ # This method is called when someone has called #find_or_create_rows_for or #find_or_create_ids_for, and we've
387
+ # discovered that we do, in fact, need to create one or more rows in the database.
388
+ #
389
+ # Because we need to be careful of race conditions -- many other processes may be running the exact same code at
390
+ # the exact same time -- we do the following:
391
+ #
392
+ # * First, obtain an exclusive table lock for the database we're using. Exactly how this works is database-
393
+ # dependent and not something ActiveRecord knows how to handle for us.
394
+ # * Flush the cache and re-check if we still need to create rows. We do this because it's possible some other
395
+ # process created the rows in-between the time we checked and the time we locked the table. But, now, if we
396
+ # still are missing rows, we know we're the only process who can create them, since we have the exclusive
397
+ # table lock.
398
+ # * Create the rows in the database, raising a detailed exception if the database raises an exception or if
399
+ # +activerecord_import+ reports any failed instances.
400
+ # * Fire an ActiveSupport::Notifications event telling everybody that we just created rows.
401
+ # * Flush the cache again, since we just created rows in the database and so the cache is guaranteed to be
402
+ # out-of-date.
403
+ # * Return the rows that now should be present and match the input.
404
def flush_lock_and_create_rows_for!(input)
  with_locked_table do
    # Start from a clean cache so that rows created by other processes in the meantime are seen.
    flush!(:creating_rows, :context => :before_import, :new_rows => input)

    # Re-normalize the input: the flush may have picked up a schema modification.
    hashes = map_input_to_complete_hashes(input).values

    # Given the result of #rows_matching, picks out the hashes that have no row at all.
    missing_from = lambda { |found| hashes.reject { |hash| found[hash].length > 0 } }

    to_create = missing_from.call(rows_matching(hashes))

    unless to_create.empty?
      keys = value_column_names
      values = to_create.map { |hash| keys.map { |key| hash[key] } }

      begin
        import_result = @low_card_model.import(keys, values, :validate => true)
        failed = import_result.failed_instances
        could_not_create_new_rows!(nil, keys, failed) if failed.length > 0
      rescue ::ActiveRecord::StatementInvalid => si
        could_not_create_new_rows!(si, keys, values)
      end

      instrument('rows_created', :keys => keys, :values => values)
    end

    # The import changed the table, so the cache is flushed again before looking the rows up.
    flush!(:creating_rows, :context => :after_import, :new_rows => hashes)

    existing = rows_matching(hashes)
    unfound = missing_from.call(existing)

    unless unfound.empty?
      raise %{You asked for low-card IDs for one or more hashes specifying rows that didn't exist,
but, when we tried to create them, even after an import that appeared to succeed, we couldn't
find the models that should've now existed. This should never happen, and may be indicative
of a bug in the low-card tables system. Here's what we tried to create, but then couldn't find:

#{unfound.join("\n")}}
    end

    existing
  end
end
448
+
449
+ # Locks the table for this @low_card_model, using whatever database-specific code is required. This also surrounds
450
+ # the block passed with a transaction on the table in question.
451
def with_locked_table(&block)
  # The exclusive lock is taken inside a transaction on the model, and the block runs inside both.
  @low_card_model.transaction { with_database_exclusive_table_lock(&block) }
end
458
+
459
+ # Obtains an exclusive lock on the table for this low-card model. This is much, much stronger than the built-in
460
+ # ActiveRecord #lock! or #with_lock support (see ActiveRecord::Locking::Pessimistic); this should always lock
461
+ # the entire table against reading and writing.
462
+ #
463
+ # We could've handled this by injecting methods into the ActiveRecord connection adapters for each specific
464
+ # database type, but that's actually quite a bit more tricky metaprogramming (what if the adapters haven't been
465
+ # loaded yet when our Gem starts up, but will get loaded later? -- and we definitely don't want to make this
466
+ # Gem depend on the union of all supported database adapters!) than doing it this way.
467
+ #
468
+ # In other words, this may be a bit of a gross hack (groping the class name of the adapter in question), but it's
469
+ # arguably a lot more reliable and easier to understand than the other way of doing this.
470
def with_database_exclusive_table_lock(&block)
  # The adapter is recognized purely by the class name of the model's connection.
  adapter_class_name = @low_card_model.connection.class.name

  if /postgresql/i =~ adapter_class_name
    with_database_exclusive_table_lock_postgresql(&block)
  elsif /mysql/i =~ adapter_class_name
    with_database_exclusive_table_lock_mysql(&block)
  elsif /sqlite/i =~ adapter_class_name
    with_database_exclusive_table_lock_sqlite(&block)
  else
    # Refuse to continue rather than create rows without an exclusive lock.
    raise LowCardTables::Errors::LowCardUnsupportedDatabaseError, %{You asked for low-card IDs for one or more hashes specifying rows that didn't exist,
but, when we went to create them, we discovered that we don't know how to exclusively
lock tables in your database. (This is very important so that we don't accidentally
create duplicate rows.)

Your database adapter's class name is '#{adapter_class_name}'; please submit at least
a bug report, or, even better, a patch. :) Adding support is quite easy, as long as you know the
equivalent of 'LOCK TABLE'(s) in your database.}
  end
end
486
+
487
+ # Obtains an exclusive table lock for a PostgreSQL database. PostgreSQL releases all table locks at the end of
488
+ # the current transaction, so we just need to lock the table -- unlocking happens automatically when we release
489
+ # our transaction, above.
490
def with_database_exclusive_table_lock_postgresql(&block)
  # The table name is quoted by the connection itself rather than via :sanitize_sql, which would
  # produce a string literal (LOCK TABLE 'foo') instead of an identifier.
  connection = @low_card_model.connection
  quoted_table = connection.quote_table_name(@low_card_model.table_name)

  # No explicit unlock is issued here; only the lock statement and then the block are run.
  run_sql("LOCK TABLE #{quoted_table}", { })
  block.call
end
498
+
499
+ # Obtains an exclusive table lock for a SQLite database. There is no locking possible or needed, since SQLite is
500
+ # a single-user database.
501
def with_database_exclusive_table_lock_sqlite(&block)
  # No lock statement is issued for SQLite: the block is simply run and its result returned.
  block.call
end
504
+
505
+ # Obtains an exclusive table lock for a MySQL database. We need to make sure we unlock the table once the block
506
+ # is complete.
507
def with_database_exclusive_table_lock_mysql(&block)
  quoted_table = @low_card_model.connection.quote_table_name(@low_card_model.table_name)
  run_sql("LOCK TABLES #{quoted_table} WRITE", { })
  block.call
ensure
  # Always attempt to unlock, whether or not the block (or the lock itself) succeeded. A failure to
  # unlock is deliberately swallowed so that it cannot mask the outcome of the block.
  begin
    run_sql("UNLOCK TABLES", { })
  rescue ::ActiveRecord::StatementInvalid
    # we tried our best!
  end
end
520
+
521
+ # Runs a SQL statement, specified as a string with substitution parameters.
522
def run_sql(statement, params)
  connection = @low_card_model.connection
  # sanitize_sql is invoked via #send, exactly as before, so it need not be a public method.
  sanitized = @low_card_model.send(:sanitize_sql, [ statement, params ])
  connection.execute(sanitized)
end
525
+
526
+ # Names of columns in low-card tables that we should always skip, no matter what.
527
# Frozen so that the shared list cannot be modified in place by accident.
COLUMN_NAMES_TO_ALWAYS_SKIP = %w{created_at updated_at}.freeze
528
+
529
+ # Returns the names of all columns in this table that we should skip when determining what to treat as a value
530
+ # column for this table (as opposed to things like the primary key, created_at, updated_at, and so on, which are
531
+ # metadata and shouldn't play a direct role in the low-card system).
532
def column_names_to_skip
  # Computed once per instance and then reused.
  return @column_names_to_skip if @column_names_to_skip

  # :exclude_column_names may be absent, a single name or a list of names; names are normalized to
  # stripped, lower-case strings.
  excluded = Array(@low_card_model.low_card_options[:exclude_column_names] || [ ])
  @column_names_to_skip = COLUMN_NAMES_TO_ALWAYS_SKIP + excluded.map { |name| name.to_s.strip.downcase }
end
538
+
539
+ # Given something that can be a single Hash, an array of Hashes, a single instance of the @low_card_model class,
540
+ # or an array of instances of the @low_card_model class, returns a new Hash.
541
+ #
542
+ # This new Hash has, as keys, each of the inputs to this method, and, as values, a Hash for that input that is
543
+ # a complete, normalized Hash representing that input.
544
+ #
545
+ # This method will also raise an exception if any of the inputs do not include all of the necessary keys for the
546
+ # low-card table -- thus, this method can only be used for methods like #find_rows_for or #find_or_create_ids_for,
547
+ # where the input must each specify exactly one low-card row, rather than methods like
548
+ # #rows_matching/#ids_matching, where each input may match multiple low-card rows.
549
def map_input_to_complete_hashes(hash_hashes_object_or_objects)
  # Array() is deliberately not used: it would turn a single Hash into an Array of pairs.
  inputs = hash_hashes_object_or_objects
  inputs = [ inputs ] unless inputs.kind_of?(Array)

  complete_hashes_by_input = { }

  inputs.uniq.each do |input|
    candidate =
      if input.kind_of?(Hash)
        # Lets callers use either Strings or Symbols as keys.
        input.with_indifferent_access
      elsif input.kind_of?(@low_card_model)
        # A model instance is described by its attributes, minus the primary key.
        attributes = input.attributes.dup.with_indifferent_access
        attributes.delete(@low_card_model.primary_key)
        attributes
      else
        raise "Invalid input to this method -- this must be a Hash, or an instance of #{@low_card_model}: #{input.inspect}"
      end

    # Each original input is the key; its validated, complete hash is the value.
    complete_hashes_by_input[input] = ensure_complete_key(candidate)
  end

  complete_hashes_by_input
end
574
+
575
+ # Given a single Hash that should contain values for all value columns in the low-card table -- no less, no more --
576
+ # validates that the Hash contains no extra columns and no missing columns, and returns it. This method will allow
577
+ # you to skip any columns in the input that have defaults in the database, and will correctly fill in those defaults
578
+ # in the returned Hash.
579
+ #
580
+ # Because this requires all columns to be present in the input, it can only be used for methods like
581
+ # #find_rows_for or #find_or_create_ids_for that require fully-specified input hashes.
582
def ensure_complete_key(hash)
  # Validates that +hash+ names every value column of the low-card table and nothing else, and
  # returns it. Columns that are absent from +hash+ but have a default on the model's column are
  # filled in (in place) with that default.
  #
  # Raises LowCardTables::Errors::LowCardColumnNotSpecifiedError if columns are still missing, and
  # LowCardTables::Errors::LowCardColumnNotPresentError if +hash+ names columns that are not value columns.
  keys_as_strings = hash.keys.map(&:to_s)
  missing = value_column_names - keys_as_strings
  extra = keys_as_strings - value_column_names

  missing = missing.select do |missing_column_name|
    normalized_name = missing_column_name.to_s.strip.downcase
    column = @low_card_model.columns.detect { |c| c.name.to_s.strip.downcase == normalized_name }

    # The default must be compared against nil rather than tested for truthiness: a default of
    # +false+ is a perfectly valid default and must be filled in, not reported as missing.
    if column && !column.default.nil?
      hash[column.name] = column.default
      false
    else
      true
    end
  end

  if missing.length > 0
    raise LowCardTables::Errors::LowCardColumnNotSpecifiedError, "The following is not a complete specification of all columns in low-card table '#{@low_card_model.table_name}'; it is missing these columns: #{missing.join(", ")}: #{hash.inspect}"
  end

  if extra.length > 0
    raise LowCardTables::Errors::LowCardColumnNotPresentError, "The following specifies extra columns that are not present in low-card table '#{@low_card_model.table_name}'; these columns are not present in the underlying model: #{extra.join(", ")}: #{hash.inspect}"
  end

  hash
end
607
+
608
+ # Given something that may be a single Hash or an array of Hashes, returns an array of Hashes, and makes sure that
609
+ # the input (or each element of the input) is a valid partial key into the low-card table. A partial key is a Hash
610
+ # that specifies zero or more value columns from the low-card table -- but you're not allowed to specify anything
611
+ # in the Hash that isn't a value column in the low-card table.
612
def to_array_of_partial_hashes(array)
  # A single (non-Array) input is wrapped; an Array is used as it is.
  partial_hashes = array.kind_of?(Array) ? array : [ array ]

  # Every element must be a valid partial key; #each hands back the very array it iterated over.
  partial_hashes.each { |partial_hash| assert_partial_key!(partial_hash) }
end
617
+
618
+ # Given a Hash, raises an error if that Hash is not a valid partial key into the low-card table -- i.e., if it
619
+ # contains keys that are not valid value columns in the low-card table.
620
def assert_partial_key!(hash)
  # Keys are compared as Strings, so Symbol keys are accepted too.
  unknown_columns = hash.keys.map(&:to_s) - value_column_names
  return if unknown_columns.empty?

  raise LowCardTables::Errors::LowCardColumnNotPresentError, "The following specifies extra columns that are not present in low-card table '#{@low_card_model.table_name}'; these columns are not present in the underlying model: #{unknown_columns.join(", ")}: #{hash.inspect}"
end
628
+
629
+ # Fetches the cache we should use. This takes care of creating a cache if one is not present; it also takes care
630
+ # of flushing the cache and creating a new one if the current cache is stale.
631
def cache
  now = current_time

  # An existing cache is thrown away if the expiration policy considers it stale.
  if @cache
    loaded_at = @cache.loaded_at
    if cache_expiration_policy_object.stale?(loaded_at, now)
      flush!(:stale, :loaded => loaded_at, :now => now)
    end
  end

  # Either there never was a cache, or it has just been flushed: load a fresh one.
  if ! @cache
    instrument('cache_load') do
      @cache = LowCardTables::LowCardTable::Cache.new(@low_card_model, @low_card_model.low_card_options)
    end
  end

  @cache
end
647
+
648
+ # Flushes the cache, for the reason given, and fires the appropriate ActiveSupport::Notification instrumentation.
649
+ # +reason+ is the reason given in the notification, and +notification_options+ are added to the payload for the
650
+ # notification.
651
+ #
652
+ # Whenever we flush the cache, we also ask ActiveRecord to purge its idea of what columns are on the table. This
653
+ # ensures that we'll stay in sync with any underlying schema changes, and hence adapt to an evolving schema on
654
+ # the fly, as best we can.
655
def flush!(reason, notification_options = { })
  # The flush notification is only fired if there actually is a cache to throw away.
  if @cache
    payload = notification_options.merge(:reason => reason)
    instrument('cache_flush', payload) { @cache = nil }
  end

  # The model's column information is reset on every call, with or without a cache.
  @low_card_model.reset_column_information
end
664
+
665
+ # A thin wrapper around ActiveSupport::Notifications.
666
def instrument(event, options = { }, &block)
  # Events are namespaced under "low_card_tables." and always carry the model in their payload.
  event_name = "low_card_tables.#{event}"
  payload = options.merge(:low_card_model => @low_card_model)

  ::ActiveSupport::Notifications.instrument(event_name, payload, &block)
end
669
+
670
+ # Returns the correct cache-expiration policy object to use for the table in question.
671
def cache_expiration_policy_object
  # A policy set on the model itself wins; otherwise the one from LowCardTables is used.
  model_policy = @low_card_model.low_card_cache_expiration_policy_object
  return model_policy if model_policy

  LowCardTables.low_card_cache_expiration_policy_object
end
674
+
675
+ # Returns the current time. Broken out into a separate method so that we can easily override it in tests.
676
def current_time
  # Kept as its own method so that tests can substitute a fixed time.
  Time.now
end
679
+ end
680
+ end
681
+ end