low_card_tables 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +18 -0
  3. data/.travis.yml +59 -0
  4. data/Gemfile +17 -0
  5. data/LICENSE +21 -0
  6. data/README.md +75 -0
  7. data/Rakefile +6 -0
  8. data/lib/low_card_tables.rb +72 -0
  9. data/lib/low_card_tables/active_record/base.rb +55 -0
  10. data/lib/low_card_tables/active_record/migrations.rb +223 -0
  11. data/lib/low_card_tables/active_record/relation.rb +35 -0
  12. data/lib/low_card_tables/active_record/scoping.rb +87 -0
  13. data/lib/low_card_tables/errors.rb +74 -0
  14. data/lib/low_card_tables/has_low_card_table/base.rb +114 -0
  15. data/lib/low_card_tables/has_low_card_table/low_card_association.rb +273 -0
  16. data/lib/low_card_tables/has_low_card_table/low_card_associations_manager.rb +143 -0
  17. data/lib/low_card_tables/has_low_card_table/low_card_dynamic_method_manager.rb +224 -0
  18. data/lib/low_card_tables/has_low_card_table/low_card_objects_manager.rb +80 -0
  19. data/lib/low_card_tables/low_card_table/base.rb +184 -0
  20. data/lib/low_card_tables/low_card_table/cache.rb +214 -0
  21. data/lib/low_card_tables/low_card_table/cache_expiration/exponential_cache_expiration_policy.rb +151 -0
  22. data/lib/low_card_tables/low_card_table/cache_expiration/fixed_cache_expiration_policy.rb +23 -0
  23. data/lib/low_card_tables/low_card_table/cache_expiration/has_cache_expiration.rb +100 -0
  24. data/lib/low_card_tables/low_card_table/cache_expiration/no_caching_expiration_policy.rb +13 -0
  25. data/lib/low_card_tables/low_card_table/cache_expiration/unlimited_cache_expiration_policy.rb +13 -0
  26. data/lib/low_card_tables/low_card_table/row_collapser.rb +175 -0
  27. data/lib/low_card_tables/low_card_table/row_manager.rb +681 -0
  28. data/lib/low_card_tables/low_card_table/table_unique_index.rb +134 -0
  29. data/lib/low_card_tables/version.rb +4 -0
  30. data/lib/low_card_tables/version_support.rb +52 -0
  31. data/low_card_tables.gemspec +69 -0
  32. data/spec/low_card_tables/helpers/database_helper.rb +148 -0
  33. data/spec/low_card_tables/helpers/query_spy_helper.rb +47 -0
  34. data/spec/low_card_tables/helpers/system_helpers.rb +63 -0
  35. data/spec/low_card_tables/system/basic_system_spec.rb +254 -0
  36. data/spec/low_card_tables/system/bulk_system_spec.rb +334 -0
  37. data/spec/low_card_tables/system/caching_system_spec.rb +531 -0
  38. data/spec/low_card_tables/system/migrations_system_spec.rb +747 -0
  39. data/spec/low_card_tables/system/options_system_spec.rb +581 -0
  40. data/spec/low_card_tables/system/queries_system_spec.rb +142 -0
  41. data/spec/low_card_tables/system/validations_system_spec.rb +88 -0
  42. data/spec/low_card_tables/unit/active_record/base_spec.rb +53 -0
  43. data/spec/low_card_tables/unit/active_record/migrations_spec.rb +207 -0
  44. data/spec/low_card_tables/unit/active_record/relation_spec.rb +47 -0
  45. data/spec/low_card_tables/unit/active_record/scoping_spec.rb +101 -0
  46. data/spec/low_card_tables/unit/has_low_card_table/base_spec.rb +79 -0
  47. data/spec/low_card_tables/unit/has_low_card_table/low_card_association_spec.rb +287 -0
  48. data/spec/low_card_tables/unit/has_low_card_table/low_card_associations_manager_spec.rb +190 -0
  49. data/spec/low_card_tables/unit/has_low_card_table/low_card_dynamic_method_manager_spec.rb +234 -0
  50. data/spec/low_card_tables/unit/has_low_card_table/low_card_objects_manager_spec.rb +70 -0
  51. data/spec/low_card_tables/unit/low_card_table/base_spec.rb +207 -0
  52. data/spec/low_card_tables/unit/low_card_table/cache_expiration/exponential_cache_expiration_policy_spec.rb +128 -0
  53. data/spec/low_card_tables/unit/low_card_table/cache_expiration/fixed_cache_expiration_policy_spec.rb +25 -0
  54. data/spec/low_card_tables/unit/low_card_table/cache_expiration/has_cache_expiration_policy_spec.rb +100 -0
  55. data/spec/low_card_tables/unit/low_card_table/cache_expiration/no_caching_expiration_policy_spec.rb +14 -0
  56. data/spec/low_card_tables/unit/low_card_table/cache_expiration/unlimited_cache_expiration_policy_spec.rb +14 -0
  57. data/spec/low_card_tables/unit/low_card_table/cache_spec.rb +282 -0
  58. data/spec/low_card_tables/unit/low_card_table/row_collapser_spec.rb +109 -0
  59. data/spec/low_card_tables/unit/low_card_table/row_manager_spec.rb +918 -0
  60. data/spec/low_card_tables/unit/low_card_table/table_unique_index_spec.rb +117 -0
  61. metadata +206 -0
@@ -0,0 +1,13 @@
1
module LowCardTables
  module LowCardTable
    module CacheExpiration
      # Cache-expiration policy that switches caching off. Every staleness check answers "stale", so a caller
      # that consults this policy before each read will reload from the database on every access.
      class NoCachingExpirationPolicy
        # Always reports the cache as stale. Both timestamps are accepted and ignored.
        def stale?(cache_time, current_time)
          true
        end
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
module LowCardTables
  module LowCardTable
    module CacheExpiration
      # Cache-expiration policy that keeps cached data forever. A staleness check never succeeds, so the cache
      # is only ever reloaded when it is flushed explicitly.
      class UnlimitedCacheExpirationPolicy
        # Never reports the cache as stale. Both timestamps are accepted and ignored.
        def stale?(cache_time, current_time)
          false
        end
      end
    end
  end
end
@@ -0,0 +1,175 @@
1
+ module LowCardTables
2
+ module LowCardTable
3
+ # The RowCollapser is an object that exists solely to contain the code required to collapse rows when someone
4
+ # removes a column from a low-card table in a migration. It's not a particularly well-defined object and resulted
5
+ # from an extraction from RowManager; however, it's still nicer to have this code in a separate object rather than
6
+ # making the RowManager even bigger than it already is.
7
+ #
8
+ # What are we trying to accomplish here? Well, imagine you have this:
9
+ #
10
+ # user_statuses
11
+ # id deleted donation_level gender
12
+ # 1 false 3 female
13
+ # 2 false 5 female
14
+ # 3 false 7 female
15
+ # 4 false 3 male
16
+ # 5 false 5 male
17
+ # 6 false 7 male
18
+ #
19
+ # ...and now imagine we decide to remove the +donation_level+ column. If we do nothing, we'll end up with this:
20
+ #
21
+ # user_statuses
22
+ # id deleted gender
23
+ # 1 false female
24
+ # 2 false female
25
+ # 3 false female
26
+ # 4 false male
27
+ # 5 false male
28
+ # 6 false male
29
+ #
30
+ # ...but this violates the principle of low-card tables that they have only one row for each unique combination of
31
+ # values. What we need to do is reduce it to this...
32
+ #
33
+ # user_statuses
34
+ # id deleted gender
35
+ # 1 false female
36
+ # 4 false male
37
+ #
38
+ # ...and then update all columns in all tables that have a +user_status_id+ like so:
39
+ #
40
+ # UPDATE users SET user_status_id = 1 WHERE user_status_id IN (2, 3)
41
+ # UPDATE users SET user_status_id = 4 WHERE user_status_id IN (5, 6)
42
+ #
43
+ # That's the job of this class. LowCardTables::HasLowCardTable::LowCardAssociation is responsible for updating the
44
+ # referring tables themselves; however, this class is responsible for the fundamental operation.
45
+ #
46
+ # In this class, we often refer to the "collapse map"; in the above example, this would be:
47
+ #
48
+ # #<UserStatus id: 1> => [ #<UserStatus id: 2>, #<UserStatus id: 3> ]
49
+ # #<UserStatus id: 4> => [ #<UserStatus id: 5>, #<UserStatus id: 6> ]
50
+ #
51
+ # The keys are the rows of the table that have been collapsed _to_; the values are arrays of rows that have been
52
+ # collapsed _from_.
53
+ class RowCollapser
54
+ # Creates a new instance. +low_card_model+ is the ActiveRecord model class of the low-card table itself;
55
+ # +low_card_options+ is the set of options passed to whatever migration method (e.g., +remove_column+) was
56
+ # invoked to cause the need for a collapse. Options that we pay attention to are:
57
+ #
58
+ # [:low_card_collapse_rows] If present but +false+ or +nil+, then no row collapsing will happen due to the
59
+ # migration command; you'll be left with an invalid low-card table with no unique
60
+ # index, and will need to fix this problem yourself before you can use the table.
61
+ # [:low_card_referrers] Adds one or more models as "referring models" that will have any references to this
62
+ # model updated when the collapsing is done. Generally speaking, it should not be necessary
63
+ # to do this -- this code is aggressive about eagerly loading all models, and ensuring that
64
+ # any that refer to this table are used. But this is available in case you need it.
65
+ # [:low_card_update_referring_models] If present but +false+ or +nil+, then row collapsing will occur as normal,
66
+ # but no referring columns will be updated. You'll thus have dangling foreign
67
+ # keys in any referring models; you'll have to update them yourself.
68
def initialize(low_card_model, low_card_options)
  # Only objects that both respond to #is_low_card_table? and answer truthily are accepted.
  is_low_card = low_card_model.respond_to?(:is_low_card_table?) && low_card_model.is_low_card_table?
  if ! is_low_card
    raise ArgumentError, "You must supply a low-card AR model class, not: #{low_card_model.inspect}"
  end

  # The options are stored untouched; they are interpreted later, when the collapse actually runs.
  @low_card_options = low_card_options
  @low_card_model = low_card_model
end
76
+
77
+ # This should be called after any migration operation on the table that may have caused it to now have
78
+ # duplicate rows. This method looks at the table, detects duplicate rows, picks out winners (and the
79
+ # corresponding losers), and updates rows and referring rows, contingent upon the +low_card_options+ passed
80
+ # in the constructor.
81
+ #
82
+ # Notably, you don't need to tell this method _what_ you did to the table; it simply looks at the current state
83
+ # of the table and deals with duplicate rows. It also means this method is perfectly safe to call on a table that
84
+ # has had no changes, or a table that has had migrations performed on it that don't result in duplicate rows;
85
+ # it will simply see that there are no duplicate rows in the table, and do nothing.
86
+ #
87
+ # This method returns the "collapse map"; see the comment on this class overall for more information. This allows
88
+ # you to do anything you want with the calculated collapse. Normally, you don't _have_ to do anything with it and
89
+ # can ignore it, but it can also be useful if you pass <tt>:low_card_update_referring_models => false</tt> in
90
+ # the +low_card_options+.
91
def collapse!
  # Returns the "collapse map": a Hash whose keys are the surviving ("winner") rows and whose values are
  # Arrays of the duplicate ("loser") rows removed in favor of each winner. Returns an empty Hash when the
  # table contains no duplicates, and nil when collapsing is disabled (below).

  # :low_card_collapse_rows, when present but falsy, tells this method to do nothing at all.
  return if low_card_options.has_key?(:low_card_collapse_rows) && (! low_card_options[:low_card_collapse_rows])

  # :low_card_referrers is "one or more" models, so accept a single bare model as well as an Array of them.
  additional_referring_models = low_card_options[:low_card_referrers] || [ ]
  unless additional_referring_models.kind_of?(Array)
    additional_referring_models = [ additional_referring_models ]
  end

  # Group every row by its value-column attributes. Rows are sorted by ID first, so within each group the
  # row with the lowest ID comes first. (In a healthy table every group has exactly one row; right after a
  # migration that removed a column, that may not be true.)
  rows_by_attributes = low_card_model.all.sort_by(&:id).group_by { |row| value_attributes(row) }

  # Build the collapse map: for each group with duplicates, the lowest-ID row wins and the rest lose. The
  # groups are sliced rather than shifted, so the grouped arrays are left unmodified.
  collapse_map = { }
  rows_by_attributes.each_value do |rows|
    next if rows.length <= 1
    collapse_map[rows.first] = rows[1..-1]
  end

  # No duplicates anywhere: nothing to delete and nothing to update.
  return { } if collapse_map.empty?

  # Delete all the losers. We go through a relation (#where, then #delete_all) rather than passing
  # conditions to #delete_all directly, because newer ActiveRecord versions no longer accept conditions there.
  ids_to_delete = collapse_map.values.map { |losers| losers.map(&:id) }.flatten.sort
  low_card_model.where(:id => ids_to_delete).delete_all

  # Figure out what referring models we need to update.
  all_referring_models = low_card_model.low_card_referring_models | additional_referring_models

  # Run transactions on all of these, plus the low-card model as well.
  #
  # Why do we do this? Isn't just one transaction enough? Well, in default Rails configuration, yes, because
  # all models live on the same database. However, it's so common to use gems (for example, +db_charmer+)
  # that allow different models to live on different databases that we make sure to run transactions on all
  # of them; running nested transactions on the same database is harmless.
  transaction_models = all_referring_models + [ low_card_model ]

  # :low_card_update_referring_models, when present but falsy, leaves referring rows untouched; the caller
  # is then responsible for fixing up the dangling foreign keys using the returned collapse map.
  unless low_card_options.has_key?(:low_card_update_referring_models) && (! low_card_options[:low_card_update_referring_models])
    transactions_on(transaction_models) do
      all_referring_models.each do |referring_model|
        referring_model._low_card_update_collapsed_rows(low_card_model, collapse_map)
      end
    end
  end

  collapse_map
end
150
+
151
+ private
152
+ attr_reader :low_card_options, :low_card_model
153
+
154
+ # Given a model object, extracts a Hash that maps each of the value-column names to the value this model object
155
+ # has for that value column.
156
def value_attributes(row)
  # Project the row's full attribute Hash down to just the low-card value columns, keyed by column name.
  all_attributes = row.attributes
  low_card_model.low_card_value_column_names.inject({ }) do |picked, column_name|
    picked[column_name] = all_attributes[column_name]
    picked
  end
end
162
+
163
+ # Runs transactions on all of the specified models. Because of ActiveRecord's semantics for transactions (which
164
+ # for almost all other use cases are excellent), this has to be a recursive call.
165
def transactions_on(transaction_models, &block)
  # Opens one nested transaction per model, outermost first, and runs the block inside the innermost one.
  # Returns whatever the block returns (as passed back through each model's #transaction).
  #
  # The list is sliced rather than shifted, so the caller's array is left unmodified.
  return block.call if transaction_models.empty?

  outermost = transaction_models.first
  remaining = transaction_models[1..-1]
  outermost.transaction { transactions_on(remaining, &block) }
end
173
+ end
174
+ end
175
+ end
@@ -0,0 +1,681 @@
1
+ require 'active_support'
2
+ require 'activerecord-import'
3
+ require 'low_card_tables/low_card_table/cache'
4
+ require 'low_card_tables/low_card_table/table_unique_index'
5
+ require 'low_card_tables/low_card_table/row_collapser'
6
+
7
+ module LowCardTables
8
+ module LowCardTable
9
+ # In many ways, the RowManager is the beating heart of +low_card_tables+. It is responsible for finding and
10
+ # creating rows in low-card tables, as well as maintaining the unique index across all columns in the table and
11
+ # dealing with any needs from migrations.
12
+ #
13
+ # Because this class is quite complex, some pieces of functionality have been broken out into other classes.
14
+ # The TableUniqueIndex is responsible for maintaining the unique index across all columns in the table, and
15
+ # the RowCollapser handles the case where rows need to be collapsed (unified) because a column was removed from
16
+ # the low-card table.
17
+ #
18
+ # === Cache Notifications
19
+ #
20
+ # This class uses the ActiveSupport::Notifications interface to notify anyone who's interested of cache-related
21
+ # events. In particular, it fires the following events with the following payloads:
22
+ #
23
+ # [low_card_tables.cache_load] <tt>{ :low_card_model => <ActiveRecord model class> }</tt>; this is fired when
24
+ # the cache is loaded from the database, whether that's the first time after startup
25
+ # or after a cache flush.
26
+ # [low_card_tables.cache_flush] <tt>{ :low_card_model => <ActiveRecord model class>, :reason => <some reason> }</tt>;
27
+ # this is fired when there's a cache that is flushed. Additional payload depends on
28
+ # the +:reason+.
29
+ #
30
+ # Reasons for +low_card_tables.cache_flush+ include:
31
+ #
32
+ # [:manually_requested] You called +low_card_flush_cache!+ on the low-card model.
33
+ # [:id_not_found] You requested a low-card row by ID, and we didn't find that ID in the cache. We assume that the ID
34
+ # is likely valid and that it's simply been created since we retrieved the cache from the database,
35
+ # so we flush the cache and try again. +:ids+ is present in the payload, mapping to an array of
36
+ # one or more IDs -- the ID or IDs that weren't found in the cache.
37
+ # [:collapse_rows_and_update_referrers] The low-card table has been migrated and has had a column removed; we've
38
+ # collapsed any now-duplicate rows properly. As such, we need to flush the
39
+ # cache.
40
+ # [:schema_change] We have detected that the schema of the low-card table has changed, and need to flush the cache.
41
+ # [:creating_rows] We're about to create one or more new rows in the low-card table, because a set of attributes
42
+ # that has never been seen before was asked for. Before we actually go try to create them, we
43
+ # lock the table and flush the cache, so that, in the case where some other process has already
44
+ # created them, we simply pick them up now. Then, after we create them, we flush the cache again
45
+ # to pick up the newly-created rows. +:context+ is present in the payload, mapped to either
46
+ # +:before_import+ or +:after_import+ (corresponding to the two situations above). +:new_rows+ is
47
+ # also present in the payload, mapped to an array of one or more Hashes, each of which represents
48
+ # a unique combination of attributes to be created.
49
+ # [:stale] By far the most common case -- the cache is simply stale based upon the current cache-expiration policy,
50
+ # and needs to be reloaded. The payload will contain +:loaded+, which is the time that the cache was
51
+ # loaded, and +:now+, which is the time at which the cache was checked for validity. (+:now+ will always
52
+ # be very close to, but not after, the current time; any delay is just due to the time it took to
53
+ # receive the notification via ActiveSupport::Notifications.)
54
+ class RowManager
55
+ attr_reader :low_card_model
56
+
57
+ # Creates a new instance for the given low-card model.
58
def initialize(low_card_model)
  # Only objects that both respond to #is_low_card_table? and answer truthily are accepted.
  is_low_card = low_card_model.respond_to?(:is_low_card_table?) && low_card_model.is_low_card_table?
  if ! is_low_card
    raise ArgumentError, "You must supply a low-card AR model class, not: #{low_card_model.inspect}"
  end

  @low_card_model = low_card_model
  # Starts out empty; models are added through #referred_to_by.
  @referring_models = [ ]
  # Unique-index maintenance is delegated to a dedicated helper object.
  @table_unique_index = LowCardTables::LowCardTable::TableUniqueIndex.new(low_card_model)
end
67
+
68
+ attr_reader :referring_models
69
+
70
+ # Tells us that the low-card model we're operating on behalf of is referenced by the given +referring_model_class+.
71
+ # This +referring_model_class+ should be an ActiveRecord class that has declared 'has_low_card_table' on this
72
+ # low-card table.
73
+ #
74
+ # We keep track of this and expose it for a few reasons:
75
+ #
76
+ # * If we need to collapse the rows in this low-card table because a column has been removed, we use this list of
77
+ # referring models to know which columns have a foreign key to this table;
78
+ # * When someone calls #reset_column_information on the low-card table, we re-compute (and re-install) the set of
79
+ # delegated methods from all models that refer to this low-card table.
80
def referred_to_by(referring_model_class)
  # Set-union keeps the list free of duplicates if the same class registers more than once.
  @referring_models = @referring_models | [ referring_model_class ]
end
83
+
84
+ # Tells us that someone called #reset_column_information on the low-card table; we'll inform all referring models
85
+ # of that fact.
86
def column_information_reset!
  # Forward the notification to the associations manager of every registered referring model.
  @referring_models.each do |referring_model|
    associations_manager = referring_model._low_card_associations_manager
    associations_manager.low_card_column_information_reset!(@low_card_model)
  end
end
89
+
90
+ # Returns all rows in the low-card table. This behaves semantically identically to simply calling ActiveRecord's
91
+ # #all method on the low-card table itself, but it returns the data from cache.
92
def all_rows
  # Served straight from the cache object.
  cache.all_rows
end
95
+
96
+ # Flushes the cache immediately (assuming we have any cached data at all).
97
def flush_cache!
  # :manually_requested is the reason passed along to #flush! for an explicit, caller-initiated flush.
  flush!(:manually_requested)
end
100
+
101
+ # Given a single primary-key ID of a low-card row, returns the row for that ID. Given an Array of one or more
102
+ # primary-key IDs, returns a Hash mapping each of those IDs to the corresponding row. Properly flushes the cache
103
+ # and tries again if given an ID that doesn't exist in cache.
104
def rows_for_ids(id_or_ids)
  cache.rows_for_ids(id_or_ids)
rescue LowCardTables::Errors::LowCardIdNotFoundError => not_found
  # An unknown ID may simply be newer than our cached copy, so flush and retry exactly once. If the ID is
  # still missing after the reload, the error from this second lookup propagates to the caller.
  flush!(:id_not_found, :ids => not_found.ids)
  cache.rows_for_ids(id_or_ids)
end
112
+
113
+ # A synonym for #rows_for_ids.
114
def row_for_id(id)
  # Pure alias: the argument is handed to #rows_for_ids unchanged.
  rows_for_ids(id)
end
117
+
118
+ # Given a single Hash specifying zero or more constraints for low-card rows (i.e., mapping zero or more columns
119
+ # of the low-card table to specific values for those columns), returns a (possibly empty) Array of IDs of
120
+ # low-card rows that match those constraints.
121
+ #
122
+ # Given an array of one or more Hashes, each of which specify zero or more constraints for low-card rows, returns
123
+ # a Hash mapping each of those Hashes to a (possibly empty) Array of IDs of low-card rows that match each
124
+ # Hash.
125
+ #
126
+ # Given a block (in which case no hashes may be passed), returns an Array of IDs of low-card rows that match the
127
+ # block. The block is passed an instance of the low-card model class, and the return value of the block (truthy
128
+ # or falsy) determines whether the ID of that row is included in the return value or not.
129
def ids_matching(hash_or_hashes = nil, &block)
  # The shared matching routine does the work; the symbol names the Cache method it should invoke.
  do_matching(hash_or_hashes, block, :ids_matching)
end
132
+
133
+ # Given a single Hash specifying zero or more constraints for low-card rows (i.e., mapping zero or more columns
134
+ # of the low-card table to specific values for those columns), returns a (possibly empty) Array of
135
+ # low-card rows that match those constraints.
136
+ #
137
+ # Given an array of one or more Hashes, each of which specify zero or more constraints for low-card rows, returns
138
+ # a Hash mapping each of those Hashes to a (possibly empty) Array of low-card rows that match each
139
+ # Hash.
140
+ #
141
+ # Given a block (in which case no hashes may be passed), returns an Array of low-card rows that match the
142
+ # block. The block is passed an instance of the low-card model class, and the return value of the block (truthy
143
+ # or falsy) determines whether that row is included in the return value or not.
144
def rows_matching(hash_or_hashes = nil, &block)
  # The shared matching routine does the work; the symbol names the Cache method it should invoke.
  do_matching(hash_or_hashes, block, :rows_matching)
end
147
+
148
+ # Given a single Hash specifying values for every column in the low-card table, returns an instance of the
149
+ # low-card table, already existing in the database, for that combination of values.
150
+ #
151
+ # Given an array of Hashes, each specifying values for every column in the low-card table, returns a Hash
152
+ # mapping each of those Hashes to an instance of the low-card table, already existing in the database, for that
153
+ # combination of values.
154
+ #
155
+ # If you request an instance for a combination of values that doesn't exist in the table, it will simply be
156
+ # mapped to +nil+. Under no circumstances will rows be added to the database.
157
def find_rows_for(hash_hashes_object_or_objects)
  # Second argument false => lookup only; the shared routine is told not to create anything.
  do_find_or_create(hash_hashes_object_or_objects, false)
end
160
+
161
+ # Given a single Hash specifying values for every column in the low-card table, returns an instance of the
162
+ # low-card table for that combination of values. The row in question will be created if it doesn't already
163
+ # exist.
164
+ #
165
+ # Given an array of Hashes, each specifying values for every column in the low-card table, returns a Hash
166
+ # mapping each of those Hashes to an instance of the low-card table for that combination of values. Rows for
167
+ # any missing combinations of values will be created. (Creation is done in bulk, using +activerecord-import+,
168
+ # so this method will be fast no matter how many rows need to be created.)
169
def find_or_create_rows_for(hash_hashes_object_or_objects)
  # Second argument true => the shared routine is asked to create any combination that isn't found.
  do_find_or_create(hash_hashes_object_or_objects, true)
end
172
+
173
+ # Behaves identically to #find_rows_for, except that it returns IDs instead of rows.
174
def find_ids_for(hash_hashes_object_or_objects)
  # Look the rows up first, then swap each row for its ID.
  found_rows = find_rows_for(hash_hashes_object_or_objects)
  row_map_to_id_map(found_rows)
end
177
+
178
+ # Behaves identically to #find_or_create_rows_for, except that it returns IDs instead of rows.
179
def find_or_create_ids_for(hash_hashes_object_or_objects)
  # Find (or create) the rows first, then swap each row for its ID.
  resolved_rows = find_or_create_rows_for(hash_hashes_object_or_objects)
  row_map_to_id_map(resolved_rows)
end
182
+
183
+ # Returns the set of columns on the low-card table that we should consider "value columns" -- i.e., those that
184
+ # contain data values, rather than metadata, like the primary key, created_at/updated_at, and so on.
185
+ #
186
+ # Columns that are excluded:
187
+ #
188
+ # * The primary key
189
+ # * created_at and updated_at
190
+ # * Any additional columns specified using the +:exclude_column_names+ option when declaring +is_low_card_table+.
191
def value_column_names
  # Same set and order as #value_columns, reduced to just each column's name.
  value_columns.map { |column| column.name }
end
194
+
195
+ # Iterates through this table, finding duplicate rows and collapsing them. See RowCollapser for far more
196
+ # information.
197
def collapse_rows_and_update_referrers!(low_card_options = { })
  # The RowCollapser does the real work and hands back the collapse map.
  collapse_map = LowCardTables::LowCardTable::RowCollapser.new(@low_card_model, low_card_options).collapse!

  # The flush is unconditional: it runs after every collapse attempt, whatever the collapser returned.
  flush!(:collapse_rows_and_update_referrers)

  collapse_map
end
204
+
205
+ # If this table already has the correct unique index across all value columns, does nothing.
206
+ #
207
+ # If this table does not have the correct unique index, and +create_if_needed+ is truthy, then creates the index.
208
+ # If this table does not have the correct unique index, and +create_if_needed+ is falsy, then raises
209
+ # LowCardTables::Errors::LowCardNoUniqueIndexError.
210
def ensure_has_unique_index!(create_if_needed = false)
  # Delegates to the unique-index helper held in @table_unique_index; the flag is passed through unchanged.
  @table_unique_index.ensure_present!(create_if_needed)
end
213
+
214
+ # If this table currently has a unique index across all value columns, removes it.
215
def remove_unique_index!
  # Delegates to the unique-index helper held in @table_unique_index.
  @table_unique_index.remove!
end
218
+
219
+
220
+ private
221
+ # Given a Hash that maps keys to instances of the low-card class, returns a Hash that is identical in every way
222
+ # except that rows are replaced with their IDs. This is what we use to implement, for example,
223
+ # #find_or_create_ids_for on top of #find_or_create_rows_for, trivially.
224
def row_map_to_id_map(m)
  # Anything that isn't a Hash is treated as a single row (or nothing): a truthy value answers with its ID,
  # a falsy one with nil.
  return (m ? m.id : nil) unless m.kind_of?(Hash)

  # Hash input: same keys, with every row swapped for its ID and every falsy value swapped for nil.
  m.inject({ }) do |ids_by_key, (key, row)|
    ids_by_key[key] = row ? row.id : nil
    ids_by_key
  end
end
239
+
240
+ # This is used to implement #rows_matching and #ids_matching on top of the Cache. +hash_or_hashes+ is a single
241
+ # Hash or an array of Hashes, +block+ is a callable block (and you're only allowed to pass +hash_or_hashes+ _or_
242
+ # +block+, not both), and +method_name+ is the name of the method on Cache that we should call to implement this
243
+ # method.
244
+ #
245
+ # Since Cache does most of the work for us, this method is basically responsible for sanitizing input/output, and
246
+ # detecting schema-change issues (as evidenced by LowCardColumnNotPresentError) and retrying, once. (This is so
247
+ # that if code starts using a column name that was not present on the table at the last time the cache was read,
248
+ # but since has been migrated in, we'll detect the change and react correctly.)
249
def do_matching(hash_or_hashes, block, method_name)
  # One full attempt: normalize the input, then ask the cache. Wrapped in a lambda so the retry below
  # repeats exactly the same steps.
  attempt = lambda do
    cache.send(method_name, to_array_of_partial_hashes(hash_or_hashes), &block)
  end

  result = begin
    attempt.call
  rescue LowCardTables::Errors::LowCardColumnNotPresentError
    # The column may have been migrated in since the cache was loaded; flush and try once more. A second
    # failure propagates to the caller.
    flush!(:schema_change)
    attempt.call
  end

  # Array in => the whole result comes back untouched.
  return result if hash_or_hashes.kind_of?(Array)

  # Anything else in => the result must be a Hash with at most one entry, which we unwrap.
  unless result.kind_of?(Hash) && result.size <= 1
    raise "We passed in #{hash_or_hashes.inspect}, but got back #{result.inspect}?"
  end

  result.size > 0 ? result.values[0] : nil
end
266
+
267
+ # This is used to implement #find_rows_for and #find_or_create_rows_for; the two methods are very similar except
268
+ # in how they handle nonexistent rows, so we use this method to deal with both of them.
269
def do_find_or_create(hash_hashes_object_or_objects, do_create)
  # Shared implementation of #find_rows_for (do_create falsy) and #find_or_create_rows_for (do_create truthy).
  #
  # Returns a Hash mapping each input to its row (or nil, when not found and not creating) if the input was
  # an Array; otherwise returns just the single row (or nil). Raises a RuntimeError if, after creation was
  # requested, some input still has no row.

  # Input manipulation: normalize whatever we were given into a map of input => complete attribute Hash.
  input_to_complete_hash_map = map_input_to_complete_hashes(hash_hashes_object_or_objects)
  complete_hashes = input_to_complete_hash_map.values

  # Do the actual lookup in the cache.
  existing = rows_matching(complete_hashes)
  not_found = complete_hashes.reject { |h| existing[h].length > 0 }

  # See if there's something we still don't have and if we need to create it.
  if not_found.length > 0 && do_create
    # We actually pass in _all_ the rows we want here, rather than just the ones that aren't found yet. Why?
    # Under the covers, #flush_lock_and_create_rows_for! is at the heart of our transactional core -- it locks
    # the table and checks again for which rows are present. We want to give it all of the data required, so
    # that, once it acquires the exclusive table-level lock, it knows exactly which data it needs to ensure is
    # present in the table.
    #
    # Passing data acquired outside a transaction into a transaction that's supposed to act on it is just
    # asking for trouble.
    existing = flush_lock_and_create_rows_for!(complete_hashes)
  end

  # Output manipulation and validation.
  out = { }
  input_to_complete_hash_map.each do |input, complete_hash|
    values = existing[complete_hash]

    if values.length == 0 && do_create
      # The message interpolates complete_hash, the variable that is actually in scope here, so this branch
      # raises the intended diagnostic rather than a NameError.
      raise %{Whoa: we asked for a row for this hash: #{complete_hash.inspect};
since this has been asserted to be a complete key, we should only ever get back a single row,
and we should always get back one row since we will have created the row if necessary,
but we got back these rows:

#{values.inspect}}
    end

    out[input] = values[0]
  end

  # Mirror the shape of the input: Array in => Hash out; single item in => single row out.
  if hash_hashes_object_or_objects.kind_of?(Array)
    out
  else
    out[out.keys.first]
  end
end
314
+
315
+ # Returns all of the ::ActiveRecord::ConnectionAdapters::Column objects for all of the value columns in this
316
+ # table.
317
+ #
318
+ # If this table doesn't exist yet, we return an empty set. This is important: this allows your Rails app to still
319
+ # pass through the boot phase if you have a model for a low-card model whose underlying database table hasn't
320
+ # actually been created yet. (If we didn't do this, then the traditional pattern of adding a migration and a model
321
+ # in the same commit would fail -- other developers would get the model but not have the table, and Rails wouldn't
322
+ # even be able to boot to migrate the table in.)
323
def value_columns
  # No table yet => no columns.
  return [ ] unless @low_card_model.table_exists?

  @low_card_model.columns.reject do |column|
    # Names are compared after stringifying, trimming whitespace, and lower-casing.
    normalized_name = column.name.to_s.strip.downcase

    is_primary_key = column.primary
    is_skipped = column_names_to_skip.include?(normalized_name)

    # Drop the column if it's the primary key or appears in the skip list.
    is_primary_key || is_skipped
  end
end
335
+
336
+ # This simply raises an exception when we can't create new rows in the low-card table for some reason. We want
337
+ # to get a very nice, detailed message in return, so we have a method that composes something telling us exactly
338
+ # what happened.
339
+ #
340
+ # +exception+ is the exception we got upon creation, if any. +keys+ is the set of keys (names of columns) we
341
+ # passed to the #import call, and +failed_instances+ is the set of instances that +activerecord_import+ reported
342
+ # as failing.
343
+ #
344
+ # This method eventually raises a LowCardInvalidLowCardRowsError.
345
def could_not_create_new_rows!(exception, keys, failed_instances)
  # Always raises LowCardTables::Errors::LowCardInvalidLowCardRowsError; never returns.
  #
  # A caller may have no per-row detail to offer. Treat nil as "nothing to list" so that we still raise
  # the intended error instead of a NoMethodError from inside the error reporter.
  failed_instances ||= [ ]

  message = %{The low_card_tables gem was trying to create one or more new rows in
the low-card table '#{@low_card_model.table_name}', but, when we went to create those rows...

}

  if exception
    # An exception object that was built but never raised has a nil backtrace.
    backtrace = exception.backtrace || [ ]

    message << %{- The database refused to create them. This is usually because one or more of these rows
violates a database constraint -- like a NOT NULL or CHECK constraint.

The exception we got was:

(#{exception.class.name}) #{exception.message}
#{backtrace.join("\n ")}

}
  else
    message << "- They failed validation.\n\n"
  end

  if failed_instances.length > 0
    message << %{Here's what we tried to import:

Keys: #{keys.inspect}
Values:

}

    failed_instances.each do |failed_instance|
      line = " #{failed_instance.inspect}"

      # Model instances can report validation errors; raw value arrays (passed when the database itself
      # raised) cannot.
      if failed_instance.respond_to?(:errors)
        line << " ERRORS: #{failed_instance.errors.full_messages.join("; ")}"
      end

      message << "#{line}\n"
    end
  end

  raise LowCardTables::Errors::LowCardInvalidLowCardRowsError, message
end
385
+
386
+ # This method is called when someone has called #find_or_create_rows_for or #find_or_create_ids_for, and we've
387
+ # discovered that we do, in fact, need to create one or more rows in the database.
388
+ #
389
+ # Because we need to be careful of race conditions -- many other processes may be running the exact same code at
390
+ # the exact same time -- we do the following:
391
+ #
392
+ # * First, obtain an exclusive table lock for the database we're using. Exactly how this works is database-
393
+ # dependent and not something ActiveRecord knows how to handle for us.
394
+ # * Flush the cache and re-check if we still need to create rows. We do this because it's possible some other
395
+ # process created the rows in-between the time we checked and the time we locked the table. But, now, if we
396
+ # still are missing rows, we know we're the only process who can create them, since we have the exclusive
397
+ # table lock.
398
+ # * Create the rows in the database, raising a detailed exception if the database raises an exception or if
399
+ # +activerecord_import+ reports any failed instances.
400
+ # * Fire an ActiveSupport::Notifications event telling everybody that we just created rows.
401
+ # * Flush the cache again, since we just created rows in the database and so the cache is guaranteed to be
402
+ # out-of-date.
403
+ # * Return the rows that now should be present and match the input.
404
def flush_lock_and_create_rows_for!(input)
  # Everything below runs inside #with_locked_table; the value returned is the final #rows_matching result
  # for the (de-duplicated) complete hashes derived from +input+.
  with_locked_table do
    flush!(:creating_rows, :context => :before_import, :new_rows => input)

    # Re-derive the complete hashes now that the cache (and column information) has been flushed, because
    # it's possible there was a schema modification that we just now picked up.
    #
    # Distinct inputs can normalize to the very same complete hash; de-duplicate so that we never ask the
    # import below to create the same row twice.
    hashes = map_input_to_complete_hashes(input).values.uniq

    existing = rows_matching(hashes)
    still_not_found = hashes.reject { |h| existing[h].length > 0 }

    if still_not_found.length > 0
      keys = value_column_names
      values = still_not_found.map do |hash|
        keys.map { |k| hash[k] }
      end

      begin
        import_result = @low_card_model.import(keys, values, :validate => true)
        could_not_create_new_rows!(nil, keys, import_result.failed_instances) if import_result.failed_instances.length > 0
      rescue ::ActiveRecord::StatementInvalid => si
        # No per-instance failures are available here, so report the raw values we attempted.
        could_not_create_new_rows!(si, keys, values)
      end

      instrument('rows_created', :keys => keys, :values => values)
    end

    # Flush again before re-reading, so the lookup below does not use the cache from before the import.
    flush!(:creating_rows, :context => :after_import, :new_rows => hashes)

    existing = rows_matching(hashes)
    still_not_found = hashes.reject { |h| existing[h].length > 0 }

    if still_not_found.length > 0
      raise %{You asked for low-card IDs for one or more hashes specifying rows that didn't exist,
but, when we tried to create them, even after an import that appeared to succeed, we couldn't
find the models that should've now existed. This should never happen, and may be indicative
of a bug in the low-card tables system. Here's what we tried to create, but then couldn't find:

#{still_not_found.join("\n")}}
    end

    existing
  end
end
448
+
449
+ # Locks the table for this @low_card_model, using whatever database-specific code is required. This also surrounds
450
+ # the block passed with a transaction on the table in question.
451
def with_locked_table(&block)
  # Open the transaction first, then take the database-specific table lock inside it; the caller's block
  # runs innermost and its value is what we return.
  @low_card_model.transaction { with_database_exclusive_table_lock(&block) }
end
458
+
459
+ # Obtains an exclusive lock on the table for this low-card model. This is much, much stronger than the built-in
460
+ # ActiveRecord #lock! or #with_lock support (see ActiveRecord::Locking::Pessimistic); this should always lock
461
+ # the entire table against reading and writing.
462
+ #
463
+ # We could've handled this by injecting methods into the ActiveRecord connection adapters for each specific
464
+ # database type, but that's actually quite a bit more tricky metaprogramming (what if the adapters haven't been
465
+ # loaded yet when our Gem starts up, but will get loaded later? -- and we definitely don't want to make this
466
+ # Gem depend on the union of all supported database adapters!) than doing it this way.
467
+ #
468
+ # In other words, this may be a bit of a gross hack (groping the class name of the adapter in question), but it's
469
+ # arguably a lot more reliable and easier to understand than the other way of doing this.
470
def with_database_exclusive_table_lock(&block)
  # Dispatch on the connection adapter's class name (matched case-insensitively); anything we don't
  # recognize is an error, since we'd otherwise proceed without a lock.
  adapter_class_name = @low_card_model.connection.class.name

  case adapter_class_name
  when /postgresql/i
    with_database_exclusive_table_lock_postgresql(&block)
  when /mysql/i
    with_database_exclusive_table_lock_mysql(&block)
  when /sqlite/i
    with_database_exclusive_table_lock_sqlite(&block)
  else
    raise LowCardTables::Errors::LowCardUnsupportedDatabaseError, %{You asked for low-card IDs for one or more hashes specifying rows that didn't exist,
but, when we went to create them, we discovered that we don't know how to exclusively
lock tables in your database. (This is very important so that we don't accidentally
create duplicate rows.)

Your database adapter's class name is '#{adapter_class_name}'; please submit at least
a bug report, or, even better, a patch. :) Adding support is quite easy, as long as you know the
equivalent of 'LOCK TABLE'(s) in your database.}
  end
end
486
+
487
+ # Obtains an exclusive table lock for a PostgreSQL database. PostgreSQL releases all table locks at the end of
488
+ # the current transaction, so we just need to lock the table -- unlocking happens automatically when we release
489
+ # our transaction, above.
490
def with_database_exclusive_table_lock_postgresql(&block)
  # Quote the table name via the connection rather than via :sanitize_sql substitution: the latter
  # yields LOCK TABLE 'foo', which PostgreSQL rejects.
  connection = @low_card_model.connection
  quoted_table = connection.quote_table_name(@low_card_model.table_name)

  # There is no explicit unlock here; we only lock, then run the block.
  run_sql("LOCK TABLE #{quoted_table}", { })
  block.call
end
498
+
499
+ # Obtains an exclusive table lock for a SQLite database. There is no locking possible or needed, since SQLite is
500
+ # a single-user database.
501
def with_database_exclusive_table_lock_sqlite(&block)
  # No lock statement is issued for SQLite; we simply run the block and return its value.
  block.call
end
504
+
505
+ # Obtains an exclusive table lock for a MySQL database. We need to make sure we unlock the table once the block
506
+ # is complete.
507
def with_database_exclusive_table_lock_mysql(&block)
  # NOTE(review): MySQL documents that LOCK TABLES implicitly commits an active transaction -- confirm
  # how that interacts with the transaction opened by #with_locked_table.
  quoted_table = @low_card_model.connection.quote_table_name(@low_card_model.table_name)
  run_sql("LOCK TABLES #{quoted_table} WRITE", { })
  block.call
ensure
  # Always attempt to release the lock, whether or not the block (or the lock itself) succeeded.
  begin
    run_sql("UNLOCK TABLES", { })
  rescue ::ActiveRecord::StatementInvalid
    # Deliberately best-effort: a failure to unlock must not mask the block's own result or exception.
  end
end
520
+
521
+ # Runs a SQL statement, specified as a string with substitution parameters.
522
def run_sql(statement, params)
  connection = @low_card_model.connection
  # :sanitize_sql is invoked via #send, as the original code does, then the result is executed as-is.
  sanitized = @low_card_model.send(:sanitize_sql, [ statement, params ])
  connection.execute(sanitized)
end
525
+
526
+ # Names of columns in low-card tables that we should always skip, no matter what.
527
# Frozen so that no caller can accidentally mutate this shared list in place.
COLUMN_NAMES_TO_ALWAYS_SKIP = %w{created_at updated_at}.freeze
528
+
529
+ # Returns the names of all columns in this table that we should skip when determining what to treat as a value
530
+ # column for this table (as opposed to things like the primary key, created_at, updated_at, and so on, which are
531
+ # metadata and shouldn't play a direct role in the low-card system).
532
def column_names_to_skip
  # Computed once and then memoized on this object.
  return @column_names_to_skip if @column_names_to_skip

  # :exclude_column_names may be absent, a single name, or a list; normalize each entry the same way
  # column names are normalized elsewhere (string, stripped, downcased).
  configured = @low_card_model.low_card_options[:exclude_column_names] || [ ]
  normalized = Array(configured).map { |name| name.to_s.strip.downcase }

  @column_names_to_skip = COLUMN_NAMES_TO_ALWAYS_SKIP + normalized
end
538
+
539
+ # Given something that can be a single Hash, an array of Hashes, a single instance of the @low_card_model class,
540
+ # or an array of instances of the @low_card_model class, returns a new Hash.
541
+ #
542
+ # This new Hash has, as keys, each of the inputs to this method, and, as values, a Hash for that input that is
543
+ # a complete, normalized Hash representing that input.
544
+ #
545
+ # This method will also raise an exception if any of the inputs do not include all of the necessary keys for the
546
+ # low-card table -- thus, this method can only be used for methods like #find_rows_for or #find_or_create_ids_for,
547
+ # where the input must each specify exactly one low-card row, rather than methods like
548
+ # #rows_matching/#ids_matching, where each input may match multiple low-card rows.
549
def map_input_to_complete_hashes(hash_hashes_object_or_objects)
  # Returns a Hash mapping each (unique) input to its complete, normalized Hash, as produced by
  # #ensure_complete_key. Raises a RuntimeError for input that is neither a Hash nor an instance of
  # @low_card_model; #ensure_complete_key raises for incomplete or over-specified input.
  #
  # We can't use Array(), because that will turn a single Hash into an Array of pairs.
  as_array = hash_hashes_object_or_objects.kind_of?(Array) ? hash_hashes_object_or_objects : [ hash_hashes_object_or_objects ]

  out = { }
  as_array.uniq.each do |hash_or_object|
    hash = nil

    if hash_or_object.kind_of?(Hash)
      # Allow us to use Strings or Symbols as indexes into the Hash
      hash = hash_or_object.with_indifferent_access
    elsif hash_or_object.kind_of?(@low_card_model)
      hash = hash_or_object.attributes.dup.with_indifferent_access
      hash.delete(@low_card_model.primary_key)

      # A model instance carries every column as an attribute, including the metadata columns that are
      # never value columns (see #column_names_to_skip). Strip those here; otherwise #ensure_complete_key
      # would reject the instance for specifying "extra" columns. Explicit Hash input is deliberately left
      # untouched, so callers who name a non-value column in a Hash still get an error.
      hash.delete_if { |name, _value| column_names_to_skip.include?(name.to_s.strip.downcase) }
    else
      raise "Invalid input to this method -- this must be a Hash, or an instance of #{@low_card_model}: #{hash_or_object.inspect}"
    end

    hash = ensure_complete_key(hash)
    out[hash_or_object] = hash
  end

  out
end
574
+
575
+ # Given a single Hash that should contain values for all value columns in the low-card table -- no less, no more --
576
+ # validates that the Hash contains no extra columns and no missing columns, and returns it. This method will allow
577
+ # you to skip any columns in the input that have defaults in the database, and will correctly fill in those defaults
578
+ # in the returned Hash.
579
+ #
580
+ # Because this requires all columns to be present in the input, it can only be used for methods like
581
+ # #find_rows_for or #find_or_create_ids_for that require fully-specified input hashes.
582
def ensure_complete_key(hash)
  # Returns +hash+ itself (filled in, in place, with defaults for any omitted columns that have them).
  # Raises LowCardColumnNotSpecifiedError if value columns are still missing afterwards, and
  # LowCardColumnNotPresentError if the hash names anything that is not a value column.
  column_names = value_column_names
  keys_as_strings = hash.keys.map(&:to_s)
  extra = keys_as_strings - column_names

  missing = (column_names - keys_as_strings).reject do |missing_column_name|
    normalized_name = missing_column_name.to_s.strip.downcase
    column = @low_card_model.columns.detect { |c| c.name.to_s.strip.downcase == normalized_name }

    # Test for nil explicitly: a default of +false+ is a perfectly good default, and a plain truthiness
    # check would wrongly report such a column as missing.
    if column && !column.default.nil?
      hash[column.name] = column.default
      true
    else
      false
    end
  end

  if missing.length > 0
    raise LowCardTables::Errors::LowCardColumnNotSpecifiedError, "The following is not a complete specification of all columns in low-card table '#{@low_card_model.table_name}'; it is missing these columns: #{missing.join(", ")}: #{hash.inspect}"
  end

  if extra.length > 0
    raise LowCardTables::Errors::LowCardColumnNotPresentError, "The following specifies extra columns that are not present in low-card table '#{@low_card_model.table_name}'; these columns are not present in the underlying model: #{extra.join(", ")}: #{hash.inspect}"
  end

  hash
end
607
+
608
+ # Given something that may be a single Hash or an array of Hashes, returns an array of Hashes, and makes sure that
609
+ # the input (or each element of the input) is a valid partial key into the low-card table. A partial key is a Hash
610
+ # that specifies zero or more value columns from the low-card table -- but you're not allowed to specify anything
611
+ # in the Hash that isn't a value column in the low-card table.
612
def to_array_of_partial_hashes(array)
  # Wrap a lone Hash ourselves; Array() would explode it into key/value pairs.
  hashes = array.kind_of?(Array) ? array : [ array ]

  # Validate every element; #each hands back the array itself, which is our return value.
  hashes.each { |candidate| assert_partial_key!(candidate) }
end
617
+
618
+ # Given a Hash, raises an error if that Hash is not a valid partial key into the low-card table -- i.e., if it
619
+ # contains keys that are not valid value columns in the low-card table.
620
def assert_partial_key!(hash)
  # Keys may be Strings or Symbols; compare them as Strings against the value column names.
  extra = hash.keys.map(&:to_s) - value_column_names
  return if extra.empty?

  raise LowCardTables::Errors::LowCardColumnNotPresentError, "The following specifies extra columns that are not present in low-card table '#{@low_card_model.table_name}'; these columns are not present in the underlying model: #{extra.join(", ")}: #{hash.inspect}"
end
628
+
629
+ # Fetches the cache we should use. This takes care of creating a cache if one is not present; it also takes care
630
+ # of flushing the cache and creating a new one if the current cache is stale.
631
def cache
  now = current_time

  # If we already hold a cache, ask the expiration policy whether it has gone stale; if so, flush it
  # (which clears @cache), so that it gets rebuilt just below.
  if @cache
    loaded_at = @cache.loaded_at
    flush!(:stale, :loaded => loaded_at, :now => now) if cache_expiration_policy_object.stale?(loaded_at, now)
  end

  # Build a cache if we have none, wrapping the load in a 'cache_load' instrumentation event.
  instrument('cache_load') do
    @cache = LowCardTables::LowCardTable::Cache.new(@low_card_model, @low_card_model.low_card_options)
  end unless @cache

  @cache
end
647
+
648
+ # Flushes the cache, for the reason given, and fires the appropriate ActiveSupport::Notification instrumentation.
649
+ # +reason+ is the reason given in the notification, and +notification_options+ are added to the payload for the
650
+ # notification.
651
+ #
652
+ # Whenever we flush the cache, we also ask ActiveRecord to purge its idea of what columns are on the table. This
653
+ # ensures that we'll stay in sync with any underlying schema changes, and hence adapt to an evolving schema on
654
+ # the fly, as best we can.
655
def flush!(reason, notification_options = { })
  # Only fire the 'cache_flush' event when there is actually a cache to drop; the cache is cleared
  # inside the instrumented block.
  if @cache
    payload = notification_options.merge(:reason => reason)
    instrument('cache_flush', payload) { @cache = nil }
  end

  # Regardless of whether we held a cache, have ActiveRecord forget its column information, so that
  # schema changes get picked up.
  @low_card_model.reset_column_information
end
664
+
665
+ # A thin wrapper around ActiveSupport::Notifications.
666
def instrument(event, options = { }, &block)
  # Every event is namespaced under "low_card_tables." and carries the model in its payload.
  event_name = "low_card_tables.#{event}"
  payload = options.merge(:low_card_model => @low_card_model)

  ::ActiveSupport::Notifications.instrument(event_name, payload, &block)
end
669
+
670
+ # Returns the correct cache-expiration policy object to use for the table in question.
671
def cache_expiration_policy_object
  # A policy set on the model wins; otherwise fall back to the one on LowCardTables itself.
  model_policy = @low_card_model.low_card_cache_expiration_policy_object
  model_policy || LowCardTables.low_card_cache_expiration_policy_object
end
674
+
675
+ # Returns the current time. Broken out into a separate method so that we can easily override it in tests.
676
def current_time
  # Kept as its own method so that tests can override what "now" is.
  Time.now
end
679
+ end
680
+ end
681
+ end