inventory_refresh 0.3.5 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52)
  1. checksums.yaml +4 -4
  2. data/.codeclimate.yml +25 -30
  3. data/.github/workflows/ci.yaml +58 -0
  4. data/.rubocop.yml +3 -3
  5. data/.rubocop_cc.yml +3 -4
  6. data/.rubocop_local.yml +5 -2
  7. data/.whitesource +3 -0
  8. data/CHANGELOG.md +19 -0
  9. data/Gemfile +10 -4
  10. data/README.md +1 -2
  11. data/Rakefile +2 -2
  12. data/inventory_refresh.gemspec +9 -10
  13. data/lib/inventory_refresh/application_record_iterator.rb +25 -12
  14. data/lib/inventory_refresh/graph/topological_sort.rb +24 -26
  15. data/lib/inventory_refresh/graph.rb +2 -2
  16. data/lib/inventory_refresh/inventory_collection/builder.rb +37 -15
  17. data/lib/inventory_refresh/inventory_collection/data_storage.rb +9 -0
  18. data/lib/inventory_refresh/inventory_collection/helpers/initialize_helper.rb +147 -38
  19. data/lib/inventory_refresh/inventory_collection/helpers/questions_helper.rb +48 -4
  20. data/lib/inventory_refresh/inventory_collection/index/proxy.rb +35 -3
  21. data/lib/inventory_refresh/inventory_collection/index/type/base.rb +8 -0
  22. data/lib/inventory_refresh/inventory_collection/index/type/local_db.rb +2 -0
  23. data/lib/inventory_refresh/inventory_collection/index/type/skeletal.rb +1 -0
  24. data/lib/inventory_refresh/inventory_collection/reference.rb +1 -0
  25. data/lib/inventory_refresh/inventory_collection/references_storage.rb +17 -0
  26. data/lib/inventory_refresh/inventory_collection/scanner.rb +91 -3
  27. data/lib/inventory_refresh/inventory_collection/serialization.rb +16 -10
  28. data/lib/inventory_refresh/inventory_collection.rb +122 -64
  29. data/lib/inventory_refresh/inventory_object.rb +74 -40
  30. data/lib/inventory_refresh/inventory_object_lazy.rb +17 -10
  31. data/lib/inventory_refresh/null_logger.rb +2 -2
  32. data/lib/inventory_refresh/persister.rb +43 -93
  33. data/lib/inventory_refresh/save_collection/base.rb +4 -2
  34. data/lib/inventory_refresh/save_collection/saver/base.rb +114 -15
  35. data/lib/inventory_refresh/save_collection/saver/batch.rb +17 -0
  36. data/lib/inventory_refresh/save_collection/saver/concurrent_safe_batch.rb +129 -51
  37. data/lib/inventory_refresh/save_collection/saver/default.rb +57 -0
  38. data/lib/inventory_refresh/save_collection/saver/partial_upsert_helper.rb +2 -19
  39. data/lib/inventory_refresh/save_collection/saver/retention_helper.rb +68 -3
  40. data/lib/inventory_refresh/save_collection/saver/sql_helper.rb +125 -0
  41. data/lib/inventory_refresh/save_collection/saver/sql_helper_update.rb +10 -6
  42. data/lib/inventory_refresh/save_collection/saver/sql_helper_upsert.rb +28 -16
  43. data/lib/inventory_refresh/save_collection/sweeper.rb +17 -93
  44. data/lib/inventory_refresh/save_collection/topological_sort.rb +5 -5
  45. data/lib/inventory_refresh/save_inventory.rb +5 -12
  46. data/lib/inventory_refresh/target.rb +73 -0
  47. data/lib/inventory_refresh/target_collection.rb +92 -0
  48. data/lib/inventory_refresh/version.rb +1 -1
  49. data/lib/inventory_refresh.rb +2 -0
  50. metadata +42 -39
  51. data/.travis.yml +0 -23
  52. data/lib/inventory_refresh/exception.rb +0 -8
@@ -12,6 +12,8 @@ module InventoryRefresh::SaveCollection
12
12
  # @param inventory_collection [InventoryRefresh::InventoryCollection] InventoryCollection object we will be saving
13
13
  def initialize(inventory_collection)
14
14
  @inventory_collection = inventory_collection
15
+ # TODO(lsmola) do I need to reload every time? Also it should be enough to clear the associations.
16
+ inventory_collection.parent&.reload
15
17
  @association = inventory_collection.db_collection_for_comparison
16
18
 
17
19
  # Private attrs
@@ -19,17 +21,21 @@ module InventoryRefresh::SaveCollection
19
21
  @table_name = @model_class.table_name
20
22
  @q_table_name = get_connection.quote_table_name(@table_name)
21
23
  @primary_key = @model_class.primary_key
22
- @arel_primary_key = @model_class.arel_attribute(@primary_key)
24
+ @arel_primary_key = @model_class.arel_table[@primary_key]
23
25
  @unique_index_keys = inventory_collection.unique_index_keys
24
26
  @unique_index_keys_to_s = inventory_collection.manager_ref_to_cols.map(&:to_s)
25
27
  @select_keys = [@primary_key] + @unique_index_keys_to_s + internal_columns.map(&:to_s)
26
28
  @unique_db_primary_keys = Set.new
27
29
  @unique_db_indexes = Set.new
28
30
 
31
+ # Right now ApplicationRecordIterator in association is used for targeted refresh. Given the small amount of
32
+ # records flowing through there, we probably don't need to optimize that association to fetch a pure SQL.
33
+ @pure_sql_records_fetching = !inventory_collection.use_ar_object? && !@association.kind_of?(InventoryRefresh::ApplicationRecordIterator)
34
+
29
35
  @batch_size_for_persisting = inventory_collection.batch_size_pure_sql
30
- @batch_size = inventory_collection.use_ar_object? ? @batch_size_for_persisting : inventory_collection.batch_size
31
36
 
32
- @record_key_method = inventory_collection.pure_sql_record_fetching? ? :pure_sql_record_key : :ar_record_key
37
+ @batch_size = @pure_sql_records_fetching ? @batch_size_for_persisting : inventory_collection.batch_size
38
+ @record_key_method = @pure_sql_records_fetching ? :pure_sql_record_key : :ar_record_key
33
39
  @select_keys_indexes = @select_keys.each_with_object({}).with_index { |(key, obj), index| obj[key.to_s] = index }
34
40
  @pg_types = @model_class.attribute_names.each_with_object({}) do |key, obj|
35
41
  obj[key.to_sym] = inventory_collection.model_class.columns_hash[key]
@@ -69,8 +75,14 @@ module InventoryRefresh::SaveCollection
69
75
 
70
76
  # Saves the InventoryCollection
71
77
  def save_inventory_collection!
78
+ # If we have a targeted InventoryCollection that wouldn't do anything, quickly skip it
79
+ return if inventory_collection.noop?
80
+
81
+ # Delete_complement strategy using :all_manager_uuids attribute
82
+ delete_complement unless inventory_collection.delete_complement_noop?
83
+
72
84
  # Create/Update/Archive/Delete records based on InventoryCollection data and scope
73
- save!(association)
85
+ save!(association) unless inventory_collection.saving_noop?
74
86
  end
75
87
 
76
88
  protected
@@ -89,8 +101,6 @@ module InventoryRefresh::SaveCollection
89
101
  # @param attributes [Hash] attributes hash
90
102
  # @return [Hash] modified hash from parameter attributes with casted values
91
103
  def values_for_database!(all_attribute_keys, attributes)
92
- # TODO(lsmola) we'll need to fill default value from the DB to the NOT_NULL columns here, since sending NULL
93
- # to column with NOT_NULL constraint always fails, even if there is a default value
94
104
  all_attribute_keys.each do |key|
95
105
  next unless attributes.key?(key)
96
106
 
@@ -102,7 +112,11 @@ module InventoryRefresh::SaveCollection
102
112
  end
103
113
 
104
114
  def transform_to_hash!(all_attribute_keys, hash)
105
- if serializable_keys?
115
+ if inventory_collection.use_ar_object?
116
+ record = inventory_collection.model_class.new(hash)
117
+ values_for_database!(all_attribute_keys,
118
+ record.attributes.slice(*record.changed_attributes.keys).symbolize_keys)
119
+ elsif serializable_keys?
106
120
  values_for_database!(all_attribute_keys,
107
121
  hash)
108
122
  else
@@ -113,15 +127,99 @@ module InventoryRefresh::SaveCollection
113
127
  private
114
128
 
115
129
  attr_reader :unique_index_keys, :unique_index_keys_to_s, :select_keys, :unique_db_primary_keys, :unique_db_indexes,
116
- :primary_key, :arel_primary_key, :record_key_method, :select_keys_indexes,
130
+ :primary_key, :arel_primary_key, :record_key_method, :pure_sql_records_fetching, :select_keys_indexes,
117
131
  :batch_size, :batch_size_for_persisting, :model_class, :serializable_keys, :deserializable_keys, :pg_types, :table_name,
118
132
  :q_table_name
119
133
 
120
134
  delegate :supports_column?, :to => :inventory_collection
121
135
 
136
+ # Saves the InventoryCollection
137
+ #
138
+ # @param association [Symbol] An existing association on manager
139
+ def save!(association)
140
+ attributes_index = {}
141
+ inventory_objects_index = {}
142
+ inventory_collection.each do |inventory_object|
143
+ attributes = inventory_object.attributes(inventory_collection)
144
+ index = build_stringified_reference(attributes, unique_index_keys)
145
+
146
+ attributes_index[index] = attributes
147
+ inventory_objects_index[index] = inventory_object
148
+ end
149
+
150
+ logger.debug("Processing #{inventory_collection} of size #{inventory_collection.size}...")
151
+ # Records that are in the DB, we will be updating or deleting them.
152
+ ActiveRecord::Base.transaction do
153
+ association.find_each do |record|
154
+ index = build_stringified_reference_for_record(record, unique_index_keys)
155
+
156
+ next unless assert_distinct_relation(record.id)
157
+ next unless assert_unique_record(record, index)
158
+
159
+ inventory_object = inventory_objects_index.delete(index)
160
+ hash = attributes_index.delete(index)
161
+
162
+ if inventory_object.nil?
163
+ # Record was found in the DB but not sent for saving, that means it doesn't exist anymore and we should
164
+ # delete it from the DB.
165
+ delete_record!(record) if inventory_collection.delete_allowed?
166
+ elsif assert_referential_integrity(hash)
167
+ # Record was found in the DB and sent for saving, we will be updating the DB.
168
+ update_record!(record, hash, inventory_object)
169
+ end
170
+ end
171
+ end
172
+
173
+ inventory_collection.custom_reconnect_block&.call(inventory_collection, inventory_objects_index, attributes_index)
174
+
175
+ # Records that were not found in the DB but sent for saving, we will be creating these in the DB.
176
+ if inventory_collection.create_allowed?
177
+ ActiveRecord::Base.transaction do
178
+ inventory_objects_index.each do |index, inventory_object|
179
+ hash = attributes_index.delete(index)
180
+
181
+ create_record!(hash, inventory_object) if assert_referential_integrity(hash)
182
+ end
183
+ end
184
+ end
185
+ logger.debug("Processing #{inventory_collection}, "\
186
+ "created=#{inventory_collection.created_records.count}, "\
187
+ "updated=#{inventory_collection.updated_records.count}, "\
188
+ "deleted=#{inventory_collection.deleted_records.count}...Complete")
189
+ rescue => e
190
+ logger.error("Error when saving #{inventory_collection} with #{inventory_collection_details}. Message: #{e.message}")
191
+ raise e
192
+ end
193
+
122
194
  # @return [String] a string for logging purposes
123
195
  def inventory_collection_details
124
- "strategy: #{inventory_collection.strategy}, saver_strategy: #{inventory_collection.saver_strategy}"
196
+ "strategy: #{inventory_collection.strategy}, saver_strategy: #{inventory_collection.saver_strategy}, targeted: #{inventory_collection.targeted?}"
197
+ end
198
+
199
+ # @param record [ApplicationRecord] ApplicationRecord object
200
+ # @param key [Symbol] A key that is an attribute of the AR object
201
+ # @return [Object] Value of attribute name :key on the :record
202
+ def record_key(record, key)
203
+ record.public_send(key)
204
+ end
205
+
206
+ # Deletes a complement of referenced data
207
+ def delete_complement
208
+ raise(":delete_complement method is supported only for :saver_strategy => [:batch, :concurrent_safe_batch]")
209
+ end
210
+
211
+ # Deletes/soft-deletes a given record
212
+ #
213
+ # @param [ApplicationRecord] record we want to delete
214
+ def delete_record!(record)
215
+ record.public_send(inventory_collection.delete_method)
216
+ inventory_collection.store_deleted_records(record)
217
+ end
218
+
219
+ # @return [TrueClass] always return true, this method is redefined in default saver
220
+ def assert_unique_record(_record, _index)
221
+ # TODO(lsmola) can go away once we indexed our DB with unique indexes
222
+ true
125
223
  end
126
224
 
127
225
  # Check if relation provided is distinct, i.e. the relation should not return the same primary key value twice.
@@ -134,12 +232,12 @@ module InventoryRefresh::SaveCollection
134
232
  # Change the InventoryCollection's :association or :arel parameter to return distinct results. The :through
135
233
  # relations can return the same record multiple times. We don't want to do SELECT DISTINCT by default, since
136
234
  # it can be very slow.
137
- unless inventory_collection.assert_graph_integrity
235
+ if inventory_collection.assert_graph_integrity
236
+ raise("Please update :association or :arel for #{inventory_collection} to return a DISTINCT result. ")
237
+ else
138
238
  logger.warn("Please update :association or :arel for #{inventory_collection} to return a DISTINCT result. "\
139
239
  " The duplicate value is being ignored.")
140
240
  return false
141
- else
142
- raise("Please update :association or :arel for #{inventory_collection} to return a DISTINCT result. ")
143
241
  end
144
242
  else
145
243
  unique_db_primary_keys << primary_key_value
@@ -157,14 +255,15 @@ module InventoryRefresh::SaveCollection
157
255
  def assert_referential_integrity(hash)
158
256
  inventory_collection.fixed_foreign_keys.each do |x|
159
257
  next unless hash[x].nil?
258
+
160
259
  subject = "#{hash} of #{inventory_collection} because of missing foreign key #{x} for "\
161
260
  "#{inventory_collection.parent.class.name}:"\
162
261
  "#{inventory_collection.parent.try(:id)}"
163
- unless inventory_collection.assert_graph_integrity
262
+ if inventory_collection.assert_graph_integrity
263
+ raise("Referential integrity check violated for #{subject}")
264
+ else
164
265
  logger.warn("Referential integrity check violated, ignoring #{subject}")
165
266
  return false
166
- else
167
- raise("Referential integrity check violated for #{subject}")
168
267
  end
169
268
  end
170
269
  true
@@ -0,0 +1,17 @@
1
+ require "inventory_refresh/save_collection/saver/concurrent_safe_batch"
2
+
3
+ module InventoryRefresh::SaveCollection
4
+ module Saver
5
+ class Batch < InventoryRefresh::SaveCollection::Saver::ConcurrentSafeBatch
6
+ private
7
+
8
+ # Just returning manager ref transformed to column names, for strategies that do not expect to have unique DB
9
+ # indexes.
10
+ #
11
+ # @return [Array<Symbol>] manager ref transformed to column names
12
+ def unique_index_columns
13
+ inventory_collection.manager_ref_to_cols.map(&:to_sym)
14
+ end
15
+ end
16
+ end
17
+ end
@@ -46,6 +46,41 @@ module InventoryRefresh::SaveCollection
46
46
  record[select_keys_indexes[key]]
47
47
  end
48
48
 
49
+ # Returns iterator or relation based on settings
50
+ #
51
+ # @param association [Symbol] An existing association on manager
52
+ # @return [ActiveRecord::Relation, InventoryRefresh::ApplicationRecordIterator] iterator or relation based on settings
53
+ def batch_iterator(association)
54
+ if pure_sql_records_fetching
55
+ # Building fast iterator doing pure SQL query and therefore avoiding redundant creation of AR objects. The
56
+ # iterator responds to find_in_batches, so it acts like the AR relation. For targeted refresh, the association
57
+ # can already be ApplicationRecordIterator, so we will skip that.
58
+ pure_sql_iterator = lambda do |&block|
59
+ primary_key_offset = nil
60
+ loop do
61
+ relation = association.select(*select_keys)
62
+ .reorder("#{primary_key} ASC")
63
+ .limit(batch_size)
64
+ # Using rails way of comparing primary key instead of offset
65
+ relation = relation.where(arel_primary_key.gt(primary_key_offset)) if primary_key_offset
66
+ records = get_connection.query(relation.to_sql)
67
+ last_record = records.last
68
+ block.call(records)
69
+
70
+ break if records.size < batch_size
71
+
72
+ primary_key_offset = record_key(last_record, primary_key)
73
+ end
74
+ end
75
+
76
+ InventoryRefresh::ApplicationRecordIterator.new(:iterator => pure_sql_iterator)
77
+ else
78
+ # Normal Rails ActiveRecord::Relation where we can call find_in_batches or
79
+ # InventoryRefresh::ApplicationRecordIterator passed from targeted refresh
80
+ association
81
+ end
82
+ end
83
+
49
84
  # Saves the InventoryCollection
50
85
  #
51
86
  # @param association [Symbol] An existing association on manager
@@ -55,7 +90,7 @@ module InventoryRefresh::SaveCollection
55
90
  all_attribute_keys = Set.new + inventory_collection.batch_extra_attributes
56
91
 
57
92
  inventory_collection.each do |inventory_object|
58
- attributes = inventory_object.class.attributes_with_keys(inventory_object.data, inventory_collection, all_attribute_keys, inventory_object)
93
+ attributes = inventory_object.attributes_with_keys(inventory_collection, all_attribute_keys, inventory_object)
59
94
  index = build_stringified_reference(attributes, unique_index_keys)
60
95
 
61
96
  # Interesting fact: not building attributes_index and using only inventory_objects_index doesn't do much
@@ -69,7 +104,7 @@ module InventoryRefresh::SaveCollection
69
104
  logger.debug("Processing #{inventory_collection} of size #{inventory_collection.size}...")
70
105
 
71
106
  unless inventory_collection.create_only?
72
- load_and_update_records!(association, inventory_objects_index, attributes_index, all_attribute_keys)
107
+ update_or_destroy_records!(batch_iterator(association), inventory_objects_index, attributes_index, all_attribute_keys)
73
108
  end
74
109
 
75
110
  unless inventory_collection.create_only?
@@ -78,11 +113,15 @@ module InventoryRefresh::SaveCollection
78
113
 
79
114
  # Records that were not found in the DB but sent for saving, we will be creating these in the DB.
80
115
  if inventory_collection.create_allowed?
116
+ on_conflict = inventory_collection.parallel_safe? ? :do_update : nil
117
+
81
118
  inventory_objects_index.each_slice(batch_size_for_persisting) do |batch|
82
- create_records!(all_attribute_keys, batch, attributes_index, :on_conflict => :do_update)
119
+ create_records!(all_attribute_keys, batch, attributes_index, :on_conflict => on_conflict)
83
120
  end
84
121
 
85
- create_or_update_partial_records(all_attribute_keys)
122
+ if inventory_collection.parallel_safe?
123
+ create_or_update_partial_records(all_attribute_keys)
124
+ end
86
125
  end
87
126
 
88
127
  logger.debug("Marking :last_seen_at of #{inventory_collection} of size #{inventory_collection.size}...")
@@ -103,7 +142,7 @@ module InventoryRefresh::SaveCollection
103
142
  end
104
143
 
105
144
  def expand_all_attribute_keys!(all_attribute_keys)
106
- %i(created_at updated_at created_on updated_on).each do |col|
145
+ %i[created_at updated_at created_on updated_on].each do |col|
107
146
  all_attribute_keys << col if supports_column?(col)
108
147
  end
109
148
  all_attribute_keys << :type if supports_sti?
@@ -111,7 +150,7 @@ module InventoryRefresh::SaveCollection
111
150
  end
112
151
 
113
152
  def mark_last_seen_at(attributes_index)
114
- return unless supports_column?(:last_seen_at)
153
+ return unless supports_column?(:last_seen_at) && inventory_collection.parallel_safe?
115
154
  return if attributes_index.blank?
116
155
 
117
156
  all_attribute_keys = [:last_seen_at]
@@ -124,7 +163,8 @@ module InventoryRefresh::SaveCollection
124
163
  get_connection.execute(query)
125
164
  end
126
165
 
127
- # Batch updates existing records that are in the DB using attributes_index.
166
+ # Batch updates existing records that are in the DB using attributes_index. And delete the ones that were not
167
+ # present in inventory_objects_index.
128
168
  #
129
169
  # @param records_batch_iterator [ActiveRecord::Relation, InventoryRefresh::ApplicationRecordIterator] iterator or
130
170
  # relation, both responding to :find_in_batches method
@@ -132,11 +172,12 @@ module InventoryRefresh::SaveCollection
132
172
  # @param attributes_index [Hash{String => Hash}] Hash of data hashes with only keys that are column names of the
133
173
  # models's table
134
174
  # @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
135
- def load_and_update_records!(records_batch_iterator, inventory_objects_index, attributes_index, all_attribute_keys)
136
- hashes_for_update = []
175
+ def update_or_destroy_records!(records_batch_iterator, inventory_objects_index, attributes_index, all_attribute_keys)
176
+ hashes_for_update = []
177
+ records_for_destroy = []
137
178
  indexed_inventory_objects = {}
138
179
 
139
- records_batch_iterator.find_in_batches(:batch_size => batch_size, :attributes_index => attributes_index) do |batch|
180
+ records_batch_iterator.find_in_batches(:batch_size => batch_size) do |batch|
140
181
  update_time = time_now
141
182
 
142
183
  batch.each do |record|
@@ -149,14 +190,20 @@ module InventoryRefresh::SaveCollection
149
190
  inventory_object = inventory_objects_index.delete(index)
150
191
  hash = attributes_index[index]
151
192
 
152
- if inventory_object
193
+ if inventory_object.nil?
194
+ # Record was found in the DB but not sent for saving, that means it doesn't exist anymore and we should
195
+ # delete it from the DB.
196
+ if inventory_collection.delete_allowed?
197
+ records_for_destroy << record
198
+ end
199
+ else
153
200
  # Record was found in the DB and sent for saving, we will be updating the DB.
154
201
  inventory_object.id = primary_key_value
155
202
  next unless assert_referential_integrity(hash)
203
+ next unless changed?(record, hash, all_attribute_keys)
156
204
 
157
- record_version = nil
158
- record_version_max = nil
159
- if supports_remote_data_timestamp?(all_attribute_keys) || supports_remote_data_version?(all_attribute_keys)
205
+ if inventory_collection.parallel_safe? &&
206
+ (supports_remote_data_timestamp?(all_attribute_keys) || supports_remote_data_version?(all_attribute_keys))
160
207
 
161
208
  version_attr, max_version_attr = if supports_remote_data_timestamp?(all_attribute_keys)
162
209
  [:resource_timestamp, :resource_timestamps_max]
@@ -164,16 +211,16 @@ module InventoryRefresh::SaveCollection
164
211
  [:resource_counter, :resource_counters_max]
165
212
  end
166
213
 
167
- record_version = record_key(record, version_attr.to_s)
168
- record_version_max = record_key(record, max_version_attr.to_s)
214
+ next if skeletonize_or_skip_record(record_key(record, version_attr),
215
+ hash[version_attr],
216
+ record_key(record, max_version_attr),
217
+ inventory_object)
169
218
  end
170
219
 
171
220
  hash_for_update = if inventory_collection.use_ar_object?
172
221
  record.assign_attributes(hash.except(:id))
173
- next unless changed?(record)
174
-
175
222
  values_for_database!(all_attribute_keys,
176
- hash)
223
+ record.attributes.symbolize_keys)
177
224
  elsif serializable_keys?
178
225
  # TODO(lsmola) hash data with current DB data to allow subset of data being sent,
179
226
  # otherwise we would nullify the not sent attributes. Test e.g. on disks in cloud
@@ -184,14 +231,6 @@ module InventoryRefresh::SaveCollection
184
231
  # otherwise we would nullify the not sent attributes. Test e.g. on disks in cloud
185
232
  hash
186
233
  end
187
-
188
- if supports_remote_data_timestamp?(all_attribute_keys) || supports_remote_data_version?(all_attribute_keys)
189
- next if skeletonize_or_skip_record(record_version,
190
- hash[version_attr],
191
- record_version_max,
192
- inventory_object)
193
- end
194
-
195
234
  assign_attributes_for_update!(hash_for_update, update_time)
196
235
 
197
236
  hash_for_update[:id] = primary_key_value
@@ -207,22 +246,39 @@ module InventoryRefresh::SaveCollection
207
246
  hashes_for_update = []
208
247
  indexed_inventory_objects = {}
209
248
  end
249
+
250
+ # Destroy in batches
251
+ if records_for_destroy.size >= batch_size_for_persisting
252
+ destroy_records!(records_for_destroy)
253
+ records_for_destroy = []
254
+ end
210
255
  end
211
256
 
212
257
  # Update the last batch
213
258
  update_records!(all_attribute_keys, hashes_for_update, indexed_inventory_objects)
214
259
  hashes_for_update = [] # Cleanup so GC can release it sooner
260
+
261
+ # Destroy the last batch
262
+ destroy_records!(records_for_destroy)
263
+ records_for_destroy = [] # Cleanup so GC can release it sooner
215
264
  end
216
265
 
217
- def changed?(record)
266
+ def changed?(_record, _hash, _all_attribute_keys)
218
267
  return true unless inventory_collection.check_changed?
219
268
 
220
- # If object was archived before, pass it to update so it can be unarchived
221
- return true if record.respond_to?(:archived_at) && record.archived_at
222
- # Skip if nothing changed
223
- return false if record.changed_attributes.empty?
224
- # Skip if we only changed the resource_timestamp, but data stays the same
225
- return false if record.changed_attributes.keys == ["resource_timestamp"]
269
+ # TODO(lsmola) this check needs to be disabled now, because it doesn't work with lazy_find having secondary
270
+ # indexes. Examples: we save a pod before we save a project, that means the project lazy_find won't evaluate,
271
+ # because we load it with secondary index and can't do skeletal precreate. Then when the object is being saved
272
+ # again, the lazy_find is evaluated, but the resource version is not changed, so the row is not saved.
273
+ #
274
+ # To keep this quick .changed? check, we might need to extend this, so the resource_version doesn't save until
275
+ # all lazy_links of the row are evaluated.
276
+ #
277
+ # if supports_resource_version?(all_attribute_keys) && supports_column?(resource_version_column)
278
+ # record_resource_version = record_key(record, resource_version_column.to_s)
279
+ #
280
+ # return record_resource_version != hash[resource_version_column]
281
+ # end
226
282
 
227
283
  true
228
284
  end
@@ -230,7 +286,10 @@ module InventoryRefresh::SaveCollection
230
286
  def db_columns_index(record, pure_sql: false)
231
287
  # Incoming values are in SQL string form.
232
288
  # TODO(lsmola) unify this behavior with object_index_with_keys method in InventoryCollection
289
+ # TODO(lsmola) maybe we can drop the whole pure sql fetching, since everything will be targeted refresh
233
290
  # with streaming refresh? Maybe just metrics and events will not be, but those should be upsert only
291
+ # TODO(lsmola) taking ^ in account, we can't drop pure sql, since that is returned by batch insert and
292
+ # update queries
234
293
  unique_index_keys_to_s.map do |attribute|
235
294
  value = if pure_sql
236
295
  record[attribute]
@@ -261,13 +320,20 @@ module InventoryRefresh::SaveCollection
261
320
  def update_records!(all_attribute_keys, hashes, indexed_inventory_objects)
262
321
  return if hashes.blank?
263
322
 
323
+ unless inventory_collection.parallel_safe?
324
+ # We need to update the stored records before we save it, since hashes are modified
325
+ inventory_collection.store_updated_records(hashes)
326
+ end
327
+
264
328
  query = build_update_query(all_attribute_keys, hashes)
265
329
  result = get_connection.execute(query)
266
330
 
267
- # We will check for timestamp clashes of full row update and we will fallback to skeletal update
268
- inventory_collection.store_updated_records(result)
331
+ if inventory_collection.parallel_safe?
332
+ # We will check for timestamp clashes of full row update and we will fallback to skeletal update
333
+ inventory_collection.store_updated_records(result)
269
334
 
270
- skeletonize_ignored_records!(indexed_inventory_objects, result)
335
+ skeletonize_ignored_records!(indexed_inventory_objects, result)
336
+ end
271
337
 
272
338
  result
273
339
  end
@@ -287,7 +353,11 @@ module InventoryRefresh::SaveCollection
287
353
  hashes = []
288
354
  create_time = time_now
289
355
  batch.each do |index, inventory_object|
290
- hash = if serializable_keys?
356
+ hash = if inventory_collection.use_ar_object?
357
+ record = inventory_collection.model_class.new(attributes_index[index])
358
+ values_for_database!(all_attribute_keys,
359
+ record.attributes.symbolize_keys)
360
+ elsif serializable_keys?
291
361
  values_for_database!(all_attribute_keys,
292
362
  attributes_index[index])
293
363
  else
@@ -309,19 +379,24 @@ module InventoryRefresh::SaveCollection
309
379
  build_insert_query(all_attribute_keys, hashes, :on_conflict => on_conflict, :mode => :full)
310
380
  )
311
381
 
312
- # We've done upsert, so records were either created or updated. We can recognize that by checking if
313
- # created and updated timestamps are the same
314
- created_attr = "created_on" if inventory_collection.supports_column?(:created_on)
315
- created_attr ||= "created_at" if inventory_collection.supports_column?(:created_at)
316
- updated_attr = "updated_on" if inventory_collection.supports_column?(:updated_on)
317
- updated_attr ||= "updated_at" if inventory_collection.supports_column?(:updated_at)
318
-
319
- if created_attr && updated_attr
320
- created, updated = result.to_a.partition { |x| x[created_attr] == x[updated_attr] }
321
- inventory_collection.store_created_records(created)
322
- inventory_collection.store_updated_records(updated)
382
+ if inventory_collection.parallel_safe?
383
+ # We've done upsert, so records were either created or updated. We can recognize that by checking if
384
+ # created and updated timestamps are the same
385
+ created_attr = "created_on" if inventory_collection.supports_column?(:created_on)
386
+ created_attr ||= "created_at" if inventory_collection.supports_column?(:created_at)
387
+ updated_attr = "updated_on" if inventory_collection.supports_column?(:updated_on)
388
+ updated_attr ||= "updated_at" if inventory_collection.supports_column?(:updated_at)
389
+
390
+ if created_attr && updated_attr
391
+ created, updated = result.to_a.partition { |x| x[created_attr] == x[updated_attr] }
392
+ inventory_collection.store_created_records(created)
393
+ inventory_collection.store_updated_records(updated)
394
+ else
395
+ # The record doesn't have both created and updated attrs, so we'll take all as created
396
+ inventory_collection.store_created_records(result)
397
+ end
323
398
  else
324
- # The record doesn't have both created and updated attrs, so we'll take all as created
399
+ # We've done just insert, so all records were created
325
400
  inventory_collection.store_created_records(result)
326
401
  end
327
402
 
@@ -334,7 +409,9 @@ module InventoryRefresh::SaveCollection
334
409
  :on_conflict => on_conflict)
335
410
  end
336
411
 
337
- skeletonize_ignored_records!(indexed_inventory_objects, result, :all_unique_columns => true)
412
+ if inventory_collection.parallel_safe?
413
+ skeletonize_ignored_records!(indexed_inventory_objects, result, :all_unique_columns => true)
414
+ end
338
415
  end
339
416
 
340
417
  # Stores primary_key values of created records into associated InventoryObject objects.
@@ -365,6 +442,7 @@ module InventoryRefresh::SaveCollection
365
442
  inventory_object[ref] = attributes[ref]
366
443
 
367
444
  next unless (foreign_key = association_to_foreign_key_mapping[ref])
445
+
368
446
  base_class_name = attributes[association_to_foreign_type_mapping[ref].try(:to_sym)] || association_to_base_class_mapping[ref]
369
447
  id = attributes[foreign_key.to_sym]
370
448
  inventory_object[ref] = InventoryRefresh::ApplicationRecordReference.new(base_class_name, id)
@@ -0,0 +1,57 @@
1
+ require "inventory_refresh/save_collection/saver/base"
2
+
3
+ module InventoryRefresh::SaveCollection
4
+ module Saver
5
+ class Default < InventoryRefresh::SaveCollection::Saver::Base
6
+ private
7
+
8
+ # Updates the passed record with hash data and stores primary key value into inventory_object.
9
+ #
10
+ # @param record [ApplicationRecord] record we want to update in DB
11
+ # @param hash [Hash] data we want to update the record with
12
+ # @param inventory_object [InventoryRefresh::InventoryObject] InventoryObject instance where we will store primary
13
+ # key value
14
+ def update_record!(record, hash, inventory_object)
15
+ record.assign_attributes(hash.except(:id))
16
+ if !inventory_collection.check_changed? || record.changed?
17
+ record.save
18
+ inventory_collection.store_updated_records(record)
19
+ end
20
+
21
+ inventory_object.id = record.id
22
+ end
23
+
24
+ # Creates a new record in the DB using the passed hash data
25
+ #
26
+ # @param hash [Hash] hash with data we want to persist to DB
27
+ # @param inventory_object [InventoryRefresh::InventoryObject] InventoryObject instance where we will store primary
28
+ # key value
29
+ def create_record!(hash, inventory_object)
30
+ record = inventory_collection.model_class.create!(hash.except(:id))
31
+ inventory_collection.store_created_records(record)
32
+
33
+ inventory_object.id = record.id
34
+ end
35
+
36
+ # Asserts we do not have duplicate records in the DB. If the record is duplicate we will destroy it.
37
+ #
38
+ # @param record [ApplicationRecord] record we want to update in DB
39
+ # @param index [String] manager_uuid of the record
40
+ # @return [Boolean] false if the record is duplicate
41
+ def assert_unique_record(record, index)
42
+ # TODO(lsmola) can go away once we indexed our DB with unique indexes
43
+ if unique_db_indexes.include?(index) # Include on Set is O(1)
44
+ # We have a duplicate in the DB, destroy it. A find_each method does automatically .order(:id => :asc)
45
+ # so we always keep the oldest record in the case of duplicates.
46
+ logger.warn("A duplicate record was detected and destroyed, inventory_collection: "\
47
+ "'#{inventory_collection}', record: '#{record}', duplicate_index: '#{index}'")
48
+ record.destroy
49
+ return false
50
+ else
51
+ unique_db_indexes << index
52
+ end
53
+ true
54
+ end
55
+ end
56
+ end
57
+ end
@@ -59,7 +59,7 @@ module InventoryRefresh::SaveCollection
59
59
  skeletal_inventory_objects_index = {}
60
60
 
61
61
  inventory_collection.skeletal_primary_index.each_value do |inventory_object|
62
- attributes = inventory_object.class.attributes_with_keys(inventory_object.data, inventory_collection, all_attribute_keys, inventory_object)
62
+ attributes = inventory_object.attributes_with_keys(inventory_collection, all_attribute_keys, inventory_object)
63
63
  index = build_stringified_reference(attributes, unique_index_keys)
64
64
 
65
65
  skeletal_attributes_index[index] = attributes
@@ -135,6 +135,7 @@ module InventoryRefresh::SaveCollection
135
135
  # We need to set correct timestamps_max for this particular attribute, based on what is in timestamps
136
136
  batch.each do |x|
137
137
  next unless x[:__non_serialized_versions][column_name]
138
+
138
139
  x[comparables_max_name] = x[:__non_serialized_versions][column_name]
139
140
  end
140
141
  end
@@ -200,25 +201,7 @@ module InventoryRefresh::SaveCollection
200
201
  )
201
202
  end
202
203
 
203
- def comparable_timestamp(timestamp)
204
- # Lets cast all timestamps to to_f, rounding the time comparing precision to miliseconds, that should be
205
- # enough, since we are the ones setting the record version in collector. Otherwise we will have hard time with
206
- # doing equality, since the value changes going through DB (DB. cuts it at 5 decimal places)
207
-
208
- if timestamp.kind_of?(String)
209
- Time.use_zone('UTC') { Time.zone.parse(timestamp) }.to_f.round(3)
210
- elsif timestamp.kind_of?(Time)
211
- timestamp.in_time_zone('UTC').to_f.round(3)
212
- else
213
- timestamp
214
- end
215
- end
216
-
217
204
  def skeletonize_or_skip_record(record_version, hash_version, record_versions_max, inventory_object)
218
- record_version = comparable_timestamp(record_version)
219
- record_versions_max = comparable_timestamp(record_versions_max) if record_versions_max
220
- hash_version = comparable_timestamp(hash_version)
221
-
222
205
  # Skip updating this record, because it is old
223
206
  return true if record_version && hash_version && record_version >= hash_version
224
207