inventory_refresh 0.3.6 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. checksums.yaml +4 -4
  2. data/.codeclimate.yml +25 -30
  3. data/.github/workflows/ci.yaml +47 -0
  4. data/.rubocop.yml +3 -3
  5. data/.rubocop_cc.yml +3 -4
  6. data/.rubocop_local.yml +5 -2
  7. data/.whitesource +3 -0
  8. data/CHANGELOG.md +19 -0
  9. data/Gemfile +10 -4
  10. data/README.md +1 -2
  11. data/Rakefile +2 -2
  12. data/inventory_refresh.gemspec +8 -9
  13. data/lib/inventory_refresh/application_record_iterator.rb +25 -12
  14. data/lib/inventory_refresh/graph/topological_sort.rb +24 -26
  15. data/lib/inventory_refresh/graph.rb +2 -2
  16. data/lib/inventory_refresh/inventory_collection/builder.rb +37 -15
  17. data/lib/inventory_refresh/inventory_collection/data_storage.rb +9 -0
  18. data/lib/inventory_refresh/inventory_collection/helpers/initialize_helper.rb +147 -38
  19. data/lib/inventory_refresh/inventory_collection/helpers/questions_helper.rb +48 -4
  20. data/lib/inventory_refresh/inventory_collection/index/proxy.rb +35 -3
  21. data/lib/inventory_refresh/inventory_collection/index/type/base.rb +8 -0
  22. data/lib/inventory_refresh/inventory_collection/index/type/local_db.rb +2 -0
  23. data/lib/inventory_refresh/inventory_collection/index/type/skeletal.rb +1 -0
  24. data/lib/inventory_refresh/inventory_collection/reference.rb +1 -0
  25. data/lib/inventory_refresh/inventory_collection/references_storage.rb +17 -0
  26. data/lib/inventory_refresh/inventory_collection/scanner.rb +91 -3
  27. data/lib/inventory_refresh/inventory_collection/serialization.rb +16 -10
  28. data/lib/inventory_refresh/inventory_collection.rb +122 -64
  29. data/lib/inventory_refresh/inventory_object.rb +74 -40
  30. data/lib/inventory_refresh/inventory_object_lazy.rb +17 -10
  31. data/lib/inventory_refresh/null_logger.rb +2 -2
  32. data/lib/inventory_refresh/persister.rb +43 -93
  33. data/lib/inventory_refresh/save_collection/base.rb +4 -2
  34. data/lib/inventory_refresh/save_collection/saver/base.rb +114 -15
  35. data/lib/inventory_refresh/save_collection/saver/batch.rb +17 -0
  36. data/lib/inventory_refresh/save_collection/saver/concurrent_safe_batch.rb +129 -51
  37. data/lib/inventory_refresh/save_collection/saver/default.rb +57 -0
  38. data/lib/inventory_refresh/save_collection/saver/partial_upsert_helper.rb +2 -19
  39. data/lib/inventory_refresh/save_collection/saver/retention_helper.rb +68 -3
  40. data/lib/inventory_refresh/save_collection/saver/sql_helper.rb +125 -0
  41. data/lib/inventory_refresh/save_collection/saver/sql_helper_update.rb +10 -6
  42. data/lib/inventory_refresh/save_collection/saver/sql_helper_upsert.rb +28 -16
  43. data/lib/inventory_refresh/save_collection/sweeper.rb +17 -93
  44. data/lib/inventory_refresh/save_collection/topological_sort.rb +5 -5
  45. data/lib/inventory_refresh/save_inventory.rb +5 -12
  46. data/lib/inventory_refresh/target.rb +73 -0
  47. data/lib/inventory_refresh/target_collection.rb +92 -0
  48. data/lib/inventory_refresh/version.rb +1 -1
  49. data/lib/inventory_refresh.rb +2 -0
  50. metadata +34 -37
  51. data/.travis.yml +0 -23
  52. data/lib/inventory_refresh/exception.rb +0 -8
@@ -12,6 +12,8 @@ module InventoryRefresh::SaveCollection
12
12
  # @param inventory_collection [InventoryRefresh::InventoryCollection] InventoryCollection object we will be saving
13
13
  def initialize(inventory_collection)
14
14
  @inventory_collection = inventory_collection
15
+ # TODO(lsmola) do I need to reload every time? Also it should be enough to clear the associations.
16
+ inventory_collection.parent&.reload
15
17
  @association = inventory_collection.db_collection_for_comparison
16
18
 
17
19
  # Private attrs
@@ -19,17 +21,21 @@ module InventoryRefresh::SaveCollection
19
21
  @table_name = @model_class.table_name
20
22
  @q_table_name = get_connection.quote_table_name(@table_name)
21
23
  @primary_key = @model_class.primary_key
22
- @arel_primary_key = @model_class.arel_attribute(@primary_key)
24
+ @arel_primary_key = @model_class.arel_table[@primary_key]
23
25
  @unique_index_keys = inventory_collection.unique_index_keys
24
26
  @unique_index_keys_to_s = inventory_collection.manager_ref_to_cols.map(&:to_s)
25
27
  @select_keys = [@primary_key] + @unique_index_keys_to_s + internal_columns.map(&:to_s)
26
28
  @unique_db_primary_keys = Set.new
27
29
  @unique_db_indexes = Set.new
28
30
 
31
+ # Right now ApplicationRecordIterator in association is used for targeted refresh. Given the small amount of
32
+ # records flowing through there, we probably don't need to optimize that association to fetch a pure SQL.
33
+ @pure_sql_records_fetching = !inventory_collection.use_ar_object? && !@association.kind_of?(InventoryRefresh::ApplicationRecordIterator)
34
+
29
35
  @batch_size_for_persisting = inventory_collection.batch_size_pure_sql
30
- @batch_size = inventory_collection.use_ar_object? ? @batch_size_for_persisting : inventory_collection.batch_size
31
36
 
32
- @record_key_method = inventory_collection.pure_sql_record_fetching? ? :pure_sql_record_key : :ar_record_key
37
+ @batch_size = @pure_sql_records_fetching ? @batch_size_for_persisting : inventory_collection.batch_size
38
+ @record_key_method = @pure_sql_records_fetching ? :pure_sql_record_key : :ar_record_key
33
39
  @select_keys_indexes = @select_keys.each_with_object({}).with_index { |(key, obj), index| obj[key.to_s] = index }
34
40
  @pg_types = @model_class.attribute_names.each_with_object({}) do |key, obj|
35
41
  obj[key.to_sym] = inventory_collection.model_class.columns_hash[key]
@@ -69,8 +75,14 @@ module InventoryRefresh::SaveCollection
69
75
 
70
76
  # Saves the InventoryCollection
71
77
  def save_inventory_collection!
78
+ # If we have a targeted InventoryCollection that wouldn't do anything, quickly skip it
79
+ return if inventory_collection.noop?
80
+
81
+ # Delete_complement strategy using :all_manager_uuids attribute
82
+ delete_complement unless inventory_collection.delete_complement_noop?
83
+
72
84
  # Create/Update/Archive/Delete records based on InventoryCollection data and scope
73
- save!(association)
85
+ save!(association) unless inventory_collection.saving_noop?
74
86
  end
75
87
 
76
88
  protected
@@ -89,8 +101,6 @@ module InventoryRefresh::SaveCollection
89
101
  # @param attributes [Hash] attributes hash
90
102
  # @return [Hash] modified hash from parameter attributes with casted values
91
103
  def values_for_database!(all_attribute_keys, attributes)
92
- # TODO(lsmola) we'll need to fill default value from the DB to the NOT_NULL columns here, since sending NULL
93
- # to column with NOT_NULL constraint always fails, even if there is a default value
94
104
  all_attribute_keys.each do |key|
95
105
  next unless attributes.key?(key)
96
106
 
@@ -102,7 +112,11 @@ module InventoryRefresh::SaveCollection
102
112
  end
103
113
 
104
114
  def transform_to_hash!(all_attribute_keys, hash)
105
- if serializable_keys?
115
+ if inventory_collection.use_ar_object?
116
+ record = inventory_collection.model_class.new(hash)
117
+ values_for_database!(all_attribute_keys,
118
+ record.attributes.slice(*record.changed_attributes.keys).symbolize_keys)
119
+ elsif serializable_keys?
106
120
  values_for_database!(all_attribute_keys,
107
121
  hash)
108
122
  else
@@ -113,15 +127,99 @@ module InventoryRefresh::SaveCollection
113
127
  private
114
128
 
115
129
  attr_reader :unique_index_keys, :unique_index_keys_to_s, :select_keys, :unique_db_primary_keys, :unique_db_indexes,
116
- :primary_key, :arel_primary_key, :record_key_method, :select_keys_indexes,
130
+ :primary_key, :arel_primary_key, :record_key_method, :pure_sql_records_fetching, :select_keys_indexes,
117
131
  :batch_size, :batch_size_for_persisting, :model_class, :serializable_keys, :deserializable_keys, :pg_types, :table_name,
118
132
  :q_table_name
119
133
 
120
134
  delegate :supports_column?, :to => :inventory_collection
121
135
 
136
+ # Saves the InventoryCollection
137
+ #
138
+ # @param association [Symbol] An existing association on manager
139
+ def save!(association)
140
+ attributes_index = {}
141
+ inventory_objects_index = {}
142
+ inventory_collection.each do |inventory_object|
143
+ attributes = inventory_object.attributes(inventory_collection)
144
+ index = build_stringified_reference(attributes, unique_index_keys)
145
+
146
+ attributes_index[index] = attributes
147
+ inventory_objects_index[index] = inventory_object
148
+ end
149
+
150
+ logger.debug("Processing #{inventory_collection} of size #{inventory_collection.size}...")
151
+ # Records that are in the DB, we will be updating or deleting them.
152
+ ActiveRecord::Base.transaction do
153
+ association.find_each do |record|
154
+ index = build_stringified_reference_for_record(record, unique_index_keys)
155
+
156
+ next unless assert_distinct_relation(record.id)
157
+ next unless assert_unique_record(record, index)
158
+
159
+ inventory_object = inventory_objects_index.delete(index)
160
+ hash = attributes_index.delete(index)
161
+
162
+ if inventory_object.nil?
163
+ # Record was found in the DB but not sent for saving, that means it doesn't exist anymore and we should
164
+ # delete it from the DB.
165
+ delete_record!(record) if inventory_collection.delete_allowed?
166
+ elsif assert_referential_integrity(hash)
167
+ # Record was found in the DB and sent for saving, we will be updating the DB.
168
+ update_record!(record, hash, inventory_object)
169
+ end
170
+ end
171
+ end
172
+
173
+ inventory_collection.custom_reconnect_block&.call(inventory_collection, inventory_objects_index, attributes_index)
174
+
175
+ # Records that were not found in the DB but sent for saving, we will be creating these in the DB.
176
+ if inventory_collection.create_allowed?
177
+ ActiveRecord::Base.transaction do
178
+ inventory_objects_index.each do |index, inventory_object|
179
+ hash = attributes_index.delete(index)
180
+
181
+ create_record!(hash, inventory_object) if assert_referential_integrity(hash)
182
+ end
183
+ end
184
+ end
185
+ logger.debug("Processing #{inventory_collection}, "\
186
+ "created=#{inventory_collection.created_records.count}, "\
187
+ "updated=#{inventory_collection.updated_records.count}, "\
188
+ "deleted=#{inventory_collection.deleted_records.count}...Complete")
189
+ rescue => e
190
+ logger.error("Error when saving #{inventory_collection} with #{inventory_collection_details}. Message: #{e.message}")
191
+ raise e
192
+ end
193
+
122
194
  # @return [String] a string for logging purposes
123
195
  def inventory_collection_details
124
- "strategy: #{inventory_collection.strategy}, saver_strategy: #{inventory_collection.saver_strategy}"
196
+ "strategy: #{inventory_collection.strategy}, saver_strategy: #{inventory_collection.saver_strategy}, targeted: #{inventory_collection.targeted?}"
197
+ end
198
+
199
+ # @param record [ApplicationRecord] ApplicationRecord object
200
+ # @param key [Symbol] A key that is an attribute of the AR object
201
+ # @return [Object] Value of attribute name :key on the :record
202
+ def record_key(record, key)
203
+ record.public_send(key)
204
+ end
205
+
206
+ # Deletes a complement of referenced data
207
+ def delete_complement
208
+ raise(":delete_complement method is supported only for :saver_strategy => [:batch, :concurrent_safe_batch]")
209
+ end
210
+
211
+ # Deletes/soft-deletes a given record
212
+ #
213
+ # @param [ApplicationRecord] record we want to delete
214
+ def delete_record!(record)
215
+ record.public_send(inventory_collection.delete_method)
216
+ inventory_collection.store_deleted_records(record)
217
+ end
218
+
219
+ # @return [TrueClass] always return true, this method is redefined in default saver
220
+ def assert_unique_record(_record, _index)
221
+ # TODO(lsmola) can go away once we indexed our DB with unique indexes
222
+ true
125
223
  end
126
224
 
127
225
  # Check if relation provided is distinct, i.e. the relation should not return the same primary key value twice.
@@ -134,12 +232,12 @@ module InventoryRefresh::SaveCollection
134
232
  # Change the InventoryCollection's :association or :arel parameter to return distinct results. The :through
135
233
  # relations can return the same record multiple times. We don't want to do SELECT DISTINCT by default, since
136
234
  # it can be very slow.
137
- unless inventory_collection.assert_graph_integrity
235
+ if inventory_collection.assert_graph_integrity
236
+ raise("Please update :association or :arel for #{inventory_collection} to return a DISTINCT result. ")
237
+ else
138
238
  logger.warn("Please update :association or :arel for #{inventory_collection} to return a DISTINCT result. "\
139
239
  " The duplicate value is being ignored.")
140
240
  return false
141
- else
142
- raise("Please update :association or :arel for #{inventory_collection} to return a DISTINCT result. ")
143
241
  end
144
242
  else
145
243
  unique_db_primary_keys << primary_key_value
@@ -157,14 +255,15 @@ module InventoryRefresh::SaveCollection
157
255
  def assert_referential_integrity(hash)
158
256
  inventory_collection.fixed_foreign_keys.each do |x|
159
257
  next unless hash[x].nil?
258
+
160
259
  subject = "#{hash} of #{inventory_collection} because of missing foreign key #{x} for "\
161
260
  "#{inventory_collection.parent.class.name}:"\
162
261
  "#{inventory_collection.parent.try(:id)}"
163
- unless inventory_collection.assert_graph_integrity
262
+ if inventory_collection.assert_graph_integrity
263
+ raise("Referential integrity check violated for #{subject}")
264
+ else
164
265
  logger.warn("Referential integrity check violated, ignoring #{subject}")
165
266
  return false
166
- else
167
- raise("Referential integrity check violated for #{subject}")
168
267
  end
169
268
  end
170
269
  true
@@ -0,0 +1,17 @@
1
+ require "inventory_refresh/save_collection/saver/concurrent_safe_batch"
2
+
3
+ module InventoryRefresh::SaveCollection
4
+ module Saver
5
+ class Batch < InventoryRefresh::SaveCollection::Saver::ConcurrentSafeBatch
6
+ private
7
+
8
+ # Just returning manager ref transformed to column names, for strategies that do not expect to have unique DB
9
+ # indexes.
10
+ #
11
+ # @return [Array<Symbol>] manager ref transformed to column names
12
+ def unique_index_columns
13
+ inventory_collection.manager_ref_to_cols.map(&:to_sym)
14
+ end
15
+ end
16
+ end
17
+ end
@@ -46,6 +46,41 @@ module InventoryRefresh::SaveCollection
46
46
  record[select_keys_indexes[key]]
47
47
  end
48
48
 
49
+ # Returns iterator or relation based on settings
50
+ #
51
+ # @param association [Symbol] An existing association on manager
52
+ # @return [ActiveRecord::Relation, InventoryRefresh::ApplicationRecordIterator] iterator or relation based on settings
53
+ def batch_iterator(association)
54
+ if pure_sql_records_fetching
55
+ # Building fast iterator doing pure SQL query and therefore avoiding redundant creation of AR objects. The
56
+ # iterator responds to find_in_batches, so it acts like the AR relation. For targeted refresh, the association
57
+ # can already be ApplicationRecordIterator, so we will skip that.
58
+ pure_sql_iterator = lambda do |&block|
59
+ primary_key_offset = nil
60
+ loop do
61
+ relation = association.select(*select_keys)
62
+ .reorder("#{primary_key} ASC")
63
+ .limit(batch_size)
64
+ # Using rails way of comparing primary key instead of offset
65
+ relation = relation.where(arel_primary_key.gt(primary_key_offset)) if primary_key_offset
66
+ records = get_connection.query(relation.to_sql)
67
+ last_record = records.last
68
+ block.call(records)
69
+
70
+ break if records.size < batch_size
71
+
72
+ primary_key_offset = record_key(last_record, primary_key)
73
+ end
74
+ end
75
+
76
+ InventoryRefresh::ApplicationRecordIterator.new(:iterator => pure_sql_iterator)
77
+ else
78
+ # Normal Rails ActiveRecord::Relation where we can call find_in_batches or
79
+ # InventoryRefresh::ApplicationRecordIterator passed from targeted refresh
80
+ association
81
+ end
82
+ end
83
+
49
84
  # Saves the InventoryCollection
50
85
  #
51
86
  # @param association [Symbol] An existing association on manager
@@ -55,7 +90,7 @@ module InventoryRefresh::SaveCollection
55
90
  all_attribute_keys = Set.new + inventory_collection.batch_extra_attributes
56
91
 
57
92
  inventory_collection.each do |inventory_object|
58
- attributes = inventory_object.class.attributes_with_keys(inventory_object.data, inventory_collection, all_attribute_keys, inventory_object)
93
+ attributes = inventory_object.attributes_with_keys(inventory_collection, all_attribute_keys, inventory_object)
59
94
  index = build_stringified_reference(attributes, unique_index_keys)
60
95
 
61
96
  # Interesting fact: not building attributes_index and using only inventory_objects_index doesn't do much
@@ -69,7 +104,7 @@ module InventoryRefresh::SaveCollection
69
104
  logger.debug("Processing #{inventory_collection} of size #{inventory_collection.size}...")
70
105
 
71
106
  unless inventory_collection.create_only?
72
- load_and_update_records!(association, inventory_objects_index, attributes_index, all_attribute_keys)
107
+ update_or_destroy_records!(batch_iterator(association), inventory_objects_index, attributes_index, all_attribute_keys)
73
108
  end
74
109
 
75
110
  unless inventory_collection.create_only?
@@ -78,11 +113,15 @@ module InventoryRefresh::SaveCollection
78
113
 
79
114
  # Records that were not found in the DB but sent for saving, we will be creating these in the DB.
80
115
  if inventory_collection.create_allowed?
116
+ on_conflict = inventory_collection.parallel_safe? ? :do_update : nil
117
+
81
118
  inventory_objects_index.each_slice(batch_size_for_persisting) do |batch|
82
- create_records!(all_attribute_keys, batch, attributes_index, :on_conflict => :do_update)
119
+ create_records!(all_attribute_keys, batch, attributes_index, :on_conflict => on_conflict)
83
120
  end
84
121
 
85
- create_or_update_partial_records(all_attribute_keys)
122
+ if inventory_collection.parallel_safe?
123
+ create_or_update_partial_records(all_attribute_keys)
124
+ end
86
125
  end
87
126
 
88
127
  logger.debug("Marking :last_seen_at of #{inventory_collection} of size #{inventory_collection.size}...")
@@ -103,7 +142,7 @@ module InventoryRefresh::SaveCollection
103
142
  end
104
143
 
105
144
  def expand_all_attribute_keys!(all_attribute_keys)
106
- %i(created_at updated_at created_on updated_on).each do |col|
145
+ %i[created_at updated_at created_on updated_on].each do |col|
107
146
  all_attribute_keys << col if supports_column?(col)
108
147
  end
109
148
  all_attribute_keys << :type if supports_sti?
@@ -111,7 +150,7 @@ module InventoryRefresh::SaveCollection
111
150
  end
112
151
 
113
152
  def mark_last_seen_at(attributes_index)
114
- return unless supports_column?(:last_seen_at)
153
+ return unless supports_column?(:last_seen_at) && inventory_collection.parallel_safe?
115
154
  return if attributes_index.blank?
116
155
 
117
156
  all_attribute_keys = [:last_seen_at]
@@ -124,7 +163,8 @@ module InventoryRefresh::SaveCollection
124
163
  get_connection.execute(query)
125
164
  end
126
165
 
127
- # Batch updates existing records that are in the DB using attributes_index.
166
+ # Batch updates existing records that are in the DB using attributes_index. And delete the ones that were not
167
+ # present in inventory_objects_index.
128
168
  #
129
169
  # @param records_batch_iterator [ActiveRecord::Relation, InventoryRefresh::ApplicationRecordIterator] iterator or
130
170
  # relation, both responding to :find_in_batches method
@@ -132,11 +172,12 @@ module InventoryRefresh::SaveCollection
132
172
  # @param attributes_index [Hash{String => Hash}] Hash of data hashes with only keys that are column names of the
133
173
  # models's table
134
174
  # @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
135
- def load_and_update_records!(records_batch_iterator, inventory_objects_index, attributes_index, all_attribute_keys)
136
- hashes_for_update = []
175
+ def update_or_destroy_records!(records_batch_iterator, inventory_objects_index, attributes_index, all_attribute_keys)
176
+ hashes_for_update = []
177
+ records_for_destroy = []
137
178
  indexed_inventory_objects = {}
138
179
 
139
- records_batch_iterator.find_in_batches(:batch_size => batch_size, :attributes_index => attributes_index) do |batch|
180
+ records_batch_iterator.find_in_batches(:batch_size => batch_size) do |batch|
140
181
  update_time = time_now
141
182
 
142
183
  batch.each do |record|
@@ -149,14 +190,20 @@ module InventoryRefresh::SaveCollection
149
190
  inventory_object = inventory_objects_index.delete(index)
150
191
  hash = attributes_index[index]
151
192
 
152
- if inventory_object
193
+ if inventory_object.nil?
194
+ # Record was found in the DB but not sent for saving, that means it doesn't exist anymore and we should
195
+ # delete it from the DB.
196
+ if inventory_collection.delete_allowed?
197
+ records_for_destroy << record
198
+ end
199
+ else
153
200
  # Record was found in the DB and sent for saving, we will be updating the DB.
154
201
  inventory_object.id = primary_key_value
155
202
  next unless assert_referential_integrity(hash)
203
+ next unless changed?(record, hash, all_attribute_keys)
156
204
 
157
- record_version = nil
158
- record_version_max = nil
159
- if supports_remote_data_timestamp?(all_attribute_keys) || supports_remote_data_version?(all_attribute_keys)
205
+ if inventory_collection.parallel_safe? &&
206
+ (supports_remote_data_timestamp?(all_attribute_keys) || supports_remote_data_version?(all_attribute_keys))
160
207
 
161
208
  version_attr, max_version_attr = if supports_remote_data_timestamp?(all_attribute_keys)
162
209
  [:resource_timestamp, :resource_timestamps_max]
@@ -164,16 +211,16 @@ module InventoryRefresh::SaveCollection
164
211
  [:resource_counter, :resource_counters_max]
165
212
  end
166
213
 
167
- record_version = record_key(record, version_attr.to_s)
168
- record_version_max = record_key(record, max_version_attr.to_s)
214
+ next if skeletonize_or_skip_record(record_key(record, version_attr),
215
+ hash[version_attr],
216
+ record_key(record, max_version_attr),
217
+ inventory_object)
169
218
  end
170
219
 
171
220
  hash_for_update = if inventory_collection.use_ar_object?
172
221
  record.assign_attributes(hash.except(:id))
173
- next unless changed?(record)
174
-
175
222
  values_for_database!(all_attribute_keys,
176
- hash)
223
+ record.attributes.symbolize_keys)
177
224
  elsif serializable_keys?
178
225
  # TODO(lsmola) hash data with current DB data to allow subset of data being sent,
179
226
  # otherwise we would nullify the not sent attributes. Test e.g. on disks in cloud
@@ -184,14 +231,6 @@ module InventoryRefresh::SaveCollection
184
231
  # otherwise we would nullify the not sent attributes. Test e.g. on disks in cloud
185
232
  hash
186
233
  end
187
-
188
- if supports_remote_data_timestamp?(all_attribute_keys) || supports_remote_data_version?(all_attribute_keys)
189
- next if skeletonize_or_skip_record(record_version,
190
- hash[version_attr],
191
- record_version_max,
192
- inventory_object)
193
- end
194
-
195
234
  assign_attributes_for_update!(hash_for_update, update_time)
196
235
 
197
236
  hash_for_update[:id] = primary_key_value
@@ -207,22 +246,39 @@ module InventoryRefresh::SaveCollection
207
246
  hashes_for_update = []
208
247
  indexed_inventory_objects = {}
209
248
  end
249
+
250
+ # Destroy in batches
251
+ if records_for_destroy.size >= batch_size_for_persisting
252
+ destroy_records!(records_for_destroy)
253
+ records_for_destroy = []
254
+ end
210
255
  end
211
256
 
212
257
  # Update the last batch
213
258
  update_records!(all_attribute_keys, hashes_for_update, indexed_inventory_objects)
214
259
  hashes_for_update = [] # Cleanup so GC can release it sooner
260
+
261
+ # Destroy the last batch
262
+ destroy_records!(records_for_destroy)
263
+ records_for_destroy = [] # Cleanup so GC can release it sooner
215
264
  end
216
265
 
217
- def changed?(record)
266
+ def changed?(_record, _hash, _all_attribute_keys)
218
267
  return true unless inventory_collection.check_changed?
219
268
 
220
- # If object was archived before, pass it to update so it can be unarchived
221
- return true if record.respond_to?(:archived_at) && record.archived_at
222
- # Skip if nothing changed
223
- return false if record.changed_attributes.empty?
224
- # Skip if we only changed the resource_timestamp, but data stays the same
225
- return false if record.changed_attributes.keys == ["resource_timestamp"]
269
+ # TODO(lsmola) this check needs to be disabled now, because it doesn't work with lazy_find having secondary
270
+ # indexes. Examples: we save a pod before we save a project, that means the project lazy_find won't evaluate,
271
+ # because we load it with secondary index and can't do skeletal precreate. Then when the object is being saved
272
+ # again, the lazy_find is evaluated, but the resource version is not changed, so the row is not saved.
273
+ #
274
+ # To keep this quick .changed? check, we might need to extend this, so the resource_version doesn't save until
275
+ # all lazy_links of the row are evaluated.
276
+ #
277
+ # if supports_resource_version?(all_attribute_keys) && supports_column?(resource_version_column)
278
+ # record_resource_version = record_key(record, resource_version_column.to_s)
279
+ #
280
+ # return record_resource_version != hash[resource_version_column]
281
+ # end
226
282
 
227
283
  true
228
284
  end
@@ -230,7 +286,10 @@ module InventoryRefresh::SaveCollection
230
286
  def db_columns_index(record, pure_sql: false)
231
287
  # Incoming values are in SQL string form.
232
288
  # TODO(lsmola) unify this behavior with object_index_with_keys method in InventoryCollection
289
+ # TODO(lsmola) maybe we can drop the whole pure sql fetching, since everything will be targeted refresh
233
290
  # with streaming refresh? Maybe just metrics and events will not be, but those should be upsert only
291
+ # TODO(lsmola) taking ^ in account, we can't drop pure sql, since that is returned by batch insert and
292
+ # update queries
234
293
  unique_index_keys_to_s.map do |attribute|
235
294
  value = if pure_sql
236
295
  record[attribute]
@@ -261,13 +320,20 @@ module InventoryRefresh::SaveCollection
261
320
  def update_records!(all_attribute_keys, hashes, indexed_inventory_objects)
262
321
  return if hashes.blank?
263
322
 
323
+ unless inventory_collection.parallel_safe?
324
+ # We need to update the stored records before we save it, since hashes are modified
325
+ inventory_collection.store_updated_records(hashes)
326
+ end
327
+
264
328
  query = build_update_query(all_attribute_keys, hashes)
265
329
  result = get_connection.execute(query)
266
330
 
267
- # We will check for timestamp clashes of full row update and we will fallback to skeletal update
268
- inventory_collection.store_updated_records(result)
331
+ if inventory_collection.parallel_safe?
332
+ # We will check for timestamp clashes of full row update and we will fallback to skeletal update
333
+ inventory_collection.store_updated_records(result)
269
334
 
270
- skeletonize_ignored_records!(indexed_inventory_objects, result)
335
+ skeletonize_ignored_records!(indexed_inventory_objects, result)
336
+ end
271
337
 
272
338
  result
273
339
  end
@@ -287,7 +353,11 @@ module InventoryRefresh::SaveCollection
287
353
  hashes = []
288
354
  create_time = time_now
289
355
  batch.each do |index, inventory_object|
290
- hash = if serializable_keys?
356
+ hash = if inventory_collection.use_ar_object?
357
+ record = inventory_collection.model_class.new(attributes_index[index])
358
+ values_for_database!(all_attribute_keys,
359
+ record.attributes.symbolize_keys)
360
+ elsif serializable_keys?
291
361
  values_for_database!(all_attribute_keys,
292
362
  attributes_index[index])
293
363
  else
@@ -309,19 +379,24 @@ module InventoryRefresh::SaveCollection
309
379
  build_insert_query(all_attribute_keys, hashes, :on_conflict => on_conflict, :mode => :full)
310
380
  )
311
381
 
312
- # We've done upsert, so records were either created or updated. We can recognize that by checking if
313
- # created and updated timestamps are the same
314
- created_attr = "created_on" if inventory_collection.supports_column?(:created_on)
315
- created_attr ||= "created_at" if inventory_collection.supports_column?(:created_at)
316
- updated_attr = "updated_on" if inventory_collection.supports_column?(:updated_on)
317
- updated_attr ||= "updated_at" if inventory_collection.supports_column?(:updated_at)
318
-
319
- if created_attr && updated_attr
320
- created, updated = result.to_a.partition { |x| x[created_attr] == x[updated_attr] }
321
- inventory_collection.store_created_records(created)
322
- inventory_collection.store_updated_records(updated)
382
+ if inventory_collection.parallel_safe?
383
+ # We've done upsert, so records were either created or updated. We can recognize that by checking if
384
+ # created and updated timestamps are the same
385
+ created_attr = "created_on" if inventory_collection.supports_column?(:created_on)
386
+ created_attr ||= "created_at" if inventory_collection.supports_column?(:created_at)
387
+ updated_attr = "updated_on" if inventory_collection.supports_column?(:updated_on)
388
+ updated_attr ||= "updated_at" if inventory_collection.supports_column?(:updated_at)
389
+
390
+ if created_attr && updated_attr
391
+ created, updated = result.to_a.partition { |x| x[created_attr] == x[updated_attr] }
392
+ inventory_collection.store_created_records(created)
393
+ inventory_collection.store_updated_records(updated)
394
+ else
395
+ # The record doesn't have both created and updated attrs, so we'll take all as created
396
+ inventory_collection.store_created_records(result)
397
+ end
323
398
  else
324
- # The record doesn't have both created and updated attrs, so we'll take all as created
399
+ # We've done just insert, so all records were created
325
400
  inventory_collection.store_created_records(result)
326
401
  end
327
402
 
@@ -334,7 +409,9 @@ module InventoryRefresh::SaveCollection
334
409
  :on_conflict => on_conflict)
335
410
  end
336
411
 
337
- skeletonize_ignored_records!(indexed_inventory_objects, result, :all_unique_columns => true)
412
+ if inventory_collection.parallel_safe?
413
+ skeletonize_ignored_records!(indexed_inventory_objects, result, :all_unique_columns => true)
414
+ end
338
415
  end
339
416
 
340
417
  # Stores primary_key values of created records into associated InventoryObject objects.
@@ -365,6 +442,7 @@ module InventoryRefresh::SaveCollection
365
442
  inventory_object[ref] = attributes[ref]
366
443
 
367
444
  next unless (foreign_key = association_to_foreign_key_mapping[ref])
445
+
368
446
  base_class_name = attributes[association_to_foreign_type_mapping[ref].try(:to_sym)] || association_to_base_class_mapping[ref]
369
447
  id = attributes[foreign_key.to_sym]
370
448
  inventory_object[ref] = InventoryRefresh::ApplicationRecordReference.new(base_class_name, id)
@@ -0,0 +1,57 @@
1
+ require "inventory_refresh/save_collection/saver/base"
2
+
3
+ module InventoryRefresh::SaveCollection
4
+ module Saver
5
+ class Default < InventoryRefresh::SaveCollection::Saver::Base
6
+ private
7
+
8
+ # Updates the passed record with hash data and stores primary key value into inventory_object.
9
+ #
10
+ # @param record [ApplicationRecord] record we want to update in DB
11
+ # @param hash [Hash] data we want to update the record with
12
+ # @param inventory_object [InventoryRefresh::InventoryObject] InventoryObject instance where we will store primary
13
+ # key value
14
+ def update_record!(record, hash, inventory_object)
15
+ record.assign_attributes(hash.except(:id))
16
+ if !inventory_collection.check_changed? || record.changed?
17
+ record.save
18
+ inventory_collection.store_updated_records(record)
19
+ end
20
+
21
+ inventory_object.id = record.id
22
+ end
23
+
24
+ # Creates a new record in the DB using the passed hash data
25
+ #
26
+ # @param hash [Hash] hash with data we want to persist to DB
27
+ # @param inventory_object [InventoryRefresh::InventoryObject] InventoryObject instance where we will store primary
28
+ # key value
29
+ def create_record!(hash, inventory_object)
30
+ record = inventory_collection.model_class.create!(hash.except(:id))
31
+ inventory_collection.store_created_records(record)
32
+
33
+ inventory_object.id = record.id
34
+ end
35
+
36
+ # Asserts we do not have duplicate records in the DB. If the record is duplicate we will destroy it.
37
+ #
38
+ # @param record [ApplicationRecord] record we want to update in DB
39
+ # @param index [String] manager_uuid of the record
40
+ # @return [Boolean] false if the record is duplicate
41
+ def assert_unique_record(record, index)
42
+ # TODO(lsmola) can go away once we indexed our DB with unique indexes
43
+ if unique_db_indexes.include?(index) # Include on Set is O(1)
44
+ # We have a duplicate in the DB, destroy it. A find_each method does automatically .order(:id => :asc)
45
+ # so we always keep the oldest record in the case of duplicates.
46
+ logger.warn("A duplicate record was detected and destroyed, inventory_collection: "\
47
+ "'#{inventory_collection}', record: '#{record}', duplicate_index: '#{index}'")
48
+ record.destroy
49
+ return false
50
+ else
51
+ unique_db_indexes << index
52
+ end
53
+ true
54
+ end
55
+ end
56
+ end
57
+ end
@@ -59,7 +59,7 @@ module InventoryRefresh::SaveCollection
59
59
  skeletal_inventory_objects_index = {}
60
60
 
61
61
  inventory_collection.skeletal_primary_index.each_value do |inventory_object|
62
- attributes = inventory_object.class.attributes_with_keys(inventory_object.data, inventory_collection, all_attribute_keys, inventory_object)
62
+ attributes = inventory_object.attributes_with_keys(inventory_collection, all_attribute_keys, inventory_object)
63
63
  index = build_stringified_reference(attributes, unique_index_keys)
64
64
 
65
65
  skeletal_attributes_index[index] = attributes
@@ -135,6 +135,7 @@ module InventoryRefresh::SaveCollection
135
135
  # We need to set correct timestamps_max for this particular attribute, based on what is in timestamps
136
136
  batch.each do |x|
137
137
  next unless x[:__non_serialized_versions][column_name]
138
+
138
139
  x[comparables_max_name] = x[:__non_serialized_versions][column_name]
139
140
  end
140
141
  end
@@ -200,25 +201,7 @@ module InventoryRefresh::SaveCollection
200
201
  )
201
202
  end
202
203
 
203
- def comparable_timestamp(timestamp)
204
- # Lets cast all timestamps to to_f, rounding the time comparing precision to miliseconds, that should be
205
- # enough, since we are the ones setting the record version in collector. Otherwise we will have hard time with
206
- # doing equality, since the value changes going through DB (DB. cuts it at 5 decimal places)
207
-
208
- if timestamp.kind_of?(String)
209
- Time.use_zone('UTC') { Time.zone.parse(timestamp) }.to_f.round(3)
210
- elsif timestamp.kind_of?(Time)
211
- timestamp.in_time_zone('UTC').to_f.round(3)
212
- else
213
- timestamp
214
- end
215
- end
216
-
217
204
  def skeletonize_or_skip_record(record_version, hash_version, record_versions_max, inventory_object)
218
- record_version = comparable_timestamp(record_version)
219
- record_versions_max = comparable_timestamp(record_versions_max) if record_versions_max
220
- hash_version = comparable_timestamp(hash_version)
221
-
222
205
  # Skip updating this record, because it is old
223
206
  return true if record_version && hash_version && record_version >= hash_version
224
207