inventory_refresh 0.3.3 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/.codeclimate.yml +25 -30
  3. data/.github/workflows/ci.yaml +47 -0
  4. data/.rubocop.yml +3 -3
  5. data/.rubocop_cc.yml +3 -4
  6. data/.rubocop_local.yml +5 -2
  7. data/.whitesource +3 -0
  8. data/CHANGELOG.md +19 -0
  9. data/Gemfile +10 -4
  10. data/README.md +1 -2
  11. data/Rakefile +2 -2
  12. data/inventory_refresh.gemspec +9 -10
  13. data/lib/inventory_refresh/application_record_iterator.rb +25 -12
  14. data/lib/inventory_refresh/graph/topological_sort.rb +24 -26
  15. data/lib/inventory_refresh/graph.rb +2 -2
  16. data/lib/inventory_refresh/inventory_collection/builder.rb +37 -15
  17. data/lib/inventory_refresh/inventory_collection/data_storage.rb +9 -0
  18. data/lib/inventory_refresh/inventory_collection/helpers/initialize_helper.rb +147 -38
  19. data/lib/inventory_refresh/inventory_collection/helpers/questions_helper.rb +49 -5
  20. data/lib/inventory_refresh/inventory_collection/index/proxy.rb +35 -3
  21. data/lib/inventory_refresh/inventory_collection/index/type/base.rb +8 -0
  22. data/lib/inventory_refresh/inventory_collection/index/type/local_db.rb +2 -0
  23. data/lib/inventory_refresh/inventory_collection/index/type/skeletal.rb +1 -0
  24. data/lib/inventory_refresh/inventory_collection/reference.rb +1 -0
  25. data/lib/inventory_refresh/inventory_collection/references_storage.rb +17 -0
  26. data/lib/inventory_refresh/inventory_collection/scanner.rb +91 -3
  27. data/lib/inventory_refresh/inventory_collection/serialization.rb +16 -10
  28. data/lib/inventory_refresh/inventory_collection.rb +122 -64
  29. data/lib/inventory_refresh/inventory_object.rb +74 -40
  30. data/lib/inventory_refresh/inventory_object_lazy.rb +17 -10
  31. data/lib/inventory_refresh/null_logger.rb +2 -2
  32. data/lib/inventory_refresh/persister.rb +31 -65
  33. data/lib/inventory_refresh/save_collection/base.rb +4 -2
  34. data/lib/inventory_refresh/save_collection/saver/base.rb +114 -15
  35. data/lib/inventory_refresh/save_collection/saver/batch.rb +17 -0
  36. data/lib/inventory_refresh/save_collection/saver/concurrent_safe_batch.rb +129 -51
  37. data/lib/inventory_refresh/save_collection/saver/default.rb +57 -0
  38. data/lib/inventory_refresh/save_collection/saver/partial_upsert_helper.rb +2 -19
  39. data/lib/inventory_refresh/save_collection/saver/retention_helper.rb +68 -3
  40. data/lib/inventory_refresh/save_collection/saver/sql_helper.rb +125 -0
  41. data/lib/inventory_refresh/save_collection/saver/sql_helper_update.rb +10 -6
  42. data/lib/inventory_refresh/save_collection/saver/sql_helper_upsert.rb +28 -16
  43. data/lib/inventory_refresh/save_collection/sweeper.rb +17 -93
  44. data/lib/inventory_refresh/save_collection/topological_sort.rb +5 -5
  45. data/lib/inventory_refresh/save_inventory.rb +5 -12
  46. data/lib/inventory_refresh/target.rb +73 -0
  47. data/lib/inventory_refresh/target_collection.rb +92 -0
  48. data/lib/inventory_refresh/version.rb +1 -1
  49. data/lib/inventory_refresh.rb +2 -0
  50. metadata +42 -39
  51. data/.travis.yml +0 -23
  52. data/lib/inventory_refresh/exception.rb +0 -8
@@ -46,6 +46,41 @@ module InventoryRefresh::SaveCollection
46
46
  record[select_keys_indexes[key]]
47
47
  end
48
48
 
49
+ # Returns iterator or relation based on settings
50
+ #
51
+ # @param association [Symbol] An existing association on manager
52
+ # @return [ActiveRecord::Relation, InventoryRefresh::ApplicationRecordIterator] iterator or relation based on settings
53
+ def batch_iterator(association)
54
+ if pure_sql_records_fetching
55
+ # Building fast iterator doing pure SQL query and therefore avoiding redundant creation of AR objects. The
56
+ # iterator responds to find_in_batches, so it acts like the AR relation. For targeted refresh, the association
57
+ # can already be ApplicationRecordIterator, so we will skip that.
58
+ pure_sql_iterator = lambda do |&block|
59
+ primary_key_offset = nil
60
+ loop do
61
+ relation = association.select(*select_keys)
62
+ .reorder("#{primary_key} ASC")
63
+ .limit(batch_size)
64
+ # Using rails way of comparing primary key instead of offset
65
+ relation = relation.where(arel_primary_key.gt(primary_key_offset)) if primary_key_offset
66
+ records = get_connection.query(relation.to_sql)
67
+ last_record = records.last
68
+ block.call(records)
69
+
70
+ break if records.size < batch_size
71
+
72
+ primary_key_offset = record_key(last_record, primary_key)
73
+ end
74
+ end
75
+
76
+ InventoryRefresh::ApplicationRecordIterator.new(:iterator => pure_sql_iterator)
77
+ else
78
+ # Normal Rails ActiveRecord::Relation where we can call find_in_batches or
79
+ # InventoryRefresh::ApplicationRecordIterator passed from targeted refresh
80
+ association
81
+ end
82
+ end
83
+
49
84
  # Saves the InventoryCollection
50
85
  #
51
86
  # @param association [Symbol] An existing association on manager
@@ -55,7 +90,7 @@ module InventoryRefresh::SaveCollection
55
90
  all_attribute_keys = Set.new + inventory_collection.batch_extra_attributes
56
91
 
57
92
  inventory_collection.each do |inventory_object|
58
- attributes = inventory_object.class.attributes_with_keys(inventory_object.data, inventory_collection, all_attribute_keys, inventory_object)
93
+ attributes = inventory_object.attributes_with_keys(inventory_collection, all_attribute_keys, inventory_object)
59
94
  index = build_stringified_reference(attributes, unique_index_keys)
60
95
 
61
96
  # Interesting fact: not building attributes_index and using only inventory_objects_index doesn't do much
@@ -69,7 +104,7 @@ module InventoryRefresh::SaveCollection
69
104
  logger.debug("Processing #{inventory_collection} of size #{inventory_collection.size}...")
70
105
 
71
106
  unless inventory_collection.create_only?
72
- load_and_update_records!(association, inventory_objects_index, attributes_index, all_attribute_keys)
107
+ update_or_destroy_records!(batch_iterator(association), inventory_objects_index, attributes_index, all_attribute_keys)
73
108
  end
74
109
 
75
110
  unless inventory_collection.create_only?
@@ -78,11 +113,15 @@ module InventoryRefresh::SaveCollection
78
113
 
79
114
  # Records that were not found in the DB but sent for saving, we will be creating these in the DB.
80
115
  if inventory_collection.create_allowed?
116
+ on_conflict = inventory_collection.parallel_safe? ? :do_update : nil
117
+
81
118
  inventory_objects_index.each_slice(batch_size_for_persisting) do |batch|
82
- create_records!(all_attribute_keys, batch, attributes_index, :on_conflict => :do_update)
119
+ create_records!(all_attribute_keys, batch, attributes_index, :on_conflict => on_conflict)
83
120
  end
84
121
 
85
- create_or_update_partial_records(all_attribute_keys)
122
+ if inventory_collection.parallel_safe?
123
+ create_or_update_partial_records(all_attribute_keys)
124
+ end
86
125
  end
87
126
 
88
127
  logger.debug("Marking :last_seen_at of #{inventory_collection} of size #{inventory_collection.size}...")
@@ -103,7 +142,7 @@ module InventoryRefresh::SaveCollection
103
142
  end
104
143
 
105
144
  def expand_all_attribute_keys!(all_attribute_keys)
106
- %i(created_at updated_at created_on updated_on).each do |col|
145
+ %i[created_at updated_at created_on updated_on].each do |col|
107
146
  all_attribute_keys << col if supports_column?(col)
108
147
  end
109
148
  all_attribute_keys << :type if supports_sti?
@@ -111,7 +150,7 @@ module InventoryRefresh::SaveCollection
111
150
  end
112
151
 
113
152
  def mark_last_seen_at(attributes_index)
114
- return unless supports_column?(:last_seen_at)
153
+ return unless supports_column?(:last_seen_at) && inventory_collection.parallel_safe?
115
154
  return if attributes_index.blank?
116
155
 
117
156
  all_attribute_keys = [:last_seen_at]
@@ -124,7 +163,8 @@ module InventoryRefresh::SaveCollection
124
163
  get_connection.execute(query)
125
164
  end
126
165
 
127
- # Batch updates existing records that are in the DB using attributes_index.
166
+ # Batch updates existing records that are in the DB using attributes_index. And delete the ones that were not
167
+ # present in inventory_objects_index.
128
168
  #
129
169
  # @param records_batch_iterator [ActiveRecord::Relation, InventoryRefresh::ApplicationRecordIterator] iterator or
130
170
  # relation, both responding to :find_in_batches method
@@ -132,11 +172,12 @@ module InventoryRefresh::SaveCollection
132
172
  # @param attributes_index [Hash{String => Hash}] Hash of data hashes with only keys that are column names of the
133
173
  # models's table
134
174
  # @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
135
- def load_and_update_records!(records_batch_iterator, inventory_objects_index, attributes_index, all_attribute_keys)
136
- hashes_for_update = []
175
+ def update_or_destroy_records!(records_batch_iterator, inventory_objects_index, attributes_index, all_attribute_keys)
176
+ hashes_for_update = []
177
+ records_for_destroy = []
137
178
  indexed_inventory_objects = {}
138
179
 
139
- records_batch_iterator.find_in_batches(:batch_size => batch_size, :attributes_index => attributes_index) do |batch|
180
+ records_batch_iterator.find_in_batches(:batch_size => batch_size) do |batch|
140
181
  update_time = time_now
141
182
 
142
183
  batch.each do |record|
@@ -149,14 +190,20 @@ module InventoryRefresh::SaveCollection
149
190
  inventory_object = inventory_objects_index.delete(index)
150
191
  hash = attributes_index[index]
151
192
 
152
- if inventory_object
193
+ if inventory_object.nil?
194
+ # Record was found in the DB but not sent for saving, that means it doesn't exist anymore and we should
195
+ # delete it from the DB.
196
+ if inventory_collection.delete_allowed?
197
+ records_for_destroy << record
198
+ end
199
+ else
153
200
  # Record was found in the DB and sent for saving, we will be updating the DB.
154
201
  inventory_object.id = primary_key_value
155
202
  next unless assert_referential_integrity(hash)
203
+ next unless changed?(record, hash, all_attribute_keys)
156
204
 
157
- record_version = nil
158
- record_version_max = nil
159
- if supports_remote_data_timestamp?(all_attribute_keys) || supports_remote_data_version?(all_attribute_keys)
205
+ if inventory_collection.parallel_safe? &&
206
+ (supports_remote_data_timestamp?(all_attribute_keys) || supports_remote_data_version?(all_attribute_keys))
160
207
 
161
208
  version_attr, max_version_attr = if supports_remote_data_timestamp?(all_attribute_keys)
162
209
  [:resource_timestamp, :resource_timestamps_max]
@@ -164,16 +211,16 @@ module InventoryRefresh::SaveCollection
164
211
  [:resource_counter, :resource_counters_max]
165
212
  end
166
213
 
167
- record_version = record_key(record, version_attr.to_s)
168
- record_version_max = record_key(record, max_version_attr.to_s)
214
+ next if skeletonize_or_skip_record(record_key(record, version_attr),
215
+ hash[version_attr],
216
+ record_key(record, max_version_attr),
217
+ inventory_object)
169
218
  end
170
219
 
171
220
  hash_for_update = if inventory_collection.use_ar_object?
172
221
  record.assign_attributes(hash.except(:id))
173
- next unless changed?(record)
174
-
175
222
  values_for_database!(all_attribute_keys,
176
- hash)
223
+ record.attributes.symbolize_keys)
177
224
  elsif serializable_keys?
178
225
  # TODO(lsmola) hash data with current DB data to allow subset of data being sent,
179
226
  # otherwise we would nullify the not sent attributes. Test e.g. on disks in cloud
@@ -184,14 +231,6 @@ module InventoryRefresh::SaveCollection
184
231
  # otherwise we would nullify the not sent attributes. Test e.g. on disks in cloud
185
232
  hash
186
233
  end
187
-
188
- if supports_remote_data_timestamp?(all_attribute_keys) || supports_remote_data_version?(all_attribute_keys)
189
- next if skeletonize_or_skip_record(record_version,
190
- hash[version_attr],
191
- record_version_max,
192
- inventory_object)
193
- end
194
-
195
234
  assign_attributes_for_update!(hash_for_update, update_time)
196
235
 
197
236
  hash_for_update[:id] = primary_key_value
@@ -207,22 +246,39 @@ module InventoryRefresh::SaveCollection
207
246
  hashes_for_update = []
208
247
  indexed_inventory_objects = {}
209
248
  end
249
+
250
+ # Destroy in batches
251
+ if records_for_destroy.size >= batch_size_for_persisting
252
+ destroy_records!(records_for_destroy)
253
+ records_for_destroy = []
254
+ end
210
255
  end
211
256
 
212
257
  # Update the last batch
213
258
  update_records!(all_attribute_keys, hashes_for_update, indexed_inventory_objects)
214
259
  hashes_for_update = [] # Cleanup so GC can release it sooner
260
+
261
+ # Destroy the last batch
262
+ destroy_records!(records_for_destroy)
263
+ records_for_destroy = [] # Cleanup so GC can release it sooner
215
264
  end
216
265
 
217
- def changed?(record)
266
+ def changed?(_record, _hash, _all_attribute_keys)
218
267
  return true unless inventory_collection.check_changed?
219
268
 
220
- # If object was archived before, pass it to update so it can be unarchived
221
- return true if record.respond_to?(:archived_at) && record.archived_at
222
- # Skip if nothing changed
223
- return false if record.changed_attributes.empty?
224
- # Skip if we only changed the resource_timestamp, but data stays the same
225
- return false if record.changed_attributes.keys == ["resource_timestamp"]
269
+ # TODO(lsmola) this check needs to be disabled now, because it doesn't work with lazy_find having secondary
270
+ # indexes. Examples: we save a pod before we save a project, that means the project lazy_find won't evaluate,
271
+ # because we load it with secondary index and can't do skeletal precreate. Then when the object is being saved
272
+ # again, the lazy_find is evaluated, but the resource version is not changed, so the row is not saved.
273
+ #
274
+ # To keep this quick .changed? check, we might need to extend this, so the resource_version doesn't save until
275
+ # all lazy_links of the row are evaluated.
276
+ #
277
+ # if supports_resource_version?(all_attribute_keys) && supports_column?(resource_version_column)
278
+ # record_resource_version = record_key(record, resource_version_column.to_s)
279
+ #
280
+ # return record_resource_version != hash[resource_version_column]
281
+ # end
226
282
 
227
283
  true
228
284
  end
@@ -230,7 +286,10 @@ module InventoryRefresh::SaveCollection
230
286
  def db_columns_index(record, pure_sql: false)
231
287
  # Incoming values are in SQL string form.
232
288
  # TODO(lsmola) unify this behavior with object_index_with_keys method in InventoryCollection
289
+ # TODO(lsmola) maybe we can drop the whole pure sql fetching, since everything will be targeted refresh
233
290
  # with streaming refresh? Maybe just metrics and events will not be, but those should be upsert only
291
+ # TODO(lsmola) taking ^ in account, we can't drop pure sql, since that is returned by batch insert and
292
+ # update queries
234
293
  unique_index_keys_to_s.map do |attribute|
235
294
  value = if pure_sql
236
295
  record[attribute]
@@ -261,13 +320,20 @@ module InventoryRefresh::SaveCollection
261
320
  def update_records!(all_attribute_keys, hashes, indexed_inventory_objects)
262
321
  return if hashes.blank?
263
322
 
323
+ unless inventory_collection.parallel_safe?
324
+ # We need to update the stored records before we save it, since hashes are modified
325
+ inventory_collection.store_updated_records(hashes)
326
+ end
327
+
264
328
  query = build_update_query(all_attribute_keys, hashes)
265
329
  result = get_connection.execute(query)
266
330
 
267
- # We will check for timestamp clashes of full row update and we will fallback to skeletal update
268
- inventory_collection.store_updated_records(result)
331
+ if inventory_collection.parallel_safe?
332
+ # We will check for timestamp clashes of full row update and we will fallback to skeletal update
333
+ inventory_collection.store_updated_records(result)
269
334
 
270
- skeletonize_ignored_records!(indexed_inventory_objects, result)
335
+ skeletonize_ignored_records!(indexed_inventory_objects, result)
336
+ end
271
337
 
272
338
  result
273
339
  end
@@ -287,7 +353,11 @@ module InventoryRefresh::SaveCollection
287
353
  hashes = []
288
354
  create_time = time_now
289
355
  batch.each do |index, inventory_object|
290
- hash = if serializable_keys?
356
+ hash = if inventory_collection.use_ar_object?
357
+ record = inventory_collection.model_class.new(attributes_index[index])
358
+ values_for_database!(all_attribute_keys,
359
+ record.attributes.symbolize_keys)
360
+ elsif serializable_keys?
291
361
  values_for_database!(all_attribute_keys,
292
362
  attributes_index[index])
293
363
  else
@@ -309,19 +379,24 @@ module InventoryRefresh::SaveCollection
309
379
  build_insert_query(all_attribute_keys, hashes, :on_conflict => on_conflict, :mode => :full)
310
380
  )
311
381
 
312
- # We've done upsert, so records were either created or updated. We can recognize that by checking if
313
- # created and updated timestamps are the same
314
- created_attr = "created_on" if inventory_collection.supports_column?(:created_on)
315
- created_attr ||= "created_at" if inventory_collection.supports_column?(:created_at)
316
- updated_attr = "updated_on" if inventory_collection.supports_column?(:updated_on)
317
- updated_attr ||= "updated_at" if inventory_collection.supports_column?(:updated_at)
318
-
319
- if created_attr && updated_attr
320
- created, updated = result.to_a.partition { |x| x[created_attr] == x[updated_attr] }
321
- inventory_collection.store_created_records(created)
322
- inventory_collection.store_updated_records(updated)
382
+ if inventory_collection.parallel_safe?
383
+ # We've done upsert, so records were either created or updated. We can recognize that by checking if
384
+ # created and updated timestamps are the same
385
+ created_attr = "created_on" if inventory_collection.supports_column?(:created_on)
386
+ created_attr ||= "created_at" if inventory_collection.supports_column?(:created_at)
387
+ updated_attr = "updated_on" if inventory_collection.supports_column?(:updated_on)
388
+ updated_attr ||= "updated_at" if inventory_collection.supports_column?(:updated_at)
389
+
390
+ if created_attr && updated_attr
391
+ created, updated = result.to_a.partition { |x| x[created_attr] == x[updated_attr] }
392
+ inventory_collection.store_created_records(created)
393
+ inventory_collection.store_updated_records(updated)
394
+ else
395
+ # The record doesn't have both created and updated attrs, so we'll take all as created
396
+ inventory_collection.store_created_records(result)
397
+ end
323
398
  else
324
- # The record doesn't have both created and updated attrs, so we'll take all as created
399
+ # We've done just insert, so all records were created
325
400
  inventory_collection.store_created_records(result)
326
401
  end
327
402
 
@@ -334,7 +409,9 @@ module InventoryRefresh::SaveCollection
334
409
  :on_conflict => on_conflict)
335
410
  end
336
411
 
337
- skeletonize_ignored_records!(indexed_inventory_objects, result, :all_unique_columns => true)
412
+ if inventory_collection.parallel_safe?
413
+ skeletonize_ignored_records!(indexed_inventory_objects, result, :all_unique_columns => true)
414
+ end
338
415
  end
339
416
 
340
417
  # Stores primary_key values of created records into associated InventoryObject objects.
@@ -365,6 +442,7 @@ module InventoryRefresh::SaveCollection
365
442
  inventory_object[ref] = attributes[ref]
366
443
 
367
444
  next unless (foreign_key = association_to_foreign_key_mapping[ref])
445
+
368
446
  base_class_name = attributes[association_to_foreign_type_mapping[ref].try(:to_sym)] || association_to_base_class_mapping[ref]
369
447
  id = attributes[foreign_key.to_sym]
370
448
  inventory_object[ref] = InventoryRefresh::ApplicationRecordReference.new(base_class_name, id)
@@ -0,0 +1,57 @@
1
+ require "inventory_refresh/save_collection/saver/base"
2
+
3
+ module InventoryRefresh::SaveCollection
4
+ module Saver
5
+ class Default < InventoryRefresh::SaveCollection::Saver::Base
6
+ private
7
+
8
+ # Updates the passed record with hash data and stores primary key value into inventory_object.
9
+ #
10
+ # @param record [ApplicationRecord] record we want to update in DB
11
+ # @param hash [Hash] data we want to update the record with
12
+ # @param inventory_object [InventoryRefresh::InventoryObject] InventoryObject instance where we will store primary
13
+ # key value
14
+ def update_record!(record, hash, inventory_object)
15
+ record.assign_attributes(hash.except(:id))
16
+ if !inventory_collection.check_changed? || record.changed?
17
+ record.save
18
+ inventory_collection.store_updated_records(record)
19
+ end
20
+
21
+ inventory_object.id = record.id
22
+ end
23
+
24
+ # Creates a new record in the DB using the passed hash data
25
+ #
26
+ # @param hash [Hash] hash with data we want to persist to DB
27
+ # @param inventory_object [InventoryRefresh::InventoryObject] InventoryObject instance where we will store primary
28
+ # key value
29
+ def create_record!(hash, inventory_object)
30
+ record = inventory_collection.model_class.create!(hash.except(:id))
31
+ inventory_collection.store_created_records(record)
32
+
33
+ inventory_object.id = record.id
34
+ end
35
+
36
+ # Asserts we do not have duplicate records in the DB. If the record is duplicate we will destroy it.
37
+ #
38
+ # @param record [ApplicationRecord] record we want to update in DB
39
+ # @param index [String] manager_uuid of the record
40
+ # @return [Boolean] false if the record is duplicate
41
+ def assert_unique_record(record, index)
42
+ # TODO(lsmola) can go away once we indexed our DB with unique indexes
43
+ if unique_db_indexes.include?(index) # Include on Set is O(1)
44
+ # We have a duplicate in the DB, destroy it. A find_each method does automatically .order(:id => :asc)
45
+ # so we always keep the oldest record in the case of duplicates.
46
+ logger.warn("A duplicate record was detected and destroyed, inventory_collection: "\
47
+ "'#{inventory_collection}', record: '#{record}', duplicate_index: '#{index}'")
48
+ record.destroy
49
+ return false
50
+ else
51
+ unique_db_indexes << index
52
+ end
53
+ true
54
+ end
55
+ end
56
+ end
57
+ end
@@ -59,7 +59,7 @@ module InventoryRefresh::SaveCollection
59
59
  skeletal_inventory_objects_index = {}
60
60
 
61
61
  inventory_collection.skeletal_primary_index.each_value do |inventory_object|
62
- attributes = inventory_object.class.attributes_with_keys(inventory_object.data, inventory_collection, all_attribute_keys, inventory_object)
62
+ attributes = inventory_object.attributes_with_keys(inventory_collection, all_attribute_keys, inventory_object)
63
63
  index = build_stringified_reference(attributes, unique_index_keys)
64
64
 
65
65
  skeletal_attributes_index[index] = attributes
@@ -135,6 +135,7 @@ module InventoryRefresh::SaveCollection
135
135
  # We need to set correct timestamps_max for this particular attribute, based on what is in timestamps
136
136
  batch.each do |x|
137
137
  next unless x[:__non_serialized_versions][column_name]
138
+
138
139
  x[comparables_max_name] = x[:__non_serialized_versions][column_name]
139
140
  end
140
141
  end
@@ -200,25 +201,7 @@ module InventoryRefresh::SaveCollection
200
201
  )
201
202
  end
202
203
 
203
- def comparable_timestamp(timestamp)
204
- # Lets cast all timestamps to to_f, rounding the time comparing precision to miliseconds, that should be
205
- # enough, since we are the ones setting the record version in collector. Otherwise we will have hard time with
206
- # doing equality, since the value changes going through DB (DB. cuts it at 5 decimal places)
207
-
208
- if timestamp.kind_of?(String)
209
- Time.use_zone('UTC') { Time.zone.parse(timestamp) }.to_f.round(3)
210
- elsif timestamp.kind_of?(Time)
211
- timestamp.in_time_zone('UTC').to_f.round(3)
212
- else
213
- timestamp
214
- end
215
- end
216
-
217
204
  def skeletonize_or_skip_record(record_version, hash_version, record_versions_max, inventory_object)
218
- record_version = comparable_timestamp(record_version)
219
- record_versions_max = comparable_timestamp(record_versions_max) if record_versions_max
220
- hash_version = comparable_timestamp(hash_version)
221
-
222
205
  # Skip updating this record, because it is old
223
206
  return true if record_version && hash_version && record_version >= hash_version
224
207
 
@@ -3,6 +3,28 @@ module InventoryRefresh::SaveCollection
3
3
  module RetentionHelper
4
4
  private
5
5
 
6
+ # Deletes a complement of referenced data
7
+ def delete_complement
8
+ return unless inventory_collection.delete_allowed?
9
+
10
+ all_manager_uuids_size = inventory_collection.all_manager_uuids.size
11
+
12
+ logger.debug("Processing :delete_complement of #{inventory_collection} of size "\
13
+ "#{all_manager_uuids_size}...")
14
+
15
+ query = complement_of!(inventory_collection.all_manager_uuids,
16
+ inventory_collection.all_manager_uuids_scope,
17
+ inventory_collection.all_manager_uuids_timestamp)
18
+
19
+ ids_of_non_active_entities = ActiveRecord::Base.connection.execute(query.to_sql).to_a
20
+ ids_of_non_active_entities.each_slice(10_000) do |batch|
21
+ destroy_records!(batch)
22
+ end
23
+
24
+ logger.debug("Processing :delete_complement of #{inventory_collection} of size "\
25
+ "#{all_manager_uuids_size}, deleted=#{inventory_collection.deleted_records.size}...Complete")
26
+ end
27
+
6
28
  # Applies strategy based on :retention_strategy parameter, or fallbacks to legacy_destroy_records.
7
29
  #
8
30
  # @param records [Array<ApplicationRecord, Hash, Array>] Records we want to delete or archive
@@ -13,9 +35,13 @@ module InventoryRefresh::SaveCollection
13
35
  return false unless inventory_collection.delete_allowed?
14
36
  return if records.blank?
15
37
 
16
- ids = ids_array(records)
17
- inventory_collection.store_deleted_records(ids)
18
- send("#{inventory_collection.retention_strategy}_all_records!", ids)
38
+ if inventory_collection.retention_strategy
39
+ ids = ids_array(records)
40
+ inventory_collection.store_deleted_records(ids)
41
+ send("#{inventory_collection.retention_strategy}_all_records!", ids)
42
+ else
43
+ legacy_destroy_records!(records)
44
+ end
19
45
  end
20
46
 
21
47
  # Convert records to list of ids in format [{:id => X}, {:id => Y}...]
@@ -45,6 +71,45 @@ module InventoryRefresh::SaveCollection
45
71
  def destroy_all_records!(records)
46
72
  inventory_collection.model_class.where(:id => records.map { |x| x[:id] }).delete_all
47
73
  end
74
+
75
+ # Deletes or soft-deletes records. If the model_class supports a custom class delete method, we will use it for
76
+ # batch soft-delete. This is the legacy method doing either ineffective deletion/archiving or requiring a method
77
+ # on a class.
78
+ #
79
+ # @param records [Array<ApplicationRecord, Hash>] Records we want to delete. If we have only hashes, we need to
80
+ #   fetch ApplicationRecord objects from the DB
81
+ def legacy_destroy_records!(records)
82
+ # Is the delete_method rails standard deleting method?
83
+ rails_delete = %i[destroy delete].include?(inventory_collection.delete_method)
84
+ if !rails_delete && inventory_collection.model_class.respond_to?(inventory_collection.delete_method)
85
+ # We have custom delete method defined on a class, that means it supports batch destroy
86
+ inventory_collection.store_deleted_records(records.map { |x| {:id => record_key(x, primary_key)} })
87
+ inventory_collection.model_class.public_send(inventory_collection.delete_method, records.map { |x| record_key(x, primary_key) })
88
+ else
89
+ legacy_ineffective_destroy_records(records)
90
+ end
91
+ end
92
+
93
+ # Very ineffective way of deleting records, but is needed if we want to invoke hooks.
94
+ #
95
+ # @param records [Array<ApplicationRecord, Hash>] Records we want to delete. If we have only hashes, we need to
96
+ #   fetch ApplicationRecord objects from the DB
97
+ def legacy_ineffective_destroy_records(records)
98
+ # We have either standard :destroy and :delete rails method, or custom instance level delete method
99
+ # Note: The standard :destroy and :delete rails method can't be batched because of the hooks and cascade destroy
100
+ ActiveRecord::Base.transaction do
101
+ if pure_sql_records_fetching
102
+ # For pure SQL fetching, we need to get the AR objects again, so we can call destroy
103
+ inventory_collection.model_class.where(:id => records.map { |x| record_key(x, primary_key) }).find_each do |record|
104
+ delete_record!(record)
105
+ end
106
+ else
107
+ records.each do |record|
108
+ delete_record!(record)
109
+ end
110
+ end
111
+ end
112
+ end
48
113
  end
49
114
  end
50
115
  end
@@ -8,6 +8,9 @@ module InventoryRefresh::SaveCollection
8
8
  module SqlHelper
9
9
  include InventoryRefresh::Logging
10
10
 
11
+ # TODO(lsmola) all below methods should be rewritten to arel, but we need to first extend arel to be able to do
12
+ # this
13
+
11
14
  extend ActiveSupport::Concern
12
15
 
13
16
  included do
@@ -80,6 +83,128 @@ module InventoryRefresh::SaveCollection
80
83
  "#{value}::#{sql_type}"
81
84
  end
82
85
  end
86
+
87
+ # Effective way of doing multiselect
88
+ #
89
+ # If we use "(col1, col2) IN [(a,e), (b,f), (b,e)]" it's not great, just with 10k batch, we see
90
+ # *** ActiveRecord::StatementInvalid Exception: PG::StatementTooComplex: ERROR: stack depth limit exceeded
91
+ # HINT: Increase the configuration parameter "max_stack_depth" (currently 2048kB), after ensuring the
92
+ # platform's stack depth limit is adequate.
93
+ #
94
+ # If we use "(col1 = a AND col2 = e) OR (col1 = b AND col2 = f) OR (col1 = b AND col2 = e)" with 10k batch, it
95
+ # takes about 6s and consumes 300MB, with 100k it takes ~1h and consumes 3GB in the Postgres process
96
+ #
97
+ # The best way seems to be using CTE, where the list of values we want to map is turned to 'table' and we just
98
+ # do RIGHT OUTER JOIN to get the complement of given identifiers. Tested on getting complement of 100k items,
99
+ # using 2 cols (:ems_ref and :uid_ems) from total 150k rows. It takes ~1s and 350MB in the Postgres process
100
+ #
101
+ # @param manager_uuids [Array<String>, Array[Hash]] Array with manager_uuids of entities. The keys have to match
102
+ # inventory_collection.manager_ref. We allow passing just array of strings, if manager_ref.size ==1, to
103
+ # spare some memory
104
+ # @return [Arel::SelectManager] Arel for getting complement of uuids. This method modifies the passed
105
+ # manager_uuids to spare some memory
106
+ def complement_of!(manager_uuids, all_manager_uuids_scope, all_manager_uuids_timestamp)
107
+ all_attribute_keys = inventory_collection.manager_ref
108
+ all_attribute_keys_array = inventory_collection.manager_ref.map(&:to_s)
109
+
110
+ active_entities = Arel::Table.new(:active_entities)
111
+ active_entities_cte = Arel::Nodes::As.new(
112
+ active_entities,
113
+ Arel.sql("(#{active_entities_query(all_attribute_keys_array, manager_uuids)})")
114
+ )
115
+
116
+ all_entities = Arel::Table.new(:all_entities)
117
+ all_entities_cte = Arel::Nodes::As.new(
118
+ all_entities,
119
+ Arel.sql("(#{all_entities_query(all_manager_uuids_scope, all_manager_uuids_timestamp).select(:id, *all_attribute_keys_array).to_sql})")
120
+ )
121
+ join_condition = all_attribute_keys.map { |key| active_entities[key].eq(all_entities[key]) }.inject(:and)
122
+ where_condition = all_attribute_keys.map { |key| active_entities[key].eq(nil) }.inject(:and)
123
+
124
+ active_entities
125
+ .project(all_entities[:id])
126
+ .join(all_entities, Arel::Nodes::RightOuterJoin)
127
+ .on(join_condition)
128
+ .with(active_entities_cte, all_entities_cte)
129
+ .where(where_condition)
130
+ end
131
+
132
+ private
133
+
134
+ def all_entities_query(all_manager_uuids_scope, all_manager_uuids_timestamp)
135
+ all_entities_query = inventory_collection.full_collection_for_comparison
136
+ all_entities_query = all_entities_query.active if inventory_collection.retention_strategy == :archive
137
+
138
+ if all_manager_uuids_scope
139
+ scope_keys = all_manager_uuids_scope.first.keys.map { |x| association_to_foreign_key_mapping[x.to_sym] }.map(&:to_s)
140
+ scope = load_scope(all_manager_uuids_scope)
141
+ condition = inventory_collection.build_multi_selection_condition(scope, scope_keys)
142
+ all_entities_query = all_entities_query.where(condition)
143
+ end
144
+
145
+ if all_manager_uuids_timestamp && supports_column?(:resource_timestamp)
146
+ all_manager_uuids_timestamp = Time.parse(all_manager_uuids_timestamp).utc
147
+
148
+ date_field = model_class.arel_table[:resource_timestamp]
149
+ all_entities_query = all_entities_query.where(date_field.lt(all_manager_uuids_timestamp))
150
+ end
151
+ all_entities_query
152
+ end
153
+
154
+ def load_scope(all_manager_uuids_scope)
155
+ scope_keys = all_manager_uuids_scope.first.keys.to_set
156
+
157
+ all_manager_uuids_scope.map do |cond|
158
+ assert_scope!(scope_keys, cond)
159
+
160
+ cond.map do |key, value|
161
+ foreign_key = association_to_foreign_key_mapping[key.to_sym]
162
+ foreign_key_value = value.load&.id
163
+
164
+ assert_foreign_keys!(key, value, foreign_key, foreign_key_value)
165
+
166
+ [foreign_key, foreign_key_value]
167
+ end.to_h
168
+ end
169
+ end
170
+
171
+ def assert_scope!(scope_keys, cond)
172
+ if cond.keys.to_set != scope_keys
173
+ raise "'#{inventory_collection}' expected keys for :all_manager_uuids_scope are #{scope_keys.to_a}, got"\
174
+ " #{cond.keys}. Keys must be the same for all scopes provided."
175
+ end
176
+ end
177
+
178
+ def assert_foreign_keys!(key, value, foreign_key, foreign_key_value)
179
+ unless foreign_key
180
+ raise "'#{inventory_collection}' doesn't have relation :#{key} provided in :all_manager_uuids_scope."
181
+ end
182
+
183
+ unless foreign_key_value
184
+ raise "'#{inventory_collection}' couldn't load scope value :#{key} => #{value.inspect} provided in :all_manager_uuids_scope"
185
+ end
186
+ end
187
+
188
+ def active_entities_query(all_attribute_keys_array, manager_uuids)
189
+ connection = ActiveRecord::Base.connection
190
+
191
+ all_attribute_keys_array_q = all_attribute_keys_array.map { |x| quote_column_name(x) }
192
+ # For Postgres, only the first set of values should contain the type casts
193
+ first_value = manager_uuids.shift.to_h
194
+ first_value = "(#{all_attribute_keys_array.map { |x| quote(connection, first_value[x], x, true) }.join(",")})"
195
+
196
+ # Rest of the values, without the type cast
197
+ values = manager_uuids.map! do |hash|
198
+ "(#{all_attribute_keys_array.map { |x| quote(connection, hash[x], x, false) }.join(",")})"
199
+ end.join(",")
200
+
201
+ values = values.blank? ? first_value : [first_value, values].join(",")
202
+
203
+ <<-SQL
204
+ SELECT *
205
+ FROM (VALUES #{values}) AS active_entities_table(#{all_attribute_keys_array_q.join(",")})
206
+ SQL
207
+ end
83
208
  end
84
209
  end
85
210
  end