inventory_refresh 0.3.3 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/.codeclimate.yml +25 -30
  3. data/.github/workflows/ci.yaml +47 -0
  4. data/.rubocop.yml +3 -3
  5. data/.rubocop_cc.yml +3 -4
  6. data/.rubocop_local.yml +5 -2
  7. data/.whitesource +3 -0
  8. data/CHANGELOG.md +19 -0
  9. data/Gemfile +10 -4
  10. data/README.md +1 -2
  11. data/Rakefile +2 -2
  12. data/inventory_refresh.gemspec +9 -10
  13. data/lib/inventory_refresh/application_record_iterator.rb +25 -12
  14. data/lib/inventory_refresh/graph/topological_sort.rb +24 -26
  15. data/lib/inventory_refresh/graph.rb +2 -2
  16. data/lib/inventory_refresh/inventory_collection/builder.rb +37 -15
  17. data/lib/inventory_refresh/inventory_collection/data_storage.rb +9 -0
  18. data/lib/inventory_refresh/inventory_collection/helpers/initialize_helper.rb +147 -38
  19. data/lib/inventory_refresh/inventory_collection/helpers/questions_helper.rb +49 -5
  20. data/lib/inventory_refresh/inventory_collection/index/proxy.rb +35 -3
  21. data/lib/inventory_refresh/inventory_collection/index/type/base.rb +8 -0
  22. data/lib/inventory_refresh/inventory_collection/index/type/local_db.rb +2 -0
  23. data/lib/inventory_refresh/inventory_collection/index/type/skeletal.rb +1 -0
  24. data/lib/inventory_refresh/inventory_collection/reference.rb +1 -0
  25. data/lib/inventory_refresh/inventory_collection/references_storage.rb +17 -0
  26. data/lib/inventory_refresh/inventory_collection/scanner.rb +91 -3
  27. data/lib/inventory_refresh/inventory_collection/serialization.rb +16 -10
  28. data/lib/inventory_refresh/inventory_collection.rb +122 -64
  29. data/lib/inventory_refresh/inventory_object.rb +74 -40
  30. data/lib/inventory_refresh/inventory_object_lazy.rb +17 -10
  31. data/lib/inventory_refresh/null_logger.rb +2 -2
  32. data/lib/inventory_refresh/persister.rb +31 -65
  33. data/lib/inventory_refresh/save_collection/base.rb +4 -2
  34. data/lib/inventory_refresh/save_collection/saver/base.rb +114 -15
  35. data/lib/inventory_refresh/save_collection/saver/batch.rb +17 -0
  36. data/lib/inventory_refresh/save_collection/saver/concurrent_safe_batch.rb +129 -51
  37. data/lib/inventory_refresh/save_collection/saver/default.rb +57 -0
  38. data/lib/inventory_refresh/save_collection/saver/partial_upsert_helper.rb +2 -19
  39. data/lib/inventory_refresh/save_collection/saver/retention_helper.rb +68 -3
  40. data/lib/inventory_refresh/save_collection/saver/sql_helper.rb +125 -0
  41. data/lib/inventory_refresh/save_collection/saver/sql_helper_update.rb +10 -6
  42. data/lib/inventory_refresh/save_collection/saver/sql_helper_upsert.rb +28 -16
  43. data/lib/inventory_refresh/save_collection/sweeper.rb +17 -93
  44. data/lib/inventory_refresh/save_collection/topological_sort.rb +5 -5
  45. data/lib/inventory_refresh/save_inventory.rb +5 -12
  46. data/lib/inventory_refresh/target.rb +73 -0
  47. data/lib/inventory_refresh/target_collection.rb +92 -0
  48. data/lib/inventory_refresh/version.rb +1 -1
  49. data/lib/inventory_refresh.rb +2 -0
  50. metadata +42 -39
  51. data/.travis.yml +0 -23
  52. data/lib/inventory_refresh/exception.rb +0 -8
@@ -46,6 +46,41 @@ module InventoryRefresh::SaveCollection
46
46
  record[select_keys_indexes[key]]
47
47
  end
48
48
 
49
+ # Returns iterator or relation based on settings
50
+ #
51
+ # @param association [Symbol] An existing association on manager
52
+ # @return [ActiveRecord::Relation, InventoryRefresh::ApplicationRecordIterator] iterator or relation based on settings
53
+ def batch_iterator(association)
54
+ if pure_sql_records_fetching
55
+ # Building fast iterator doing pure SQL query and therefore avoiding redundant creation of AR objects. The
56
+ # iterator responds to find_in_batches, so it acts like the AR relation. For targeted refresh, the association
57
+ # can already be ApplicationRecordIterator, so we will skip that.
58
+ pure_sql_iterator = lambda do |&block|
59
+ primary_key_offset = nil
60
+ loop do
61
+ relation = association.select(*select_keys)
62
+ .reorder("#{primary_key} ASC")
63
+ .limit(batch_size)
64
+ # Using rails way of comparing primary key instead of offset
65
+ relation = relation.where(arel_primary_key.gt(primary_key_offset)) if primary_key_offset
66
+ records = get_connection.query(relation.to_sql)
67
+ last_record = records.last
68
+ block.call(records)
69
+
70
+ break if records.size < batch_size
71
+
72
+ primary_key_offset = record_key(last_record, primary_key)
73
+ end
74
+ end
75
+
76
+ InventoryRefresh::ApplicationRecordIterator.new(:iterator => pure_sql_iterator)
77
+ else
78
+ # Normal Rails ActiveRecord::Relation where we can call find_in_batches or
79
+ # InventoryRefresh::ApplicationRecordIterator passed from targeted refresh
80
+ association
81
+ end
82
+ end
83
+
49
84
  # Saves the InventoryCollection
50
85
  #
51
86
  # @param association [Symbol] An existing association on manager
@@ -55,7 +90,7 @@ module InventoryRefresh::SaveCollection
55
90
  all_attribute_keys = Set.new + inventory_collection.batch_extra_attributes
56
91
 
57
92
  inventory_collection.each do |inventory_object|
58
- attributes = inventory_object.class.attributes_with_keys(inventory_object.data, inventory_collection, all_attribute_keys, inventory_object)
93
+ attributes = inventory_object.attributes_with_keys(inventory_collection, all_attribute_keys, inventory_object)
59
94
  index = build_stringified_reference(attributes, unique_index_keys)
60
95
 
61
96
  # Interesting fact: not building attributes_index and using only inventory_objects_index doesn't do much
@@ -69,7 +104,7 @@ module InventoryRefresh::SaveCollection
69
104
  logger.debug("Processing #{inventory_collection} of size #{inventory_collection.size}...")
70
105
 
71
106
  unless inventory_collection.create_only?
72
- load_and_update_records!(association, inventory_objects_index, attributes_index, all_attribute_keys)
107
+ update_or_destroy_records!(batch_iterator(association), inventory_objects_index, attributes_index, all_attribute_keys)
73
108
  end
74
109
 
75
110
  unless inventory_collection.create_only?
@@ -78,11 +113,15 @@ module InventoryRefresh::SaveCollection
78
113
 
79
114
  # Records that were not found in the DB but sent for saving, we will be creating these in the DB.
80
115
  if inventory_collection.create_allowed?
116
+ on_conflict = inventory_collection.parallel_safe? ? :do_update : nil
117
+
81
118
  inventory_objects_index.each_slice(batch_size_for_persisting) do |batch|
82
- create_records!(all_attribute_keys, batch, attributes_index, :on_conflict => :do_update)
119
+ create_records!(all_attribute_keys, batch, attributes_index, :on_conflict => on_conflict)
83
120
  end
84
121
 
85
- create_or_update_partial_records(all_attribute_keys)
122
+ if inventory_collection.parallel_safe?
123
+ create_or_update_partial_records(all_attribute_keys)
124
+ end
86
125
  end
87
126
 
88
127
  logger.debug("Marking :last_seen_at of #{inventory_collection} of size #{inventory_collection.size}...")
@@ -103,7 +142,7 @@ module InventoryRefresh::SaveCollection
103
142
  end
104
143
 
105
144
  def expand_all_attribute_keys!(all_attribute_keys)
106
- %i(created_at updated_at created_on updated_on).each do |col|
145
+ %i[created_at updated_at created_on updated_on].each do |col|
107
146
  all_attribute_keys << col if supports_column?(col)
108
147
  end
109
148
  all_attribute_keys << :type if supports_sti?
@@ -111,7 +150,7 @@ module InventoryRefresh::SaveCollection
111
150
  end
112
151
 
113
152
  def mark_last_seen_at(attributes_index)
114
- return unless supports_column?(:last_seen_at)
153
+ return unless supports_column?(:last_seen_at) && inventory_collection.parallel_safe?
115
154
  return if attributes_index.blank?
116
155
 
117
156
  all_attribute_keys = [:last_seen_at]
@@ -124,7 +163,8 @@ module InventoryRefresh::SaveCollection
124
163
  get_connection.execute(query)
125
164
  end
126
165
 
127
- # Batch updates existing records that are in the DB using attributes_index.
166
+ # Batch updates existing records that are in the DB using attributes_index. And deletes the ones that were not
167
+ # present in inventory_objects_index.
128
168
  #
129
169
  # @param records_batch_iterator [ActiveRecord::Relation, InventoryRefresh::ApplicationRecordIterator] iterator or
130
170
  # relation, both responding to :find_in_batches method
@@ -132,11 +172,12 @@ module InventoryRefresh::SaveCollection
132
172
  # @param attributes_index [Hash{String => Hash}] Hash of data hashes with only keys that are column names of the
133
173
  # model's table
134
174
  # @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
135
- def load_and_update_records!(records_batch_iterator, inventory_objects_index, attributes_index, all_attribute_keys)
136
- hashes_for_update = []
175
+ def update_or_destroy_records!(records_batch_iterator, inventory_objects_index, attributes_index, all_attribute_keys)
176
+ hashes_for_update = []
177
+ records_for_destroy = []
137
178
  indexed_inventory_objects = {}
138
179
 
139
- records_batch_iterator.find_in_batches(:batch_size => batch_size, :attributes_index => attributes_index) do |batch|
180
+ records_batch_iterator.find_in_batches(:batch_size => batch_size) do |batch|
140
181
  update_time = time_now
141
182
 
142
183
  batch.each do |record|
@@ -149,14 +190,20 @@ module InventoryRefresh::SaveCollection
149
190
  inventory_object = inventory_objects_index.delete(index)
150
191
  hash = attributes_index[index]
151
192
 
152
- if inventory_object
193
+ if inventory_object.nil?
194
+ # Record was found in the DB but not sent for saving, that means it doesn't exist anymore and we should
195
+ # delete it from the DB.
196
+ if inventory_collection.delete_allowed?
197
+ records_for_destroy << record
198
+ end
199
+ else
153
200
  # Record was found in the DB and sent for saving, we will be updating the DB.
154
201
  inventory_object.id = primary_key_value
155
202
  next unless assert_referential_integrity(hash)
203
+ next unless changed?(record, hash, all_attribute_keys)
156
204
 
157
- record_version = nil
158
- record_version_max = nil
159
- if supports_remote_data_timestamp?(all_attribute_keys) || supports_remote_data_version?(all_attribute_keys)
205
+ if inventory_collection.parallel_safe? &&
206
+ (supports_remote_data_timestamp?(all_attribute_keys) || supports_remote_data_version?(all_attribute_keys))
160
207
 
161
208
  version_attr, max_version_attr = if supports_remote_data_timestamp?(all_attribute_keys)
162
209
  [:resource_timestamp, :resource_timestamps_max]
@@ -164,16 +211,16 @@ module InventoryRefresh::SaveCollection
164
211
  [:resource_counter, :resource_counters_max]
165
212
  end
166
213
 
167
- record_version = record_key(record, version_attr.to_s)
168
- record_version_max = record_key(record, max_version_attr.to_s)
214
+ next if skeletonize_or_skip_record(record_key(record, version_attr),
215
+ hash[version_attr],
216
+ record_key(record, max_version_attr),
217
+ inventory_object)
169
218
  end
170
219
 
171
220
  hash_for_update = if inventory_collection.use_ar_object?
172
221
  record.assign_attributes(hash.except(:id))
173
- next unless changed?(record)
174
-
175
222
  values_for_database!(all_attribute_keys,
176
- hash)
223
+ record.attributes.symbolize_keys)
177
224
  elsif serializable_keys?
178
225
  # TODO(lsmola) hash data with current DB data to allow subset of data being sent,
179
226
  # otherwise we would nullify the not sent attributes. Test e.g. on disks in cloud
@@ -184,14 +231,6 @@ module InventoryRefresh::SaveCollection
184
231
  # otherwise we would nullify the not sent attributes. Test e.g. on disks in cloud
185
232
  hash
186
233
  end
187
-
188
- if supports_remote_data_timestamp?(all_attribute_keys) || supports_remote_data_version?(all_attribute_keys)
189
- next if skeletonize_or_skip_record(record_version,
190
- hash[version_attr],
191
- record_version_max,
192
- inventory_object)
193
- end
194
-
195
234
  assign_attributes_for_update!(hash_for_update, update_time)
196
235
 
197
236
  hash_for_update[:id] = primary_key_value
@@ -207,22 +246,39 @@ module InventoryRefresh::SaveCollection
207
246
  hashes_for_update = []
208
247
  indexed_inventory_objects = {}
209
248
  end
249
+
250
+ # Destroy in batches
251
+ if records_for_destroy.size >= batch_size_for_persisting
252
+ destroy_records!(records_for_destroy)
253
+ records_for_destroy = []
254
+ end
210
255
  end
211
256
 
212
257
  # Update the last batch
213
258
  update_records!(all_attribute_keys, hashes_for_update, indexed_inventory_objects)
214
259
  hashes_for_update = [] # Cleanup so GC can release it sooner
260
+
261
+ # Destroy the last batch
262
+ destroy_records!(records_for_destroy)
263
+ records_for_destroy = [] # Cleanup so GC can release it sooner
215
264
  end
216
265
 
217
- def changed?(record)
266
+ def changed?(_record, _hash, _all_attribute_keys)
218
267
  return true unless inventory_collection.check_changed?
219
268
 
220
- # If object was archived before, pass it to update so it can be unarchived
221
- return true if record.respond_to?(:archived_at) && record.archived_at
222
- # Skip if nothing changed
223
- return false if record.changed_attributes.empty?
224
- # Skip if we only changed the resource_timestamp, but data stays the same
225
- return false if record.changed_attributes.keys == ["resource_timestamp"]
269
+ # TODO(lsmola) this check needs to be disabled now, because it doesn't work with lazy_find having secondary
270
+ # indexes. Examples: we save a pod before we save a project, that means the project lazy_find won't evaluate,
271
+ # because we load it with secondary index and can't do skeletal precreate. Then when the object is being saved
272
+ # again, the lazy_find is evaluated, but the resource version is not changed, so the row is not saved.
273
+ #
274
+ # To keep this quick .changed? check, we might need to extend this, so the resource_version doesn't save until
275
+ # all lazy_links of the row are evaluated.
276
+ #
277
+ # if supports_resource_version?(all_attribute_keys) && supports_column?(resource_version_column)
278
+ # record_resource_version = record_key(record, resource_version_column.to_s)
279
+ #
280
+ # return record_resource_version != hash[resource_version_column]
281
+ # end
226
282
 
227
283
  true
228
284
  end
@@ -230,7 +286,10 @@ module InventoryRefresh::SaveCollection
230
286
  def db_columns_index(record, pure_sql: false)
231
287
  # Incoming values are in SQL string form.
232
288
  # TODO(lsmola) unify this behavior with object_index_with_keys method in InventoryCollection
289
+ # TODO(lsmola) maybe we can drop the whole pure sql fetching, since everything will be targeted refresh
233
290
  # with streaming refresh? Maybe just metrics and events will not be, but those should be upsert only
291
+ # TODO(lsmola) taking ^ in account, we can't drop pure sql, since that is returned by batch insert and
292
+ # update queries
234
293
  unique_index_keys_to_s.map do |attribute|
235
294
  value = if pure_sql
236
295
  record[attribute]
@@ -261,13 +320,20 @@ module InventoryRefresh::SaveCollection
261
320
  def update_records!(all_attribute_keys, hashes, indexed_inventory_objects)
262
321
  return if hashes.blank?
263
322
 
323
+ unless inventory_collection.parallel_safe?
324
+ # We need to update the stored records before we save it, since hashes are modified
325
+ inventory_collection.store_updated_records(hashes)
326
+ end
327
+
264
328
  query = build_update_query(all_attribute_keys, hashes)
265
329
  result = get_connection.execute(query)
266
330
 
267
- # We will check for timestamp clashes of full row update and we will fallback to skeletal update
268
- inventory_collection.store_updated_records(result)
331
+ if inventory_collection.parallel_safe?
332
+ # We will check for timestamp clashes of full row update and we will fallback to skeletal update
333
+ inventory_collection.store_updated_records(result)
269
334
 
270
- skeletonize_ignored_records!(indexed_inventory_objects, result)
335
+ skeletonize_ignored_records!(indexed_inventory_objects, result)
336
+ end
271
337
 
272
338
  result
273
339
  end
@@ -287,7 +353,11 @@ module InventoryRefresh::SaveCollection
287
353
  hashes = []
288
354
  create_time = time_now
289
355
  batch.each do |index, inventory_object|
290
- hash = if serializable_keys?
356
+ hash = if inventory_collection.use_ar_object?
357
+ record = inventory_collection.model_class.new(attributes_index[index])
358
+ values_for_database!(all_attribute_keys,
359
+ record.attributes.symbolize_keys)
360
+ elsif serializable_keys?
291
361
  values_for_database!(all_attribute_keys,
292
362
  attributes_index[index])
293
363
  else
@@ -309,19 +379,24 @@ module InventoryRefresh::SaveCollection
309
379
  build_insert_query(all_attribute_keys, hashes, :on_conflict => on_conflict, :mode => :full)
310
380
  )
311
381
 
312
- # We've done upsert, so records were either created or updated. We can recognize that by checking if
313
- # created and updated timestamps are the same
314
- created_attr = "created_on" if inventory_collection.supports_column?(:created_on)
315
- created_attr ||= "created_at" if inventory_collection.supports_column?(:created_at)
316
- updated_attr = "updated_on" if inventory_collection.supports_column?(:updated_on)
317
- updated_attr ||= "updated_at" if inventory_collection.supports_column?(:updated_at)
318
-
319
- if created_attr && updated_attr
320
- created, updated = result.to_a.partition { |x| x[created_attr] == x[updated_attr] }
321
- inventory_collection.store_created_records(created)
322
- inventory_collection.store_updated_records(updated)
382
+ if inventory_collection.parallel_safe?
383
+ # We've done upsert, so records were either created or updated. We can recognize that by checking if
384
+ # created and updated timestamps are the same
385
+ created_attr = "created_on" if inventory_collection.supports_column?(:created_on)
386
+ created_attr ||= "created_at" if inventory_collection.supports_column?(:created_at)
387
+ updated_attr = "updated_on" if inventory_collection.supports_column?(:updated_on)
388
+ updated_attr ||= "updated_at" if inventory_collection.supports_column?(:updated_at)
389
+
390
+ if created_attr && updated_attr
391
+ created, updated = result.to_a.partition { |x| x[created_attr] == x[updated_attr] }
392
+ inventory_collection.store_created_records(created)
393
+ inventory_collection.store_updated_records(updated)
394
+ else
395
+ # The record doesn't have both created and updated attrs, so we'll take all as created
396
+ inventory_collection.store_created_records(result)
397
+ end
323
398
  else
324
- # The record doesn't have both created and updated attrs, so we'll take all as created
399
+ # We've done just insert, so all records were created
325
400
  inventory_collection.store_created_records(result)
326
401
  end
327
402
 
@@ -334,7 +409,9 @@ module InventoryRefresh::SaveCollection
334
409
  :on_conflict => on_conflict)
335
410
  end
336
411
 
337
- skeletonize_ignored_records!(indexed_inventory_objects, result, :all_unique_columns => true)
412
+ if inventory_collection.parallel_safe?
413
+ skeletonize_ignored_records!(indexed_inventory_objects, result, :all_unique_columns => true)
414
+ end
338
415
  end
339
416
 
340
417
  # Stores primary_key values of created records into associated InventoryObject objects.
@@ -365,6 +442,7 @@ module InventoryRefresh::SaveCollection
365
442
  inventory_object[ref] = attributes[ref]
366
443
 
367
444
  next unless (foreign_key = association_to_foreign_key_mapping[ref])
445
+
368
446
  base_class_name = attributes[association_to_foreign_type_mapping[ref].try(:to_sym)] || association_to_base_class_mapping[ref]
369
447
  id = attributes[foreign_key.to_sym]
370
448
  inventory_object[ref] = InventoryRefresh::ApplicationRecordReference.new(base_class_name, id)
@@ -0,0 +1,57 @@
1
+ require "inventory_refresh/save_collection/saver/base"
2
+
3
+ module InventoryRefresh::SaveCollection
4
+ module Saver
5
+ class Default < InventoryRefresh::SaveCollection::Saver::Base
6
+ private
7
+
8
+ # Updates the passed record with hash data and stores primary key value into inventory_object.
9
+ #
10
+ # @param record [ApplicationRecord] record we want to update in DB
11
+ # @param hash [Hash] data we want to update the record with
12
+ # @param inventory_object [InventoryRefresh::InventoryObject] InventoryObject instance where we will store primary
13
+ # key value
14
+ def update_record!(record, hash, inventory_object)
15
+ record.assign_attributes(hash.except(:id))
16
+ if !inventory_collection.check_changed? || record.changed?
17
+ record.save
18
+ inventory_collection.store_updated_records(record)
19
+ end
20
+
21
+ inventory_object.id = record.id
22
+ end
23
+
24
+ # Creates a new record in the DB using the passed hash data
25
+ #
26
+ # @param hash [Hash] hash with data we want to persist to DB
27
+ # @param inventory_object [InventoryRefresh::InventoryObject] InventoryObject instance where we will store primary
28
+ # key value
29
+ def create_record!(hash, inventory_object)
30
+ record = inventory_collection.model_class.create!(hash.except(:id))
31
+ inventory_collection.store_created_records(record)
32
+
33
+ inventory_object.id = record.id
34
+ end
35
+
36
+ # Asserts we do not have duplicate records in the DB. If the record is duplicate we will destroy it.
37
+ #
38
+ # @param record [ApplicationRecord] record we want to update in DB
39
+ # @param index [String] manager_uuid of the record
40
+ # @return [Boolean] false if the record is duplicate
41
+ def assert_unique_record(record, index)
42
+ # TODO(lsmola) can go away once we indexed our DB with unique indexes
43
+ if unique_db_indexes.include?(index) # Include on Set is O(1)
44
+ # We have a duplicate in the DB, destroy it. A find_each method does automatically .order(:id => :asc)
45
+ # so we always keep the oldest record in the case of duplicates.
46
+ logger.warn("A duplicate record was detected and destroyed, inventory_collection: "\
47
+ "'#{inventory_collection}', record: '#{record}', duplicate_index: '#{index}'")
48
+ record.destroy
49
+ return false
50
+ else
51
+ unique_db_indexes << index
52
+ end
53
+ true
54
+ end
55
+ end
56
+ end
57
+ end
@@ -59,7 +59,7 @@ module InventoryRefresh::SaveCollection
59
59
  skeletal_inventory_objects_index = {}
60
60
 
61
61
  inventory_collection.skeletal_primary_index.each_value do |inventory_object|
62
- attributes = inventory_object.class.attributes_with_keys(inventory_object.data, inventory_collection, all_attribute_keys, inventory_object)
62
+ attributes = inventory_object.attributes_with_keys(inventory_collection, all_attribute_keys, inventory_object)
63
63
  index = build_stringified_reference(attributes, unique_index_keys)
64
64
 
65
65
  skeletal_attributes_index[index] = attributes
@@ -135,6 +135,7 @@ module InventoryRefresh::SaveCollection
135
135
  # We need to set correct timestamps_max for this particular attribute, based on what is in timestamps
136
136
  batch.each do |x|
137
137
  next unless x[:__non_serialized_versions][column_name]
138
+
138
139
  x[comparables_max_name] = x[:__non_serialized_versions][column_name]
139
140
  end
140
141
  end
@@ -200,25 +201,7 @@ module InventoryRefresh::SaveCollection
200
201
  )
201
202
  end
202
203
 
203
- def comparable_timestamp(timestamp)
204
- # Lets cast all timestamps to to_f, rounding the time comparing precision to miliseconds, that should be
205
- # enough, since we are the ones setting the record version in collector. Otherwise we will have hard time with
206
- # doing equality, since the value changes going through DB (DB. cuts it at 5 decimal places)
207
-
208
- if timestamp.kind_of?(String)
209
- Time.use_zone('UTC') { Time.zone.parse(timestamp) }.to_f.round(3)
210
- elsif timestamp.kind_of?(Time)
211
- timestamp.in_time_zone('UTC').to_f.round(3)
212
- else
213
- timestamp
214
- end
215
- end
216
-
217
204
  def skeletonize_or_skip_record(record_version, hash_version, record_versions_max, inventory_object)
218
- record_version = comparable_timestamp(record_version)
219
- record_versions_max = comparable_timestamp(record_versions_max) if record_versions_max
220
- hash_version = comparable_timestamp(hash_version)
221
-
222
205
  # Skip updating this record, because it is old
223
206
  return true if record_version && hash_version && record_version >= hash_version
224
207
 
@@ -3,6 +3,28 @@ module InventoryRefresh::SaveCollection
3
3
  module RetentionHelper
4
4
  private
5
5
 
6
+ # Deletes a complement of referenced data
7
+ def delete_complement
8
+ return unless inventory_collection.delete_allowed?
9
+
10
+ all_manager_uuids_size = inventory_collection.all_manager_uuids.size
11
+
12
+ logger.debug("Processing :delete_complement of #{inventory_collection} of size "\
13
+ "#{all_manager_uuids_size}...")
14
+
15
+ query = complement_of!(inventory_collection.all_manager_uuids,
16
+ inventory_collection.all_manager_uuids_scope,
17
+ inventory_collection.all_manager_uuids_timestamp)
18
+
19
+ ids_of_non_active_entities = ActiveRecord::Base.connection.execute(query.to_sql).to_a
20
+ ids_of_non_active_entities.each_slice(10_000) do |batch|
21
+ destroy_records!(batch)
22
+ end
23
+
24
+ logger.debug("Processing :delete_complement of #{inventory_collection} of size "\
25
+ "#{all_manager_uuids_size}, deleted=#{inventory_collection.deleted_records.size}...Complete")
26
+ end
27
+
6
28
  # Applies strategy based on :retention_strategy parameter, or fallbacks to legacy_destroy_records.
7
29
  #
8
30
  # @param records [Array<ApplicationRecord, Hash, Array>] Records we want to delete or archive
@@ -13,9 +35,13 @@ module InventoryRefresh::SaveCollection
13
35
  return false unless inventory_collection.delete_allowed?
14
36
  return if records.blank?
15
37
 
16
- ids = ids_array(records)
17
- inventory_collection.store_deleted_records(ids)
18
- send("#{inventory_collection.retention_strategy}_all_records!", ids)
38
+ if inventory_collection.retention_strategy
39
+ ids = ids_array(records)
40
+ inventory_collection.store_deleted_records(ids)
41
+ send("#{inventory_collection.retention_strategy}_all_records!", ids)
42
+ else
43
+ legacy_destroy_records!(records)
44
+ end
19
45
  end
20
46
 
21
47
  # Convert records to list of ids in format [{:id => X}, {:id => Y}...]
@@ -45,6 +71,45 @@ module InventoryRefresh::SaveCollection
45
71
  def destroy_all_records!(records)
46
72
  inventory_collection.model_class.where(:id => records.map { |x| x[:id] }).delete_all
47
73
  end
74
+
75
+ # Deletes or soft-deletes records. If the model_class supports a custom class delete method, we will use it for
76
+ # batch soft-delete. This is the legacy method doing either ineffective deletion/archiving or requiring a method
77
+ # on a class.
78
+ #
79
+ # @param records [Array<ApplicationRecord, Hash>] Records we want to delete. If we have only hashes, we need to
80
+ # fetch ApplicationRecord objects from the DB
81
+ def legacy_destroy_records!(records)
82
+ # Is the delete_method rails standard deleting method?
83
+ rails_delete = %i[destroy delete].include?(inventory_collection.delete_method)
84
+ if !rails_delete && inventory_collection.model_class.respond_to?(inventory_collection.delete_method)
85
+ # We have custom delete method defined on a class, that means it supports batch destroy
86
+ inventory_collection.store_deleted_records(records.map { |x| {:id => record_key(x, primary_key)} })
87
+ inventory_collection.model_class.public_send(inventory_collection.delete_method, records.map { |x| record_key(x, primary_key) })
88
+ else
89
+ legacy_ineffective_destroy_records(records)
90
+ end
91
+ end
92
+
93
+ # Very ineffective way of deleting records, but is needed if we want to invoke hooks.
94
+ #
95
+ # @param records [Array<ApplicationRecord, Hash>] Records we want to delete. If we have only hashes, we need to
96
+ # fetch ApplicationRecord objects from the DB
97
+ def legacy_ineffective_destroy_records(records)
98
+ # We have either standard :destroy and :delete rails method, or custom instance level delete method
99
+ # Note: The standard :destroy and :delete rails method can't be batched because of the hooks and cascade destroy
100
+ ActiveRecord::Base.transaction do
101
+ if pure_sql_records_fetching
102
+ # For pure SQL fetching, we need to get the AR objects again, so we can call destroy
103
+ inventory_collection.model_class.where(:id => records.map { |x| record_key(x, primary_key) }).find_each do |record|
104
+ delete_record!(record)
105
+ end
106
+ else
107
+ records.each do |record|
108
+ delete_record!(record)
109
+ end
110
+ end
111
+ end
112
+ end
48
113
  end
49
114
  end
50
115
  end
@@ -8,6 +8,9 @@ module InventoryRefresh::SaveCollection
8
8
  module SqlHelper
9
9
  include InventoryRefresh::Logging
10
10
 
11
+ # TODO(lsmola) all below methods should be rewritten to arel, but we need to first extend arel to be able to do
12
+ # this
13
+
11
14
  extend ActiveSupport::Concern
12
15
 
13
16
  included do
@@ -80,6 +83,128 @@ module InventoryRefresh::SaveCollection
80
83
  "#{value}::#{sql_type}"
81
84
  end
82
85
  end
86
+
87
+ # Effective way of doing multiselect
88
+ #
89
+ # If we use "(col1, col2) IN [(a,e), (b,f), (b,e)]" it's not great, just with 10k batch, we see
90
+ # *** ActiveRecord::StatementInvalid Exception: PG::StatementTooComplex: ERROR: stack depth limit exceeded
91
+ # HINT: Increase the configuration parameter "max_stack_depth" (currently 2048kB), after ensuring the
92
+ # platform's stack depth limit is adequate.
93
+ #
94
+ # If we use "(col1 = a AND col2 = e) OR (col1 = b AND col2 = f) OR (col1 = b AND col2 = e)" with 10k batch, it
95
+ # takes about 6s and consumes 300MB, with 100k it takes ~1h and consumes 3GB in the Postgres process
96
+ #
97
+ # The best way seems to be using CTE, where the list of values we want to map is turned to 'table' and we just
98
+ # do RIGHT OUTER JOIN to get the complement of given identifiers. Tested on getting complement of 100k items,
99
+ # using 2 cols (:ems_ref and :uid_ems) from total 150k rows. It takes ~1s and 350MB in the Postgres process
100
+ #
101
+ # @param manager_uuids [Array<String>, Array[Hash]] Array with manager_uuids of entities. The keys have to match
102
+ # inventory_collection.manager_ref. We allow passing just array of strings, if manager_ref.size ==1, to
103
+ # spare some memory
104
+ # @return [Arel::SelectManager] Arel for getting complement of uuids. This method modifies the passed
105
+ # manager_uuids to spare some memory
106
+ def complement_of!(manager_uuids, all_manager_uuids_scope, all_manager_uuids_timestamp)
107
+ all_attribute_keys = inventory_collection.manager_ref
108
+ all_attribute_keys_array = inventory_collection.manager_ref.map(&:to_s)
109
+
110
+ active_entities = Arel::Table.new(:active_entities)
111
+ active_entities_cte = Arel::Nodes::As.new(
112
+ active_entities,
113
+ Arel.sql("(#{active_entities_query(all_attribute_keys_array, manager_uuids)})")
114
+ )
115
+
116
+ all_entities = Arel::Table.new(:all_entities)
117
+ all_entities_cte = Arel::Nodes::As.new(
118
+ all_entities,
119
+ Arel.sql("(#{all_entities_query(all_manager_uuids_scope, all_manager_uuids_timestamp).select(:id, *all_attribute_keys_array).to_sql})")
120
+ )
121
+ join_condition = all_attribute_keys.map { |key| active_entities[key].eq(all_entities[key]) }.inject(:and)
122
+ where_condition = all_attribute_keys.map { |key| active_entities[key].eq(nil) }.inject(:and)
123
+
124
+ active_entities
125
+ .project(all_entities[:id])
126
+ .join(all_entities, Arel::Nodes::RightOuterJoin)
127
+ .on(join_condition)
128
+ .with(active_entities_cte, all_entities_cte)
129
+ .where(where_condition)
130
+ end
131
+
132
+ private
133
+
134
+ def all_entities_query(all_manager_uuids_scope, all_manager_uuids_timestamp)
135
+ all_entities_query = inventory_collection.full_collection_for_comparison
136
+ all_entities_query = all_entities_query.active if inventory_collection.retention_strategy == :archive
137
+
138
+ if all_manager_uuids_scope
139
+ scope_keys = all_manager_uuids_scope.first.keys.map { |x| association_to_foreign_key_mapping[x.to_sym] }.map(&:to_s)
140
+ scope = load_scope(all_manager_uuids_scope)
141
+ condition = inventory_collection.build_multi_selection_condition(scope, scope_keys)
142
+ all_entities_query = all_entities_query.where(condition)
143
+ end
144
+
145
+ if all_manager_uuids_timestamp && supports_column?(:resource_timestamp)
146
+ all_manager_uuids_timestamp = Time.parse(all_manager_uuids_timestamp).utc
147
+
148
+ date_field = model_class.arel_table[:resource_timestamp]
149
+ all_entities_query = all_entities_query.where(date_field.lt(all_manager_uuids_timestamp))
150
+ end
151
+ all_entities_query
152
+ end
153
+
154
+ def load_scope(all_manager_uuids_scope)
155
+ scope_keys = all_manager_uuids_scope.first.keys.to_set
156
+
157
+ all_manager_uuids_scope.map do |cond|
158
+ assert_scope!(scope_keys, cond)
159
+
160
+ cond.map do |key, value|
161
+ foreign_key = association_to_foreign_key_mapping[key.to_sym]
162
+ foreign_key_value = value.load&.id
163
+
164
+ assert_foreign_keys!(key, value, foreign_key, foreign_key_value)
165
+
166
+ [foreign_key, foreign_key_value]
167
+ end.to_h
168
+ end
169
+ end
170
+
171
+ def assert_scope!(scope_keys, cond)
172
+ if cond.keys.to_set != scope_keys
173
+ raise "'#{inventory_collection}' expected keys for :all_manager_uuids_scope are #{scope_keys.to_a}, got"\
174
+ " #{cond.keys}. Keys must be the same for all scopes provided."
175
+ end
176
+ end
177
+
178
+ def assert_foreign_keys!(key, value, foreign_key, foreign_key_value)
179
+ unless foreign_key
180
+ raise "'#{inventory_collection}' doesn't have relation :#{key} provided in :all_manager_uuids_scope."
181
+ end
182
+
183
+ unless foreign_key_value
184
+ raise "'#{inventory_collection}' couldn't load scope value :#{key} => #{value.inspect} provided in :all_manager_uuids_scope"
185
+ end
186
+ end
187
+
188
+ def active_entities_query(all_attribute_keys_array, manager_uuids)
189
+ connection = ActiveRecord::Base.connection
190
+
191
+ all_attribute_keys_array_q = all_attribute_keys_array.map { |x| quote_column_name(x) }
192
+ # For Postgres, only the first set of values should contain the type casts
193
+ first_value = manager_uuids.shift.to_h
194
+ first_value = "(#{all_attribute_keys_array.map { |x| quote(connection, first_value[x], x, true) }.join(",")})"
195
+
196
+ # Rest of the values, without the type cast
197
+ values = manager_uuids.map! do |hash|
198
+ "(#{all_attribute_keys_array.map { |x| quote(connection, hash[x], x, false) }.join(",")})"
199
+ end.join(",")
200
+
201
+ values = values.blank? ? first_value : [first_value, values].join(",")
202
+
203
+ <<-SQL
204
+ SELECT *
205
+ FROM (VALUES #{values}) AS active_entities_table(#{all_attribute_keys_array_q.join(",")})
206
+ SQL
207
+ end
83
208
  end
84
209
  end
85
210
  end