inventory_refresh 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +6 -0
  3. data/Gemfile +4 -0
  4. data/bundler.d/.gitkeep +0 -0
  5. data/inventory_refresh.gemspec +4 -4
  6. data/lib/inventory_refresh/inventory_collection/builder.rb +249 -0
  7. data/lib/inventory_refresh/inventory_collection/graph.rb +0 -15
  8. data/lib/inventory_refresh/inventory_collection/helpers/associations_helper.rb +80 -0
  9. data/lib/inventory_refresh/inventory_collection/helpers/initialize_helper.rb +456 -0
  10. data/lib/inventory_refresh/inventory_collection/helpers/questions_helper.rb +132 -0
  11. data/lib/inventory_refresh/inventory_collection/helpers.rb +6 -0
  12. data/lib/inventory_refresh/inventory_collection/index/type/skeletal.rb +5 -5
  13. data/lib/inventory_refresh/inventory_collection/reference.rb +4 -0
  14. data/lib/inventory_refresh/inventory_collection/scanner.rb +111 -18
  15. data/lib/inventory_refresh/inventory_collection/serialization.rb +7 -7
  16. data/lib/inventory_refresh/inventory_collection/unconnected_edge.rb +19 -0
  17. data/lib/inventory_refresh/inventory_collection.rb +114 -649
  18. data/lib/inventory_refresh/inventory_object.rb +17 -11
  19. data/lib/inventory_refresh/inventory_object_lazy.rb +20 -10
  20. data/lib/inventory_refresh/persister.rb +212 -0
  21. data/lib/inventory_refresh/save_collection/base.rb +18 -3
  22. data/lib/inventory_refresh/save_collection/saver/base.rb +25 -62
  23. data/lib/inventory_refresh/save_collection/saver/concurrent_safe_batch.rb +73 -225
  24. data/lib/inventory_refresh/save_collection/saver/partial_upsert_helper.rb +226 -0
  25. data/lib/inventory_refresh/save_collection/saver/retention_helper.rb +115 -0
  26. data/lib/inventory_refresh/save_collection/saver/sql_helper.rb +122 -0
  27. data/lib/inventory_refresh/save_collection/saver/sql_helper_update.rb +24 -5
  28. data/lib/inventory_refresh/save_collection/saver/sql_helper_upsert.rb +6 -6
  29. data/lib/inventory_refresh/save_collection/sweeper.rb +69 -0
  30. data/lib/inventory_refresh/save_inventory.rb +18 -8
  31. data/lib/inventory_refresh/target_collection.rb +12 -0
  32. data/lib/inventory_refresh/version.rb +1 -1
  33. data/lib/inventory_refresh.rb +1 -0
  34. metadata +24 -15
  35. data/lib/inventory_refresh/save_collection/recursive.rb +0 -52
  36. data/lib/inventory_refresh/save_collection/saver/concurrent_safe.rb +0 -71
@@ -1,4 +1,6 @@
1
1
  require "inventory_refresh/save_collection/saver/base"
2
+ require "inventory_refresh/save_collection/saver/partial_upsert_helper"
3
+ require "inventory_refresh/save_collection/saver/retention_helper"
2
4
  require "active_support/core_ext/module/delegation"
3
5
 
4
6
  module InventoryRefresh::SaveCollection
@@ -6,16 +8,21 @@ module InventoryRefresh::SaveCollection
6
8
  class ConcurrentSafeBatch < InventoryRefresh::SaveCollection::Saver::Base
7
9
  private
8
10
 
11
+ # Methods for archiving or deleting non-existent records
12
+ include InventoryRefresh::SaveCollection::Saver::PartialUpsertHelper
13
+ include InventoryRefresh::SaveCollection::Saver::RetentionHelper
14
+
9
15
  delegate :association_to_base_class_mapping,
10
16
  :association_to_foreign_key_mapping,
11
17
  :association_to_foreign_type_mapping,
12
18
  :attribute_references,
19
+ :resource_version_column,
13
20
  :to => :inventory_collection
14
21
 
15
22
  # Attribute accessor to ApplicationRecord object or Hash
16
23
  #
17
24
  # @param record [Hash, ApplicationRecord] record or hash
18
- # @param key [Symbol] key pointing to attribute of the record
25
+ # @param key [String] key pointing to attribute of the record
19
26
  # @return [Object] value of the record on the key
20
27
  def record_key(record, key)
21
28
  send(record_key_method, record, key)
@@ -24,7 +31,7 @@ module InventoryRefresh::SaveCollection
24
31
  # Attribute accessor to ApplicationRecord object
25
32
  #
26
33
  # @param record [ApplicationRecord] record
27
- # @param key [Symbol] key pointing to attribute of the record
34
+ # @param key [String] key pointing to attribute of the record
28
35
  # @return [Object] value of the record on the key
29
36
  def ar_record_key(record, key)
30
37
  record.public_send(key)
@@ -33,7 +40,7 @@ module InventoryRefresh::SaveCollection
33
40
  # Attribute accessor to Hash object
34
41
  #
35
42
  # @param record [Hash] hash
36
- # @param key [Symbol] key pointing to attribute of the record
43
+ # @param key [String] key pointing to attribute of the record
37
44
  # @return [Object] value of the record on the key
38
45
  def pure_sql_record_key(record, key)
39
46
  record[select_keys_indexes[key]]
@@ -82,7 +89,7 @@ module InventoryRefresh::SaveCollection
82
89
  all_attribute_keys = Set.new + inventory_collection.batch_extra_attributes
83
90
 
84
91
  inventory_collection.each do |inventory_object|
85
- attributes = inventory_object.attributes_with_keys(inventory_collection, all_attribute_keys)
92
+ attributes = inventory_object.attributes_with_keys(inventory_collection, all_attribute_keys, inventory_object)
86
93
  index = build_stringified_reference(attributes, unique_index_keys)
87
94
 
88
95
  # Interesting fact: not building attributes_index and using only inventory_objects_index doesn't do much
@@ -91,11 +98,7 @@ module InventoryRefresh::SaveCollection
91
98
  inventory_objects_index[index] = inventory_object
92
99
  end
93
100
 
94
- all_attribute_keys << :created_at if supports_created_at?
95
- all_attribute_keys << :updated_at if supports_updated_at?
96
- all_attribute_keys << :created_on if supports_created_on?
97
- all_attribute_keys << :updated_on if supports_updated_on?
98
- all_attribute_keys << :type if supports_sti?
101
+ expand_all_attribute_keys!(all_attribute_keys)
99
102
 
100
103
  logger.debug("Processing #{inventory_collection} of size #{inventory_collection.size}...")
101
104
 
@@ -115,14 +118,19 @@ module InventoryRefresh::SaveCollection
115
118
  create_records!(all_attribute_keys, batch, attributes_index, :on_conflict => on_conflict)
116
119
  end
117
120
 
118
- # Let the GC clean this up
119
- inventory_objects_index = nil
120
- attributes_index = nil
121
-
122
121
  if inventory_collection.parallel_safe?
123
122
  create_or_update_partial_records(all_attribute_keys)
124
123
  end
125
124
  end
125
+
126
+ logger.debug("Marking :last_seen_at of #{inventory_collection} of size #{inventory_collection.size}...")
127
+
128
+ mark_last_seen_at(attributes_index)
129
+
130
+ # Let the GC clean this up
131
+ inventory_objects_index = nil
132
+ attributes_index = nil
133
+
126
134
  logger.debug("Processing #{inventory_collection}, "\
127
135
  "created=#{inventory_collection.created_records.count}, "\
128
136
  "updated=#{inventory_collection.updated_records.count}, "\
@@ -132,6 +140,28 @@ module InventoryRefresh::SaveCollection
132
140
  raise e
133
141
  end
134
142
 
143
+ def expand_all_attribute_keys!(all_attribute_keys)
144
+ %i(created_at updated_at created_on updated_on).each do |col|
145
+ all_attribute_keys << col if supports_column?(col)
146
+ end
147
+ all_attribute_keys << :type if supports_sti?
148
+ all_attribute_keys << :archived_at if supports_column?(:archived_at)
149
+ end
150
+
151
+ def mark_last_seen_at(attributes_index)
152
+ return unless supports_column?(:last_seen_at) && inventory_collection.parallel_safe?
153
+ return if attributes_index.blank?
154
+
155
+ all_attribute_keys = [:last_seen_at]
156
+
157
+ last_seen_at = Time.now.utc
158
+ attributes_index.each_value { |v| v[:last_seen_at] = last_seen_at }
159
+
160
+ query = build_partial_update_query(all_attribute_keys, attributes_index.values)
161
+
162
+ get_connection.execute(query)
163
+ end
164
+
135
165
  # Batch updates existing records that are in the DB using attributes_index. And delete the ones that were not
136
166
  # present in inventory_objects_index.
137
167
  #
@@ -157,7 +187,7 @@ module InventoryRefresh::SaveCollection
157
187
  index = db_columns_index(record)
158
188
 
159
189
  inventory_object = inventory_objects_index.delete(index)
160
- hash = attributes_index.delete(index)
190
+ hash = attributes_index[index]
161
191
 
162
192
  if inventory_object.nil?
163
193
  # Record was found in the DB but not sent for saving, that means it doesn't exist anymore and we should
@@ -167,8 +197,9 @@ module InventoryRefresh::SaveCollection
167
197
  end
168
198
  else
169
199
  # Record was found in the DB and sent for saving, we will be updating the DB.
170
- next unless assert_referential_integrity(hash)
171
200
  inventory_object.id = primary_key_value
201
+ next unless assert_referential_integrity(hash)
202
+ next unless changed?(record, hash, all_attribute_keys)
172
203
 
173
204
  if inventory_collection.parallel_safe? &&
174
205
  (supports_remote_data_timestamp?(all_attribute_keys) || supports_remote_data_version?(all_attribute_keys))
@@ -176,12 +207,12 @@ module InventoryRefresh::SaveCollection
176
207
  version_attr, max_version_attr = if supports_remote_data_timestamp?(all_attribute_keys)
177
208
  [:resource_timestamp, :resource_timestamps_max]
178
209
  elsif supports_remote_data_version?(all_attribute_keys)
179
- [:resource_version, :resource_versions_max]
210
+ [:resource_counter, :resource_counters_max]
180
211
  end
181
212
 
182
- next if skeletonize_or_skip_record(record.try(version_attr) || record.try(:[], version_attr),
213
+ next if skeletonize_or_skip_record(record_key(record, version_attr),
183
214
  hash[version_attr],
184
- record.try(max_version_attr) || record.try(:[], max_version_attr),
215
+ record_key(record, max_version_attr),
185
216
  inventory_object)
186
217
  end
187
218
 
@@ -231,6 +262,26 @@ module InventoryRefresh::SaveCollection
231
262
  records_for_destroy = [] # Cleanup so GC can release it sooner
232
263
  end
233
264
 
265
+ def changed?(_record, _hash, _all_attribute_keys)
266
+ return true unless inventory_collection.check_changed?
267
+
268
+ # TODO(lsmola) this check needs to be disabled now, because it doesn't work with lazy_find having secondary
269
+ # indexes. Examples: we save a pod before we save a project, that means the project lazy_find won't evaluate,
270
+ # because we load it with secondary index and can't do skeletal precreate. Then when the object is being saved
271
+ # again, the lazy_find is evaluated, but the resource version is not changed, so the row is not saved.
272
+ #
273
+ # To keep this quick .changed? check, we might need to extend this, so the resource_version doesn't save until
274
+ # all lazy_links of the row are evaluated.
275
+ #
276
+ # if supports_resource_version?(all_attribute_keys) && supports_column?(resource_version_column)
277
+ # record_resource_version = record_key(record, resource_version_column.to_s)
278
+ #
279
+ # return record_resource_version != hash[resource_version_column]
280
+ # end
281
+
282
+ true
283
+ end
284
+
234
285
  def db_columns_index(record, pure_sql: false)
235
286
  # Incoming values are in SQL string form.
236
287
  # TODO(lsmola) unify this behavior with object_index_with_keys method in InventoryCollection
@@ -261,39 +312,6 @@ module InventoryRefresh::SaveCollection
261
312
  end
262
313
  end
263
314
 
264
- # Deletes or sof-deletes records. If the model_class supports a custom class delete method, we will use it for
265
- # batch soft-delete.
266
- #
267
- # @param records [Array<ApplicationRecord, Hash>] Records we want to delete. If we have only hashes, we need to
268
- # to fetch ApplicationRecord objects from the DB
269
- def destroy_records!(records)
270
- return false unless inventory_collection.delete_allowed?
271
- return if records.blank?
272
-
273
- # Is the delete_method rails standard deleting method?
274
- rails_delete = %i(destroy delete).include?(inventory_collection.delete_method)
275
- if !rails_delete && inventory_collection.model_class.respond_to?(inventory_collection.delete_method)
276
- # We have custom delete method defined on a class, that means it supports batch destroy
277
- inventory_collection.store_deleted_records(records.map { |x| {:id => record_key(x, primary_key)} })
278
- inventory_collection.model_class.public_send(inventory_collection.delete_method, records.map { |x| record_key(x, primary_key) })
279
- else
280
- # We have either standard :destroy and :delete rails method, or custom instance level delete method
281
- # Note: The standard :destroy and :delete rails method can't be batched because of the hooks and cascade destroy
282
- ActiveRecord::Base.transaction do
283
- if pure_sql_records_fetching
284
- # For pure SQL fetching, we need to get the AR objects again, so we can call destroy
285
- inventory_collection.model_class.where(:id => records.map { |x| record_key(x, primary_key) }).find_each do |record|
286
- delete_record!(record)
287
- end
288
- else
289
- records.each do |record|
290
- delete_record!(record)
291
- end
292
- end
293
- end
294
- end
295
- end
296
-
297
315
  # Batch updates existing records
298
316
  #
299
317
  # @param hashes [Array<Hash>] data used for building a batch update sql query
@@ -319,153 +337,6 @@ module InventoryRefresh::SaveCollection
319
337
  result
320
338
  end
321
339
 
322
- # Taking result from update or upsert of the row. The records that were not saved will be turned into skeletal
323
- # records and we will save them attribute by attribute.
324
- #
325
- # @param hash [Hash{String => InventoryObject}>] Hash with indexed data we want to save
326
- # @param result [Array<Hash>] Result from the DB containing the data that were actually saved
327
- # @param all_unique_columns [Boolean] True if index is consisted from all columns of the unique index. False if
328
- # index is just made from manager_ref turned in DB column names.
329
- def skeletonize_ignored_records!(hash, result, all_unique_columns: false)
330
- updated = if all_unique_columns
331
- result.map { |x| unique_index_columns_to_s.map { |key| x[key] } }
332
- else
333
- result.map { |x| db_columns_index(x, :pure_sql => true) }
334
- end
335
-
336
- updated.each { |x| hash.delete(x) }
337
-
338
- # Now lets skeletonize all inventory_objects that were not saved by update or upsert. Old rows that can't be
339
- # saved are not being sent here. We have only rows that are new, but become old as we send the query (so other
340
- # parallel process saved the data in the meantime). Or if some attributes are newer than the whole row
341
- # being sent.
342
- hash.each_key do |db_index|
343
- inventory_collection.skeletal_primary_index.skeletonize_primary_index(hash[db_index].manager_uuid)
344
- end
345
- end
346
-
347
- # Saves partial records using upsert, taking records from skeletal_primary_index. This is used both for
348
- # skeletal precreate as well as for saving partial rows.
349
- #
350
- # @param all_attribute_keys [Set] Superset of all keys of all records being saved
351
- def create_or_update_partial_records(all_attribute_keys)
352
- skeletal_attributes_index = {}
353
- skeletal_inventory_objects_index = {}
354
-
355
- inventory_collection.skeletal_primary_index.each_value do |inventory_object|
356
- attributes = inventory_object.attributes_with_keys(inventory_collection, all_attribute_keys)
357
- index = build_stringified_reference(attributes, unique_index_keys)
358
-
359
- skeletal_attributes_index[index] = attributes
360
- skeletal_inventory_objects_index[index] = inventory_object
361
- end
362
-
363
- if supports_remote_data_timestamp?(all_attribute_keys)
364
- all_attribute_keys << :resource_timestamps
365
- all_attribute_keys << :resource_timestamps_max
366
- elsif supports_remote_data_version?(all_attribute_keys)
367
- all_attribute_keys << :resource_versions
368
- all_attribute_keys << :resource_versions_max
369
- end
370
-
371
- indexed_inventory_objects = {}
372
- hashes = []
373
- create_time = time_now
374
-
375
- skeletal_inventory_objects_index.each do |index, inventory_object|
376
- hash = skeletal_attributes_index.delete(index)
377
- # Partial create or update must never set a timestamp for the whole row
378
- timestamps = if supports_remote_data_timestamp?(all_attribute_keys) && supports_resource_timestamps_max?
379
- assign_partial_row_version_attributes!(:resource_timestamp,
380
- :resource_timestamps,
381
- :resource_timestamps_max,
382
- hash,
383
- all_attribute_keys)
384
- elsif supports_remote_data_version?(all_attribute_keys) && supports_resource_versions_max?
385
- assign_partial_row_version_attributes!(:resource_version,
386
- :resource_versions,
387
- :resource_versions_max,
388
- hash,
389
- all_attribute_keys)
390
- end
391
- # Transform hash to DB format
392
- hash = transform_to_hash!(all_attribute_keys, hash)
393
-
394
- assign_attributes_for_create!(hash, create_time)
395
-
396
- next unless assert_referential_integrity(hash)
397
-
398
- hash[:__non_serialized_versions] = timestamps # store non serialized timestamps for the partial updates
399
- hashes << hash
400
- # Index on Unique Columns values, so we can easily fill in the :id later
401
- indexed_inventory_objects[unique_index_columns.map { |x| hash[x] }] = inventory_object
402
- end
403
-
404
- return if hashes.blank?
405
-
406
- # First, lets try to create all partial records
407
- hashes.each_slice(batch_size_for_persisting) do |batch|
408
- result = create_partial!(all_attribute_keys,
409
- batch,
410
- :on_conflict => :do_nothing)
411
- inventory_collection.store_created_records(result)
412
- end
413
-
414
- # We need only skeletal records with timestamp. We can't save the ones without timestamp, because e.g. skeletal
415
- # precreate would be updating records with default values, that are not correct.
416
- pre_filtered = hashes.select { |x| x[:resource_timestamps_max] || x[:resource_versions_max] }
417
-
418
- results = {}
419
- # TODO(lsmola) we don't need to process rows that were save by the create -> oncoflict do nothing
420
- (all_attribute_keys - inventory_collection.base_columns).each do |column_name|
421
- filtered = pre_filtered.select { |x| x.key?(column_name) }
422
-
423
- filtered.each_slice(batch_size_for_persisting) do |batch|
424
- # We need to set correct timestamps_max for this particular attribute, based on what is in timestamps
425
- if supports_remote_data_timestamp?(all_attribute_keys)
426
- batch.each { |x| x[:resource_timestamps_max] = x[:__non_serialized_versions][column_name] if x[:__non_serialized_versions][column_name] }
427
- elsif supports_remote_data_version?(all_attribute_keys)
428
- batch.each { |x| x[:resource_versions_max] = x[:__non_serialized_versions][column_name] if x[:__non_serialized_versions][column_name] }
429
- end
430
-
431
- result = create_partial!(inventory_collection.base_columns + [column_name],
432
- batch,
433
- :on_conflict => :do_update,
434
- :column_name => column_name)
435
- result.each do |res|
436
- results[res["id"]] = res
437
- end
438
- end
439
- end
440
-
441
- inventory_collection.store_updated_records(results.values)
442
-
443
- # TODO(lsmola) we need to move here the hash loading ar object etc. otherwise the lazy_find with key will not
444
- # be correct
445
- if inventory_collection.dependees.present?
446
- # We need to get primary keys of the created objects, but only if there are dependees that would use them
447
- map_ids_to_inventory_objects(indexed_inventory_objects,
448
- all_attribute_keys,
449
- hashes,
450
- nil,
451
- :on_conflict => :do_nothing)
452
- end
453
- end
454
-
455
- # Batch upserts 1 data column of the row, plus the internal columns
456
- #
457
- # @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
458
- # @param hashes [Array<InventoryRefresh::InventoryObject>] Array of InventoryObject objects we will be inserting
459
- # into the DB
460
- # @param on_conflict [Symbol, NilClass] defines behavior on conflict with unique index constraint, allowed values
461
- # are :do_update, :do_nothing, nil
462
- # @param column_name [Symbol] Name of the data column we will be upserting
463
- def create_partial!(all_attribute_keys, hashes, on_conflict: nil, column_name: nil)
464
- get_connection.execute(
465
- build_insert_query(all_attribute_keys, hashes, :on_conflict => on_conflict, :mode => :partial, :column_name => column_name)
466
- )
467
- end
468
-
469
340
  # Batch inserts records using attributes_index data. With on_conflict option using :do_update, this method
470
341
  # does atomic upsert.
471
342
  #
@@ -510,10 +381,10 @@ module InventoryRefresh::SaveCollection
510
381
  if inventory_collection.parallel_safe?
511
382
  # We've done upsert, so records were either created or updated. We can recognize that by checking if
512
383
  # created and updated timestamps are the same
513
- created_attr = "created_on" if inventory_collection.supports_created_on?
514
- created_attr ||= "created_at" if inventory_collection.supports_created_at?
515
- updated_attr = "updated_on" if inventory_collection.supports_updated_on?
516
- updated_attr ||= "updated_at" if inventory_collection.supports_updated_at?
384
+ created_attr = "created_on" if inventory_collection.supports_column?(:created_on)
385
+ created_attr ||= "created_at" if inventory_collection.supports_column?(:created_at)
386
+ updated_attr = "updated_on" if inventory_collection.supports_column?(:updated_on)
387
+ updated_attr ||= "updated_at" if inventory_collection.supports_column?(:updated_at)
517
388
 
518
389
  if created_attr && updated_attr
519
390
  created, updated = result.to_a.partition { |x| x[created_attr] == x[updated_attr] }
@@ -604,29 +475,6 @@ module InventoryRefresh::SaveCollection
604
475
  end
605
476
  end
606
477
  end
607
-
608
- def skeletonize_or_skip_record(record_version, hash_version, record_versions_max, inventory_object)
609
- # Skip updating this record, because it is old
610
- return true if record_version && hash_version && record_version >= hash_version
611
-
612
- # Some column has bigger version than the whole row, we need to store the row partially
613
- if record_versions_max && hash_version && record_versions_max > hash_version
614
- inventory_collection.skeletal_primary_index.skeletonize_primary_index(inventory_object.manager_uuid)
615
- return true
616
- end
617
-
618
- false
619
- end
620
-
621
- def assign_partial_row_version_attributes!(full_row_version_attr, partial_row_version_attr,
622
- partial_row_version_attr_max, hash, all_attribute_keys)
623
- hash[partial_row_version_attr_max] = hash.delete(full_row_version_attr)
624
-
625
- if hash[partial_row_version_attr].present?
626
- # Lets clean to only what we save, since when we build the skeletal object, we can set more
627
- hash[partial_row_version_attr] = hash[partial_row_version_attr].slice(*all_attribute_keys)
628
- end
629
- end
630
478
  end
631
479
  end
632
480
  end
@@ -0,0 +1,226 @@
1
+ module InventoryRefresh::SaveCollection
2
+ module Saver
3
+ module PartialUpsertHelper
4
+ private
5
+
6
+ # Taking result from update or upsert of the row. The records that were not saved will be turned into skeletal
7
+ # records and we will save them attribute by attribute.
8
+ #
9
+ # @param hash [Hash{String => InventoryObject}>] Hash with indexed data we want to save
10
+ # @param result [Array<Hash>] Result from the DB containing the data that were actually saved
11
+ # @param all_unique_columns [Boolean] True if index is composed of all columns of the unique index. False if
12
+ # index is just made from manager_ref turned into DB column names.
13
+ def skeletonize_ignored_records!(hash, result, all_unique_columns: false)
14
+ updated = if all_unique_columns
15
+ result.map { |x| unique_index_columns_to_s.map { |key| x[key] } }
16
+ else
17
+ result.map { |x| db_columns_index(x, :pure_sql => true) }
18
+ end
19
+
20
+ updated.each { |x| hash.delete(x) }
21
+
22
+ # Now lets skeletonize all inventory_objects that were not saved by update or upsert. Old rows that can't be
23
+ # saved are not being sent here. We have only rows that are new, but become old as we send the query (so other
24
+ # parallel process saved the data in the meantime). Or if some attributes are newer than the whole row
25
+ # being sent.
26
+ hash.each_key do |db_index|
27
+ inventory_collection.skeletal_primary_index.skeletonize_primary_index(hash[db_index].manager_uuid)
28
+ end
29
+ end
30
+
31
+ # Saves partial records using upsert, taking records from skeletal_primary_index. This is used both for
32
+ # skeletal precreate as well as for saving partial rows.
33
+ #
34
+ # @param all_attribute_keys [Set] Superset of all keys of all records being saved
35
+ def create_or_update_partial_records(all_attribute_keys)
36
+ skeletal_inventory_objects_index, skeletal_attributes_index = load_partial_attributes(all_attribute_keys)
37
+
38
+ indexed_inventory_objects, hashes = process_partial_data(skeletal_inventory_objects_index,
39
+ skeletal_attributes_index,
40
+ all_attribute_keys)
41
+ return if hashes.blank?
42
+
43
+ processed_record_refs = skeletal_precreate!(hashes, all_attribute_keys)
44
+ hashes_for_update = hashes_for_update(hashes, processed_record_refs)
45
+ partial_updates!(hashes_for_update, all_attribute_keys)
46
+
47
+ if inventory_collection.dependees.present?
48
+ # We need to get primary keys of the created objects, but only if there are dependees that would use them
49
+ map_ids_to_inventory_objects(indexed_inventory_objects,
50
+ all_attribute_keys,
51
+ hashes,
52
+ nil,
53
+ :on_conflict => :do_nothing)
54
+ end
55
+ end
56
+
57
+ def load_partial_attributes(all_attribute_keys)
58
+ skeletal_attributes_index = {}
59
+ skeletal_inventory_objects_index = {}
60
+
61
+ inventory_collection.skeletal_primary_index.each_value do |inventory_object|
62
+ attributes = inventory_object.attributes_with_keys(inventory_collection, all_attribute_keys, inventory_object)
63
+ index = build_stringified_reference(attributes, unique_index_keys)
64
+
65
+ skeletal_attributes_index[index] = attributes
66
+ skeletal_inventory_objects_index[index] = inventory_object
67
+ end
68
+
69
+ if supports_remote_data_timestamp?(all_attribute_keys)
70
+ all_attribute_keys << :resource_timestamps
71
+ all_attribute_keys << :resource_timestamps_max
72
+ elsif supports_remote_data_version?(all_attribute_keys)
73
+ all_attribute_keys << :resource_counters
74
+ all_attribute_keys << :resource_counters_max
75
+ end
76
+
77
+ # We cannot set the resource_version doing partial update
78
+ all_attribute_keys.delete(resource_version_column)
79
+
80
+ return skeletal_inventory_objects_index, skeletal_attributes_index
81
+ end
82
+
83
+ def hashes_for_update(hashes, processed_record_refs)
84
+ indexed_hashes = hashes.each_with_object({}) { |hash, obj| obj[unique_index_columns.map { |x| hash[x] }] = hash }
85
+ indexed_hashes.except!(*processed_record_refs)
86
+ hashes_for_update = indexed_hashes.values
87
+
88
+ # We need only skeletal records with timestamp. We can't save the ones without timestamp, because e.g. skeletal
89
+ # precreate would be updating records with default values, that are not correct.
90
+ hashes_for_update.select { |x| x[:resource_timestamps_max] || x[:resource_counters_max] }
91
+ end
92
+
93
+ def skeletal_precreate!(hashes, all_attribute_keys)
94
+ processed_record_refs = []
95
+ # First, lets try to create all partial records
96
+ hashes.each_slice(batch_size_for_persisting) do |batch|
97
+ result = create_partial!(all_attribute_keys,
98
+ batch,
99
+ :on_conflict => :do_nothing)
100
+ inventory_collection.store_created_records(result)
101
+ # Store refs of created records, so we can ignore them for update
102
+ result.each { |hash| processed_record_refs << unique_index_columns.map { |x| hash[x.to_s] } }
103
+ end
104
+
105
+ processed_record_refs
106
+ end
107
+
108
+ def partial_updates!(hashes, all_attribute_keys)
109
+ results = {}
110
+ (all_attribute_keys - inventory_collection.base_columns).each do |column_name|
111
+ filtered = hashes.select { |x| x.key?(column_name) }
112
+
113
+ filtered.each_slice(batch_size_for_persisting) do |batch|
114
+ partial_update!(batch, all_attribute_keys, column_name, results)
115
+ end
116
+ end
117
+
118
+ inventory_collection.store_updated_records(results.values)
119
+ end
120
+
121
+ def partial_update!(batch, all_attribute_keys, column_name, results)
122
+ fill_comparables_max!(batch, all_attribute_keys, column_name)
123
+ result = create_partial!((inventory_collection.base_columns + [column_name]).to_set & all_attribute_keys,
124
+ batch,
125
+ :on_conflict => :do_update,
126
+ :column_name => column_name)
127
+ result.each do |res|
128
+ results[res["id"]] = res
129
+ end
130
+ end
131
+
132
+ def fill_comparables_max!(batch, all_attribute_keys, column_name)
133
+ comparables_max_name = comparable_max_column_name(all_attribute_keys)
134
+
135
+ # We need to set correct timestamps_max for this particular attribute, based on what is in timestamps
136
+ batch.each do |x|
137
+ next unless x[:__non_serialized_versions][column_name]
138
+ x[comparables_max_name] = x[:__non_serialized_versions][column_name]
139
+ end
140
+ end
141
+
142
+ def comparable_max_column_name(all_attribute_keys)
143
+ if supports_remote_data_timestamp?(all_attribute_keys)
144
+ :resource_timestamps_max
145
+ elsif supports_remote_data_version?(all_attribute_keys)
146
+ :resource_counters_max
147
+ end
148
+ end
149
+
150
+ def process_partial_data(skeletal_inventory_objects_index, skeletal_attributes_index, all_attribute_keys)
151
+ indexed_inventory_objects = {}
152
+ hashes = []
153
+ create_time = time_now
154
+
155
+ skeletal_inventory_objects_index.each do |index, inventory_object|
156
+ hash = prepare_partial_hash(skeletal_attributes_index.delete(index), all_attribute_keys, create_time)
157
+ next unless assert_referential_integrity(hash)
158
+
159
+ hashes << hash
160
+ # Index on Unique Columns values, so we can easily fill in the :id later
161
+ indexed_inventory_objects[unique_index_columns.map { |x| hash[x] }] = inventory_object
162
+ end
163
+
164
+ return indexed_inventory_objects, hashes
165
+ end
166
+
167
+ def prepare_partial_hash(hash, all_attribute_keys, create_time)
168
+ # Partial create or update must never set a timestamp for the whole row
169
+ timestamps = if supports_remote_data_timestamp?(all_attribute_keys) && supports_column?(:resource_timestamps_max)
170
+ assign_partial_row_version_attributes!(:resource_timestamp,
171
+ :resource_timestamps,
172
+ hash,
173
+ all_attribute_keys)
174
+ elsif supports_remote_data_version?(all_attribute_keys) && supports_column?(:resource_counters_max)
175
+ assign_partial_row_version_attributes!(:resource_counter,
176
+ :resource_counters,
177
+ hash,
178
+ all_attribute_keys)
179
+ end
180
+ # Transform hash to DB format
181
+ hash = transform_to_hash!(all_attribute_keys, hash)
182
+
183
+ assign_attributes_for_create!(hash, create_time)
184
+
185
+ hash[:__non_serialized_versions] = timestamps # store non serialized timestamps for the partial updates
186
+ hash
187
+ end
188
+
189
+ # Batch upserts 1 data column of the row, plus the internal columns
190
+ #
191
+ # @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
192
+ # @param hashes [Array<InventoryRefresh::InventoryObject>] Array of InventoryObject objects we will be inserting
193
+ # into the DB
194
+ # @param on_conflict [Symbol, NilClass] defines behavior on conflict with unique index constraint, allowed values
195
+ # are :do_update, :do_nothing, nil
196
+ # @param column_name [Symbol] Name of the data column we will be upserting
197
+ def create_partial!(all_attribute_keys, hashes, on_conflict: nil, column_name: nil)
198
+ get_connection.execute(
199
+ build_insert_query(all_attribute_keys, hashes, :on_conflict => on_conflict, :mode => :partial, :column_name => column_name)
200
+ )
201
+ end
202
+
203
+ def skeletonize_or_skip_record(record_version, hash_version, record_versions_max, inventory_object)
204
+ # Skip updating this record, because it is old
205
+ return true if record_version && hash_version && record_version >= hash_version
206
+
207
+ # Some column has bigger version than the whole row, we need to store the row partially
208
+ if record_versions_max && hash_version && record_versions_max > hash_version
209
+ inventory_collection.skeletal_primary_index.skeletonize_primary_index(inventory_object.manager_uuid)
210
+ return true
211
+ end
212
+
213
+ false
214
+ end
215
+
216
+ def assign_partial_row_version_attributes!(full_row_version_attr, partial_row_version_attr, hash, all_attribute_keys)
217
+ hash[comparable_max_column_name(all_attribute_keys)] = hash.delete(full_row_version_attr)
218
+
219
+ return if hash[partial_row_version_attr].blank?
220
+
221
+ # Lets clean to only what we save, since when we build the skeletal object, we can set more
222
+ hash[partial_row_version_attr] = hash[partial_row_version_attr].slice(*all_attribute_keys)
223
+ end
224
+ end
225
+ end
226
+ end