inventory_refresh 0.1.3 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (36) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +6 -0
  3. data/Gemfile +4 -0
  4. data/bundler.d/.gitkeep +0 -0
  5. data/inventory_refresh.gemspec +4 -4
  6. data/lib/inventory_refresh/inventory_collection/builder.rb +249 -0
  7. data/lib/inventory_refresh/inventory_collection/graph.rb +0 -15
  8. data/lib/inventory_refresh/inventory_collection/helpers/associations_helper.rb +80 -0
  9. data/lib/inventory_refresh/inventory_collection/helpers/initialize_helper.rb +456 -0
  10. data/lib/inventory_refresh/inventory_collection/helpers/questions_helper.rb +132 -0
  11. data/lib/inventory_refresh/inventory_collection/helpers.rb +6 -0
  12. data/lib/inventory_refresh/inventory_collection/index/type/skeletal.rb +5 -5
  13. data/lib/inventory_refresh/inventory_collection/reference.rb +4 -0
  14. data/lib/inventory_refresh/inventory_collection/scanner.rb +111 -18
  15. data/lib/inventory_refresh/inventory_collection/serialization.rb +7 -7
  16. data/lib/inventory_refresh/inventory_collection/unconnected_edge.rb +19 -0
  17. data/lib/inventory_refresh/inventory_collection.rb +114 -649
  18. data/lib/inventory_refresh/inventory_object.rb +17 -11
  19. data/lib/inventory_refresh/inventory_object_lazy.rb +20 -10
  20. data/lib/inventory_refresh/persister.rb +212 -0
  21. data/lib/inventory_refresh/save_collection/base.rb +18 -3
  22. data/lib/inventory_refresh/save_collection/saver/base.rb +25 -62
  23. data/lib/inventory_refresh/save_collection/saver/concurrent_safe_batch.rb +73 -225
  24. data/lib/inventory_refresh/save_collection/saver/partial_upsert_helper.rb +226 -0
  25. data/lib/inventory_refresh/save_collection/saver/retention_helper.rb +115 -0
  26. data/lib/inventory_refresh/save_collection/saver/sql_helper.rb +122 -0
  27. data/lib/inventory_refresh/save_collection/saver/sql_helper_update.rb +24 -5
  28. data/lib/inventory_refresh/save_collection/saver/sql_helper_upsert.rb +6 -6
  29. data/lib/inventory_refresh/save_collection/sweeper.rb +69 -0
  30. data/lib/inventory_refresh/save_inventory.rb +18 -8
  31. data/lib/inventory_refresh/target_collection.rb +12 -0
  32. data/lib/inventory_refresh/version.rb +1 -1
  33. data/lib/inventory_refresh.rb +1 -0
  34. metadata +24 -15
  35. data/lib/inventory_refresh/save_collection/recursive.rb +0 -52
  36. data/lib/inventory_refresh/save_collection/saver/concurrent_safe.rb +0 -71
@@ -1,4 +1,6 @@
1
1
  require "inventory_refresh/save_collection/saver/base"
2
+ require "inventory_refresh/save_collection/saver/partial_upsert_helper"
3
+ require "inventory_refresh/save_collection/saver/retention_helper"
2
4
  require "active_support/core_ext/module/delegation"
3
5
 
4
6
  module InventoryRefresh::SaveCollection
@@ -6,16 +8,21 @@ module InventoryRefresh::SaveCollection
6
8
  class ConcurrentSafeBatch < InventoryRefresh::SaveCollection::Saver::Base
7
9
  private
8
10
 
11
+ # Methods for archiving or deleting non-existent records
12
+ include InventoryRefresh::SaveCollection::Saver::PartialUpsertHelper
13
+ include InventoryRefresh::SaveCollection::Saver::RetentionHelper
14
+
9
15
  delegate :association_to_base_class_mapping,
10
16
  :association_to_foreign_key_mapping,
11
17
  :association_to_foreign_type_mapping,
12
18
  :attribute_references,
19
+ :resource_version_column,
13
20
  :to => :inventory_collection
14
21
 
15
22
  # Attribute accessor to ApplicationRecord object or Hash
16
23
  #
17
24
  # @param record [Hash, ApplicationRecord] record or hash
18
- # @param key [Symbol] key pointing to attribute of the record
25
+ # @param key [String] key pointing to attribute of the record
19
26
  # @return [Object] value of the record on the key
20
27
  def record_key(record, key)
21
28
  send(record_key_method, record, key)
@@ -24,7 +31,7 @@ module InventoryRefresh::SaveCollection
24
31
  # Attribute accessor to ApplicationRecord object
25
32
  #
26
33
  # @param record [ApplicationRecord] record
27
- # @param key [Symbol] key pointing to attribute of the record
34
+ # @param key [String] key pointing to attribute of the record
28
35
  # @return [Object] value of the record on the key
29
36
  def ar_record_key(record, key)
30
37
  record.public_send(key)
@@ -33,7 +40,7 @@ module InventoryRefresh::SaveCollection
33
40
  # Attribute accessor to Hash object
34
41
  #
35
42
  # @param record [Hash] hash
36
- # @param key [Symbol] key pointing to attribute of the record
43
+ # @param key [String] key pointing to attribute of the record
37
44
  # @return [Object] value of the record on the key
38
45
  def pure_sql_record_key(record, key)
39
46
  record[select_keys_indexes[key]]
@@ -82,7 +89,7 @@ module InventoryRefresh::SaveCollection
82
89
  all_attribute_keys = Set.new + inventory_collection.batch_extra_attributes
83
90
 
84
91
  inventory_collection.each do |inventory_object|
85
- attributes = inventory_object.attributes_with_keys(inventory_collection, all_attribute_keys)
92
+ attributes = inventory_object.attributes_with_keys(inventory_collection, all_attribute_keys, inventory_object)
86
93
  index = build_stringified_reference(attributes, unique_index_keys)
87
94
 
88
95
  # Interesting fact: not building attributes_index and using only inventory_objects_index doesn't do much
@@ -91,11 +98,7 @@ module InventoryRefresh::SaveCollection
91
98
  inventory_objects_index[index] = inventory_object
92
99
  end
93
100
 
94
- all_attribute_keys << :created_at if supports_created_at?
95
- all_attribute_keys << :updated_at if supports_updated_at?
96
- all_attribute_keys << :created_on if supports_created_on?
97
- all_attribute_keys << :updated_on if supports_updated_on?
98
- all_attribute_keys << :type if supports_sti?
101
+ expand_all_attribute_keys!(all_attribute_keys)
99
102
 
100
103
  logger.debug("Processing #{inventory_collection} of size #{inventory_collection.size}...")
101
104
 
@@ -115,14 +118,19 @@ module InventoryRefresh::SaveCollection
115
118
  create_records!(all_attribute_keys, batch, attributes_index, :on_conflict => on_conflict)
116
119
  end
117
120
 
118
- # Let the GC clean this up
119
- inventory_objects_index = nil
120
- attributes_index = nil
121
-
122
121
  if inventory_collection.parallel_safe?
123
122
  create_or_update_partial_records(all_attribute_keys)
124
123
  end
125
124
  end
125
+
126
+ logger.debug("Marking :last_seen_at of #{inventory_collection} of size #{inventory_collection.size}...")
127
+
128
+ mark_last_seen_at(attributes_index)
129
+
130
+ # Let the GC clean this up
131
+ inventory_objects_index = nil
132
+ attributes_index = nil
133
+
126
134
  logger.debug("Processing #{inventory_collection}, "\
127
135
  "created=#{inventory_collection.created_records.count}, "\
128
136
  "updated=#{inventory_collection.updated_records.count}, "\
@@ -132,6 +140,28 @@ module InventoryRefresh::SaveCollection
132
140
  raise e
133
141
  end
134
142
 
143
+ def expand_all_attribute_keys!(all_attribute_keys)
144
+ %i(created_at updated_at created_on updated_on).each do |col|
145
+ all_attribute_keys << col if supports_column?(col)
146
+ end
147
+ all_attribute_keys << :type if supports_sti?
148
+ all_attribute_keys << :archived_at if supports_column?(:archived_at)
149
+ end
150
+
151
+ def mark_last_seen_at(attributes_index)
152
+ return unless supports_column?(:last_seen_at) && inventory_collection.parallel_safe?
153
+ return if attributes_index.blank?
154
+
155
+ all_attribute_keys = [:last_seen_at]
156
+
157
+ last_seen_at = Time.now.utc
158
+ attributes_index.each_value { |v| v[:last_seen_at] = last_seen_at }
159
+
160
+ query = build_partial_update_query(all_attribute_keys, attributes_index.values)
161
+
162
+ get_connection.execute(query)
163
+ end
164
+
135
165
  # Batch updates existing records that are in the DB using attributes_index. And delete the ones that were not
136
166
  # present in inventory_objects_index.
137
167
  #
@@ -157,7 +187,7 @@ module InventoryRefresh::SaveCollection
157
187
  index = db_columns_index(record)
158
188
 
159
189
  inventory_object = inventory_objects_index.delete(index)
160
- hash = attributes_index.delete(index)
190
+ hash = attributes_index[index]
161
191
 
162
192
  if inventory_object.nil?
163
193
  # Record was found in the DB but not sent for saving, that means it doesn't exist anymore and we should
@@ -167,8 +197,9 @@ module InventoryRefresh::SaveCollection
167
197
  end
168
198
  else
169
199
  # Record was found in the DB and sent for saving, we will be updating the DB.
170
- next unless assert_referential_integrity(hash)
171
200
  inventory_object.id = primary_key_value
201
+ next unless assert_referential_integrity(hash)
202
+ next unless changed?(record, hash, all_attribute_keys)
172
203
 
173
204
  if inventory_collection.parallel_safe? &&
174
205
  (supports_remote_data_timestamp?(all_attribute_keys) || supports_remote_data_version?(all_attribute_keys))
@@ -176,12 +207,12 @@ module InventoryRefresh::SaveCollection
176
207
  version_attr, max_version_attr = if supports_remote_data_timestamp?(all_attribute_keys)
177
208
  [:resource_timestamp, :resource_timestamps_max]
178
209
  elsif supports_remote_data_version?(all_attribute_keys)
179
- [:resource_version, :resource_versions_max]
210
+ [:resource_counter, :resource_counters_max]
180
211
  end
181
212
 
182
- next if skeletonize_or_skip_record(record.try(version_attr) || record.try(:[], version_attr),
213
+ next if skeletonize_or_skip_record(record_key(record, version_attr),
183
214
  hash[version_attr],
184
- record.try(max_version_attr) || record.try(:[], max_version_attr),
215
+ record_key(record, max_version_attr),
185
216
  inventory_object)
186
217
  end
187
218
 
@@ -231,6 +262,26 @@ module InventoryRefresh::SaveCollection
231
262
  records_for_destroy = [] # Cleanup so GC can release it sooner
232
263
  end
233
264
 
265
+ def changed?(_record, _hash, _all_attribute_keys)
266
+ return true unless inventory_collection.check_changed?
267
+
268
+ # TODO(lsmola) this check needs to be disabled now, because it doesn't work with lazy_find having secondary
269
+ # indexes. Examples: we save a pod before we save a project, that means the project lazy_find won't evaluate,
270
+ # because we load it with secondary index and can't do skeletal precreate. Then when the object is being saved
271
+ # again, the lazy_find is evaluated, but the resource version is not changed, so the row is not saved.
272
+ #
273
+ # To keep this quick .changed? check, we might need to extend this, so the resource_version doesn't save until
274
+ # all lazy_links of the row are evaluated.
275
+ #
276
+ # if supports_resource_version?(all_attribute_keys) && supports_column?(resource_version_column)
277
+ # record_resource_version = record_key(record, resource_version_column.to_s)
278
+ #
279
+ # return record_resource_version != hash[resource_version_column]
280
+ # end
281
+
282
+ true
283
+ end
284
+
234
285
  def db_columns_index(record, pure_sql: false)
235
286
  # Incoming values are in SQL string form.
236
287
  # TODO(lsmola) unify this behavior with object_index_with_keys method in InventoryCollection
@@ -261,39 +312,6 @@ module InventoryRefresh::SaveCollection
261
312
  end
262
313
  end
263
314
 
264
- # Deletes or soft-deletes records. If the model_class supports a custom class delete method, we will use it for
265
- # batch soft-delete.
266
- #
267
- # @param records [Array<ApplicationRecord, Hash>] Records we want to delete. If we have only hashes, we need to
268
- # to fetch ApplicationRecord objects from the DB
269
- def destroy_records!(records)
270
- return false unless inventory_collection.delete_allowed?
271
- return if records.blank?
272
-
273
- # Is the delete_method rails standard deleting method?
274
- rails_delete = %i(destroy delete).include?(inventory_collection.delete_method)
275
- if !rails_delete && inventory_collection.model_class.respond_to?(inventory_collection.delete_method)
276
- # We have custom delete method defined on a class, that means it supports batch destroy
277
- inventory_collection.store_deleted_records(records.map { |x| {:id => record_key(x, primary_key)} })
278
- inventory_collection.model_class.public_send(inventory_collection.delete_method, records.map { |x| record_key(x, primary_key) })
279
- else
280
- # We have either standard :destroy and :delete rails method, or custom instance level delete method
281
- # Note: The standard :destroy and :delete rails method can't be batched because of the hooks and cascade destroy
282
- ActiveRecord::Base.transaction do
283
- if pure_sql_records_fetching
284
- # For pure SQL fetching, we need to get the AR objects again, so we can call destroy
285
- inventory_collection.model_class.where(:id => records.map { |x| record_key(x, primary_key) }).find_each do |record|
286
- delete_record!(record)
287
- end
288
- else
289
- records.each do |record|
290
- delete_record!(record)
291
- end
292
- end
293
- end
294
- end
295
- end
296
-
297
315
  # Batch updates existing records
298
316
  #
299
317
  # @param hashes [Array<Hash>] data used for building a batch update sql query
@@ -319,153 +337,6 @@ module InventoryRefresh::SaveCollection
319
337
  result
320
338
  end
321
339
 
322
- # Taking result from update or upsert of the row. The records that were not saved will be turned into skeletal
323
- # records and we will save them attribute by attribute.
324
- #
325
- # @param hash [Hash{String => InventoryObject}>] Hash with indexed data we want to save
326
- # @param result [Array<Hash>] Result from the DB containing the data that were actually saved
327
- # @param all_unique_columns [Boolean] True if index is consisted from all columns of the unique index. False if
328
- # index is just made from manager_ref turned in DB column names.
329
- def skeletonize_ignored_records!(hash, result, all_unique_columns: false)
330
- updated = if all_unique_columns
331
- result.map { |x| unique_index_columns_to_s.map { |key| x[key] } }
332
- else
333
- result.map { |x| db_columns_index(x, :pure_sql => true) }
334
- end
335
-
336
- updated.each { |x| hash.delete(x) }
337
-
338
- # Now lets skeletonize all inventory_objects that were not saved by update or upsert. Old rows that can't be
339
- # saved are not being sent here. We have only rows that are new, but become old as we send the query (so other
340
- # parallel process saved the data in the meantime). Or if some attributes are newer than the whole row
341
- # being sent.
342
- hash.each_key do |db_index|
343
- inventory_collection.skeletal_primary_index.skeletonize_primary_index(hash[db_index].manager_uuid)
344
- end
345
- end
346
-
347
- # Saves partial records using upsert, taking records from skeletal_primary_index. This is used both for
348
- # skeletal precreate as well as for saving partial rows.
349
- #
350
- # @param all_attribute_keys [Set] Superset of all keys of all records being saved
351
- def create_or_update_partial_records(all_attribute_keys)
352
- skeletal_attributes_index = {}
353
- skeletal_inventory_objects_index = {}
354
-
355
- inventory_collection.skeletal_primary_index.each_value do |inventory_object|
356
- attributes = inventory_object.attributes_with_keys(inventory_collection, all_attribute_keys)
357
- index = build_stringified_reference(attributes, unique_index_keys)
358
-
359
- skeletal_attributes_index[index] = attributes
360
- skeletal_inventory_objects_index[index] = inventory_object
361
- end
362
-
363
- if supports_remote_data_timestamp?(all_attribute_keys)
364
- all_attribute_keys << :resource_timestamps
365
- all_attribute_keys << :resource_timestamps_max
366
- elsif supports_remote_data_version?(all_attribute_keys)
367
- all_attribute_keys << :resource_versions
368
- all_attribute_keys << :resource_versions_max
369
- end
370
-
371
- indexed_inventory_objects = {}
372
- hashes = []
373
- create_time = time_now
374
-
375
- skeletal_inventory_objects_index.each do |index, inventory_object|
376
- hash = skeletal_attributes_index.delete(index)
377
- # Partial create or update must never set a timestamp for the whole row
378
- timestamps = if supports_remote_data_timestamp?(all_attribute_keys) && supports_resource_timestamps_max?
379
- assign_partial_row_version_attributes!(:resource_timestamp,
380
- :resource_timestamps,
381
- :resource_timestamps_max,
382
- hash,
383
- all_attribute_keys)
384
- elsif supports_remote_data_version?(all_attribute_keys) && supports_resource_versions_max?
385
- assign_partial_row_version_attributes!(:resource_version,
386
- :resource_versions,
387
- :resource_versions_max,
388
- hash,
389
- all_attribute_keys)
390
- end
391
- # Transform hash to DB format
392
- hash = transform_to_hash!(all_attribute_keys, hash)
393
-
394
- assign_attributes_for_create!(hash, create_time)
395
-
396
- next unless assert_referential_integrity(hash)
397
-
398
- hash[:__non_serialized_versions] = timestamps # store non serialized timestamps for the partial updates
399
- hashes << hash
400
- # Index on Unique Columns values, so we can easily fill in the :id later
401
- indexed_inventory_objects[unique_index_columns.map { |x| hash[x] }] = inventory_object
402
- end
403
-
404
- return if hashes.blank?
405
-
406
- # First, lets try to create all partial records
407
- hashes.each_slice(batch_size_for_persisting) do |batch|
408
- result = create_partial!(all_attribute_keys,
409
- batch,
410
- :on_conflict => :do_nothing)
411
- inventory_collection.store_created_records(result)
412
- end
413
-
414
- # We need only skeletal records with timestamp. We can't save the ones without timestamp, because e.g. skeletal
415
- # precreate would be updating records with default values, that are not correct.
416
- pre_filtered = hashes.select { |x| x[:resource_timestamps_max] || x[:resource_versions_max] }
417
-
418
- results = {}
419
- # TODO(lsmola) we don't need to process rows that were save by the create -> oncoflict do nothing
420
- (all_attribute_keys - inventory_collection.base_columns).each do |column_name|
421
- filtered = pre_filtered.select { |x| x.key?(column_name) }
422
-
423
- filtered.each_slice(batch_size_for_persisting) do |batch|
424
- # We need to set correct timestamps_max for this particular attribute, based on what is in timestamps
425
- if supports_remote_data_timestamp?(all_attribute_keys)
426
- batch.each { |x| x[:resource_timestamps_max] = x[:__non_serialized_versions][column_name] if x[:__non_serialized_versions][column_name] }
427
- elsif supports_remote_data_version?(all_attribute_keys)
428
- batch.each { |x| x[:resource_versions_max] = x[:__non_serialized_versions][column_name] if x[:__non_serialized_versions][column_name] }
429
- end
430
-
431
- result = create_partial!(inventory_collection.base_columns + [column_name],
432
- batch,
433
- :on_conflict => :do_update,
434
- :column_name => column_name)
435
- result.each do |res|
436
- results[res["id"]] = res
437
- end
438
- end
439
- end
440
-
441
- inventory_collection.store_updated_records(results.values)
442
-
443
- # TODO(lsmola) we need to move here the hash loading ar object etc. otherwise the lazy_find with key will not
444
- # be correct
445
- if inventory_collection.dependees.present?
446
- # We need to get primary keys of the created objects, but only if there are dependees that would use them
447
- map_ids_to_inventory_objects(indexed_inventory_objects,
448
- all_attribute_keys,
449
- hashes,
450
- nil,
451
- :on_conflict => :do_nothing)
452
- end
453
- end
454
-
455
- # Batch upserts 1 data column of the row, plus the internal columns
456
- #
457
- # @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
458
- # @param hashes [Array<InventoryRefresh::InventoryObject>] Array of InventoryObject objects we will be inserting
459
- # into the DB
460
- # @param on_conflict [Symbol, NilClass] defines behavior on conflict with unique index constraint, allowed values
461
- # are :do_update, :do_nothing, nil
462
- # @param column_name [Symbol] Name of the data column we will be upserting
463
- def create_partial!(all_attribute_keys, hashes, on_conflict: nil, column_name: nil)
464
- get_connection.execute(
465
- build_insert_query(all_attribute_keys, hashes, :on_conflict => on_conflict, :mode => :partial, :column_name => column_name)
466
- )
467
- end
468
-
469
340
  # Batch inserts records using attributes_index data. With on_conflict option using :do_update, this method
470
341
  # does atomic upsert.
471
342
  #
@@ -510,10 +381,10 @@ module InventoryRefresh::SaveCollection
510
381
  if inventory_collection.parallel_safe?
511
382
  # We've done upsert, so records were either created or updated. We can recognize that by checking if
512
383
  # created and updated timestamps are the same
513
- created_attr = "created_on" if inventory_collection.supports_created_on?
514
- created_attr ||= "created_at" if inventory_collection.supports_created_at?
515
- updated_attr = "updated_on" if inventory_collection.supports_updated_on?
516
- updated_attr ||= "updated_at" if inventory_collection.supports_updated_at?
384
+ created_attr = "created_on" if inventory_collection.supports_column?(:created_on)
385
+ created_attr ||= "created_at" if inventory_collection.supports_column?(:created_at)
386
+ updated_attr = "updated_on" if inventory_collection.supports_column?(:updated_on)
387
+ updated_attr ||= "updated_at" if inventory_collection.supports_column?(:updated_at)
517
388
 
518
389
  if created_attr && updated_attr
519
390
  created, updated = result.to_a.partition { |x| x[created_attr] == x[updated_attr] }
@@ -604,29 +475,6 @@ module InventoryRefresh::SaveCollection
604
475
  end
605
476
  end
606
477
  end
607
-
608
- def skeletonize_or_skip_record(record_version, hash_version, record_versions_max, inventory_object)
609
- # Skip updating this record, because it is old
610
- return true if record_version && hash_version && record_version >= hash_version
611
-
612
- # Some column has bigger version than the whole row, we need to store the row partially
613
- if record_versions_max && hash_version && record_versions_max > hash_version
614
- inventory_collection.skeletal_primary_index.skeletonize_primary_index(inventory_object.manager_uuid)
615
- return true
616
- end
617
-
618
- false
619
- end
620
-
621
- def assign_partial_row_version_attributes!(full_row_version_attr, partial_row_version_attr,
622
- partial_row_version_attr_max, hash, all_attribute_keys)
623
- hash[partial_row_version_attr_max] = hash.delete(full_row_version_attr)
624
-
625
- if hash[partial_row_version_attr].present?
626
- # Lets clean to only what we save, since when we build the skeletal object, we can set more
627
- hash[partial_row_version_attr] = hash[partial_row_version_attr].slice(*all_attribute_keys)
628
- end
629
- end
630
478
  end
631
479
  end
632
480
  end
@@ -0,0 +1,226 @@
1
+ module InventoryRefresh::SaveCollection
2
+ module Saver
3
+ module PartialUpsertHelper
4
+ private
5
+
6
+ # Taking result from update or upsert of the row. The records that were not saved will be turned into skeletal
7
+ # records and we will save them attribute by attribute.
8
+ #
9
+ # @param hash [Hash{String => InventoryObject}>] Hash with indexed data we want to save
10
+ # @param result [Array<Hash>] Result from the DB containing the data that were actually saved
11
+ # @param all_unique_columns [Boolean] True if index is consisted from all columns of the unique index. False if
12
+ # index is just made from manager_ref turned in DB column names.
13
+ def skeletonize_ignored_records!(hash, result, all_unique_columns: false)
14
+ updated = if all_unique_columns
15
+ result.map { |x| unique_index_columns_to_s.map { |key| x[key] } }
16
+ else
17
+ result.map { |x| db_columns_index(x, :pure_sql => true) }
18
+ end
19
+
20
+ updated.each { |x| hash.delete(x) }
21
+
22
+ # Now lets skeletonize all inventory_objects that were not saved by update or upsert. Old rows that can't be
23
+ # saved are not being sent here. We have only rows that are new, but become old as we send the query (so other
24
+ # parallel process saved the data in the meantime). Or if some attributes are newer than the whole row
25
+ # being sent.
26
+ hash.each_key do |db_index|
27
+ inventory_collection.skeletal_primary_index.skeletonize_primary_index(hash[db_index].manager_uuid)
28
+ end
29
+ end
30
+
31
+ # Saves partial records using upsert, taking records from skeletal_primary_index. This is used both for
32
+ # skeletal precreate as well as for saving partial rows.
33
+ #
34
+ # @param all_attribute_keys [Set] Superset of all keys of all records being saved
35
+ def create_or_update_partial_records(all_attribute_keys)
36
+ skeletal_inventory_objects_index, skeletal_attributes_index = load_partial_attributes(all_attribute_keys)
37
+
38
+ indexed_inventory_objects, hashes = process_partial_data(skeletal_inventory_objects_index,
39
+ skeletal_attributes_index,
40
+ all_attribute_keys)
41
+ return if hashes.blank?
42
+
43
+ processed_record_refs = skeletal_precreate!(hashes, all_attribute_keys)
44
+ hashes_for_update = hashes_for_update(hashes, processed_record_refs)
45
+ partial_updates!(hashes_for_update, all_attribute_keys)
46
+
47
+ if inventory_collection.dependees.present?
48
+ # We need to get primary keys of the created objects, but only if there are dependees that would use them
49
+ map_ids_to_inventory_objects(indexed_inventory_objects,
50
+ all_attribute_keys,
51
+ hashes,
52
+ nil,
53
+ :on_conflict => :do_nothing)
54
+ end
55
+ end
56
+
57
+ def load_partial_attributes(all_attribute_keys)
58
+ skeletal_attributes_index = {}
59
+ skeletal_inventory_objects_index = {}
60
+
61
+ inventory_collection.skeletal_primary_index.each_value do |inventory_object|
62
+ attributes = inventory_object.attributes_with_keys(inventory_collection, all_attribute_keys, inventory_object)
63
+ index = build_stringified_reference(attributes, unique_index_keys)
64
+
65
+ skeletal_attributes_index[index] = attributes
66
+ skeletal_inventory_objects_index[index] = inventory_object
67
+ end
68
+
69
+ if supports_remote_data_timestamp?(all_attribute_keys)
70
+ all_attribute_keys << :resource_timestamps
71
+ all_attribute_keys << :resource_timestamps_max
72
+ elsif supports_remote_data_version?(all_attribute_keys)
73
+ all_attribute_keys << :resource_counters
74
+ all_attribute_keys << :resource_counters_max
75
+ end
76
+
77
+ # We cannot set the resource_version doing partial update
78
+ all_attribute_keys.delete(resource_version_column)
79
+
80
+ return skeletal_inventory_objects_index, skeletal_attributes_index
81
+ end
82
+
83
+ def hashes_for_update(hashes, processed_record_refs)
84
+ indexed_hashes = hashes.each_with_object({}) { |hash, obj| obj[unique_index_columns.map { |x| hash[x] }] = hash }
85
+ indexed_hashes.except!(*processed_record_refs)
86
+ hashes_for_update = indexed_hashes.values
87
+
88
+ # We need only skeletal records with timestamp. We can't save the ones without timestamp, because e.g. skeletal
89
+ # precreate would be updating records with default values, that are not correct.
90
+ hashes_for_update.select { |x| x[:resource_timestamps_max] || x[:resource_counters_max] }
91
+ end
92
+
93
+ def skeletal_precreate!(hashes, all_attribute_keys)
94
+ processed_record_refs = []
95
+ # First, lets try to create all partial records
96
+ hashes.each_slice(batch_size_for_persisting) do |batch|
97
+ result = create_partial!(all_attribute_keys,
98
+ batch,
99
+ :on_conflict => :do_nothing)
100
+ inventory_collection.store_created_records(result)
101
+ # Store refs of created records, so we can ignore them for update
102
+ result.each { |hash| processed_record_refs << unique_index_columns.map { |x| hash[x.to_s] } }
103
+ end
104
+
105
+ processed_record_refs
106
+ end
107
+
108
+ def partial_updates!(hashes, all_attribute_keys)
109
+ results = {}
110
+ (all_attribute_keys - inventory_collection.base_columns).each do |column_name|
111
+ filtered = hashes.select { |x| x.key?(column_name) }
112
+
113
+ filtered.each_slice(batch_size_for_persisting) do |batch|
114
+ partial_update!(batch, all_attribute_keys, column_name, results)
115
+ end
116
+ end
117
+
118
+ inventory_collection.store_updated_records(results.values)
119
+ end
120
+
121
+ def partial_update!(batch, all_attribute_keys, column_name, results)
122
+ fill_comparables_max!(batch, all_attribute_keys, column_name)
123
+ result = create_partial!((inventory_collection.base_columns + [column_name]).to_set & all_attribute_keys,
124
+ batch,
125
+ :on_conflict => :do_update,
126
+ :column_name => column_name)
127
+ result.each do |res|
128
+ results[res["id"]] = res
129
+ end
130
+ end
131
+
132
+ def fill_comparables_max!(batch, all_attribute_keys, column_name)
133
+ comparables_max_name = comparable_max_column_name(all_attribute_keys)
134
+
135
+ # We need to set correct timestamps_max for this particular attribute, based on what is in timestamps
136
+ batch.each do |x|
137
+ next unless x[:__non_serialized_versions][column_name]
138
+ x[comparables_max_name] = x[:__non_serialized_versions][column_name]
139
+ end
140
+ end
141
+
142
+ def comparable_max_column_name(all_attribute_keys)
143
+ if supports_remote_data_timestamp?(all_attribute_keys)
144
+ :resource_timestamps_max
145
+ elsif supports_remote_data_version?(all_attribute_keys)
146
+ :resource_counters_max
147
+ end
148
+ end
149
+
150
+ def process_partial_data(skeletal_inventory_objects_index, skeletal_attributes_index, all_attribute_keys)
151
+ indexed_inventory_objects = {}
152
+ hashes = []
153
+ create_time = time_now
154
+
155
+ skeletal_inventory_objects_index.each do |index, inventory_object|
156
+ hash = prepare_partial_hash(skeletal_attributes_index.delete(index), all_attribute_keys, create_time)
157
+ next unless assert_referential_integrity(hash)
158
+
159
+ hashes << hash
160
+ # Index on Unique Columns values, so we can easily fill in the :id later
161
+ indexed_inventory_objects[unique_index_columns.map { |x| hash[x] }] = inventory_object
162
+ end
163
+
164
+ return indexed_inventory_objects, hashes
165
+ end
166
+
167
+ def prepare_partial_hash(hash, all_attribute_keys, create_time)
168
+ # Partial create or update must never set a timestamp for the whole row
169
+ timestamps = if supports_remote_data_timestamp?(all_attribute_keys) && supports_column?(:resource_timestamps_max)
170
+ assign_partial_row_version_attributes!(:resource_timestamp,
171
+ :resource_timestamps,
172
+ hash,
173
+ all_attribute_keys)
174
+ elsif supports_remote_data_version?(all_attribute_keys) && supports_column?(:resource_counters_max)
175
+ assign_partial_row_version_attributes!(:resource_counter,
176
+ :resource_counters,
177
+ hash,
178
+ all_attribute_keys)
179
+ end
180
+ # Transform hash to DB format
181
+ hash = transform_to_hash!(all_attribute_keys, hash)
182
+
183
+ assign_attributes_for_create!(hash, create_time)
184
+
185
+ hash[:__non_serialized_versions] = timestamps # store non serialized timestamps for the partial updates
186
+ hash
187
+ end
188
+
189
+ # Batch upserts 1 data column of the row, plus the internal columns
190
+ #
191
+ # @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
192
+ # @param hashes [Array<InventoryRefresh::InventoryObject>] Array of InventoryObject objects we will be inserting
193
+ # into the DB
194
+ # @param on_conflict [Symbol, NilClass] defines behavior on conflict with unique index constraint, allowed values
195
+ # are :do_update, :do_nothing, nil
196
+ # @param column_name [Symbol] Name of the data column we will be upserting
197
+ def create_partial!(all_attribute_keys, hashes, on_conflict: nil, column_name: nil)
198
+ get_connection.execute(
199
+ build_insert_query(all_attribute_keys, hashes, :on_conflict => on_conflict, :mode => :partial, :column_name => column_name)
200
+ )
201
+ end
202
+
203
+ def skeletonize_or_skip_record(record_version, hash_version, record_versions_max, inventory_object)
204
+ # Skip updating this record, because it is old
205
+ return true if record_version && hash_version && record_version >= hash_version
206
+
207
+ # Some column has bigger version than the whole row, we need to store the row partially
208
+ if record_versions_max && hash_version && record_versions_max > hash_version
209
+ inventory_collection.skeletal_primary_index.skeletonize_primary_index(inventory_object.manager_uuid)
210
+ return true
211
+ end
212
+
213
+ false
214
+ end
215
+
216
+ def assign_partial_row_version_attributes!(full_row_version_attr, partial_row_version_attr, hash, all_attribute_keys)
217
+ hash[comparable_max_column_name(all_attribute_keys)] = hash.delete(full_row_version_attr)
218
+
219
+ return if hash[partial_row_version_attr].blank?
220
+
221
+ # Lets clean to only what we save, since when we build the skeletal object, we can set more
222
+ hash[partial_row_version_attr] = hash[partial_row_version_attr].slice(*all_attribute_keys)
223
+ end
224
+ end
225
+ end
226
+ end