inventory_refresh 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. checksums.yaml +7 -0
  2. data/.codeclimate.yml +47 -0
  3. data/.gitignore +13 -0
  4. data/.rspec +4 -0
  5. data/.rspec_ci +4 -0
  6. data/.rubocop.yml +4 -0
  7. data/.rubocop_cc.yml +5 -0
  8. data/.rubocop_local.yml +2 -0
  9. data/.travis.yml +12 -0
  10. data/.yamllint +12 -0
  11. data/CHANGELOG.md +0 -0
  12. data/Gemfile +6 -0
  13. data/LICENSE +202 -0
  14. data/README.md +35 -0
  15. data/Rakefile +47 -0
  16. data/bin/console +14 -0
  17. data/bin/setup +8 -0
  18. data/inventory_refresh.gemspec +34 -0
  19. data/lib/inventory_refresh.rb +11 -0
  20. data/lib/inventory_refresh/application_record_iterator.rb +56 -0
  21. data/lib/inventory_refresh/application_record_reference.rb +15 -0
  22. data/lib/inventory_refresh/graph.rb +157 -0
  23. data/lib/inventory_refresh/graph/topological_sort.rb +66 -0
  24. data/lib/inventory_refresh/inventory_collection.rb +1175 -0
  25. data/lib/inventory_refresh/inventory_collection/data_storage.rb +178 -0
  26. data/lib/inventory_refresh/inventory_collection/graph.rb +170 -0
  27. data/lib/inventory_refresh/inventory_collection/index/proxy.rb +230 -0
  28. data/lib/inventory_refresh/inventory_collection/index/type/base.rb +80 -0
  29. data/lib/inventory_refresh/inventory_collection/index/type/data.rb +26 -0
  30. data/lib/inventory_refresh/inventory_collection/index/type/local_db.rb +286 -0
  31. data/lib/inventory_refresh/inventory_collection/index/type/skeletal.rb +116 -0
  32. data/lib/inventory_refresh/inventory_collection/reference.rb +96 -0
  33. data/lib/inventory_refresh/inventory_collection/references_storage.rb +106 -0
  34. data/lib/inventory_refresh/inventory_collection/scanner.rb +117 -0
  35. data/lib/inventory_refresh/inventory_collection/serialization.rb +140 -0
  36. data/lib/inventory_refresh/inventory_object.rb +303 -0
  37. data/lib/inventory_refresh/inventory_object_lazy.rb +151 -0
  38. data/lib/inventory_refresh/save_collection/base.rb +38 -0
  39. data/lib/inventory_refresh/save_collection/recursive.rb +52 -0
  40. data/lib/inventory_refresh/save_collection/saver/base.rb +390 -0
  41. data/lib/inventory_refresh/save_collection/saver/batch.rb +17 -0
  42. data/lib/inventory_refresh/save_collection/saver/concurrent_safe.rb +71 -0
  43. data/lib/inventory_refresh/save_collection/saver/concurrent_safe_batch.rb +632 -0
  44. data/lib/inventory_refresh/save_collection/saver/default.rb +57 -0
  45. data/lib/inventory_refresh/save_collection/saver/sql_helper.rb +85 -0
  46. data/lib/inventory_refresh/save_collection/saver/sql_helper_update.rb +120 -0
  47. data/lib/inventory_refresh/save_collection/saver/sql_helper_upsert.rb +196 -0
  48. data/lib/inventory_refresh/save_collection/topological_sort.rb +38 -0
  49. data/lib/inventory_refresh/save_inventory.rb +38 -0
  50. data/lib/inventory_refresh/target.rb +73 -0
  51. data/lib/inventory_refresh/target_collection.rb +80 -0
  52. data/lib/inventory_refresh/version.rb +3 -0
  53. data/tools/ci/create_db_user.sh +3 -0
  54. metadata +207 -0
@@ -0,0 +1,17 @@
1
require "inventory_refresh/save_collection/saver/concurrent_safe_batch"

module InventoryRefresh::SaveCollection
  module Saver
    # Saver strategy identical to ConcurrentSafeBatch, except that it does not rely on a unique DB
    # index existing for the collection's manager_ref columns.
    class Batch < InventoryRefresh::SaveCollection::Saver::ConcurrentSafeBatch
      private

      # Returns the manager_ref transformed into column names. Used by strategies that do not
      # expect a unique DB index to be present.
      #
      # @return [Array<Symbol>] manager ref transformed to column names
      def unique_index_columns
        inventory_collection.manager_ref_to_cols.map { |column| column.to_sym }
      end
    end
  end
end
@@ -0,0 +1,71 @@
1
require "inventory_refresh/save_collection/saver/base"

module InventoryRefresh::SaveCollection
  module Saver
    # Saver strategy persisting records one by one, guarding full-row updates with
    # :remote_data_timestamp so an older payload never overwrites newer DB data.
    class ConcurrentSafe < InventoryRefresh::SaveCollection::Saver::Base
      # TODO(lsmola) this strategy does not make much sense, it's better to use concurrent_safe_batch and make batch size
      # configurable
      private

      # Updates the passed record with hash data and stores primary key value into inventory_object.
      #
      # @param record [ApplicationRecord] record we want to update in DB
      # @param hash [Hash] data we want to update the record with
      # @param inventory_object [InventoryRefresh::InventoryObject] InventoryObject instance where we will store primary
      #   key value
      def update_record!(record, hash, inventory_object)
        assign_attributes_for_update!(hash, time_now)
        record.assign_attributes(hash.except(:id))

        # Skip the DB write entirely when change-checking is enabled and nothing actually changed.
        if !inventory_object.inventory_collection.check_changed? || record.changed?
          update_query = inventory_object.inventory_collection.model_class.where(:id => record.id)
          if hash[:remote_data_timestamp]
            # Optimistic concurrency guard: only update rows whose stored timestamp is strictly
            # older than the incoming one, so parallel savers cannot regress data.
            timestamp_field = inventory_collection.model_class.arel_table[:remote_data_timestamp]
            update_query = update_query.where(timestamp_field.lt(hash[:remote_data_timestamp]))
          end

          update_query.update_all(hash)
          inventory_collection.store_updated_records(record)
        end

        inventory_object.id = record.id
      end

      # Creates a new record in the DB using the passed hash data
      #
      # @param hash [Hash] hash with data we want to persist to DB
      # @param inventory_object [InventoryRefresh::InventoryObject] InventoryObject instance where we will store primary
      #   key value
      def create_record!(hash, inventory_object)
        all_attribute_keys = hash.keys
        # Round-trip through an AR instance so defaults and type casting are applied.
        data = inventory_collection.model_class.new(hash).attributes.symbolize_keys

        # TODO(lsmola) abstract common behavior into base class
        all_attribute_keys << :type if supports_sti?
        all_attribute_keys << :created_at if supports_created_at?
        all_attribute_keys << :updated_at if supports_updated_at?
        all_attribute_keys << :created_on if supports_created_on?
        all_attribute_keys << :updated_on if supports_updated_on?
        hash_for_creation = if inventory_collection.use_ar_object?
                              record = inventory_collection.model_class.new(data)
                              values_for_database!(all_attribute_keys,
                                                   record.attributes.symbolize_keys)
                            elsif serializable_keys?
                              values_for_database!(all_attribute_keys,
                                                   data)
                            else
                              data
                            end

        assign_attributes_for_create!(hash_for_creation, time_now)

        result_id = ActiveRecord::Base.connection.execute(
          build_insert_query(all_attribute_keys, [hash_for_creation])
        )

        # NOTE(review): assumes the insert query returns rows exposing an "id" column
        # (e.g. PostgreSQL INSERT ... RETURNING) — confirm for other adapters.
        inventory_object.id = result_id.to_a.try(:first).try(:[], "id")
        inventory_collection.store_created_records(inventory_object)
      end
    end
  end
end
@@ -0,0 +1,632 @@
1
+ require "inventory_refresh/save_collection/saver/base"
2
+ require "active_support/core_ext/module/delegation"
3
+
4
+ module InventoryRefresh::SaveCollection
5
+ module Saver
6
+ class ConcurrentSafeBatch < InventoryRefresh::SaveCollection::Saver::Base
7
+ private
8
+
9
+ delegate :association_to_base_class_mapping,
10
+ :association_to_foreign_key_mapping,
11
+ :association_to_foreign_type_mapping,
12
+ :attribute_references,
13
+ :to => :inventory_collection
14
+
15
# Generic attribute accessor working for both ApplicationRecord objects and raw SQL rows.
# Dispatches to ar_record_key or pure_sql_record_key depending on the fetching mode.
#
# @param record [Hash, ApplicationRecord] record or hash
# @param key [Symbol] key pointing to attribute of the record
# @return [Object] value of the record on the key
def record_key(record, key)
  method(record_key_method).call(record, key)
end
23
+
24
# Attribute accessor for a full ActiveRecord object: reads the attribute via its public reader.
#
# @param record [ApplicationRecord] record
# @param key [Symbol] key pointing to attribute of the record
# @return [Object] value of the record on the key
def ar_record_key(record, key)
  attribute_value = record.public_send(key)
  attribute_value
end
32
+
33
# Attribute accessor for a raw SQL result row: looks up the column position of the key
# in select_keys_indexes and reads the value at that position.
#
# @param record [Hash] hash
# @param key [Symbol] key pointing to attribute of the record
# @return [Object] value of the record on the key
def pure_sql_record_key(record, key)
  column_position = select_keys_indexes[key]
  record[column_position]
end
41
+
42
# Returns iterator or relation based on settings
#
# @param association [Symbol] An existing association on manager
# @return [ActiveRecord::Relation, InventoryRefresh::ApplicationRecordIterator] iterator or relation based on settings
def batch_iterator(association)
  if pure_sql_records_fetching
    # Building fast iterator doing pure SQL query and therefore avoiding redundant creation of AR objects. The
    # iterator responds to find_in_batches, so it acts like the AR relation. For targeted refresh, the association
    # can already be ApplicationRecordIterator, so we will skip that.
    pure_sql_iterator = lambda do |&block|
      primary_key_offset = nil
      loop do
        relation = association.select(*select_keys)
                              .reorder("#{primary_key} ASC")
                              .limit(batch_size)
        # Using rails way of comparing primary key instead of offset (keyset pagination:
        # each page starts strictly after the last primary key seen).
        relation = relation.where(arel_primary_key.gt(primary_key_offset)) if primary_key_offset
        records = get_connection.query(relation.to_sql)
        last_record = records.last
        block.call(records)

        # A batch smaller than batch_size means we reached the end of the relation.
        break if records.size < batch_size
        # Remember the last primary key so the next iteration continues after it.
        primary_key_offset = record_key(last_record, primary_key)
      end
    end

    InventoryRefresh::ApplicationRecordIterator.new(:iterator => pure_sql_iterator)
  else
    # Normal Rails ActiveRecord::Relation where we can call find_in_batches or
    # InventoryRefresh::ApplicationRecordIterator passed from targeted refresh
    association
  end
end
75
+
76
# Saves the InventoryCollection: indexes all incoming InventoryObjects, updates/destroys
# existing DB rows, then batch-creates the rows that were not found in the DB.
#
# @param association [Symbol] An existing association on manager
def save!(association)
  attributes_index = {}
  inventory_objects_index = {}
  all_attribute_keys = Set.new + inventory_collection.batch_extra_attributes

  inventory_collection.each do |inventory_object|
    attributes = inventory_object.attributes_with_keys(inventory_collection, all_attribute_keys)
    index = build_stringified_reference(attributes, unique_index_keys)

    # Interesting fact: not building attributes_index and using only inventory_objects_index doesn't do much
    # of a difference, since the most objects inside are shared.
    attributes_index[index] = attributes
    inventory_objects_index[index] = inventory_object
  end

  all_attribute_keys << :created_at if supports_created_at?
  all_attribute_keys << :updated_at if supports_updated_at?
  all_attribute_keys << :created_on if supports_created_on?
  all_attribute_keys << :updated_on if supports_updated_on?
  all_attribute_keys << :type if supports_sti?

  #_log.debug("Processing #{inventory_collection} of size #{inventory_collection.size}...")

  # Merged duplicate `unless inventory_collection.create_only?` guards into one block;
  # the update/destroy pass and the custom reconnect hook still run in the same order.
  unless inventory_collection.create_only?
    update_or_destroy_records!(batch_iterator(association), inventory_objects_index, attributes_index, all_attribute_keys)
    inventory_collection.custom_reconnect_block&.call(inventory_collection, inventory_objects_index, attributes_index)
  end

  # Records that were not found in the DB but sent for saving, we will be creating these in the DB.
  if inventory_collection.create_allowed?
    on_conflict = inventory_collection.parallel_safe? ? :do_update : nil

    inventory_objects_index.each_slice(batch_size_for_persisting) do |batch|
      create_records!(all_attribute_keys, batch, attributes_index, :on_conflict => on_conflict)
    end

    # Let the GC clean this up
    inventory_objects_index = nil
    attributes_index = nil

    if inventory_collection.parallel_safe?
      create_or_update_partial_records(all_attribute_keys)
    end
  end
  #_log.debug("Processing #{inventory_collection}, "\
  #           "created=#{inventory_collection.created_records.count}, "\
  #           "updated=#{inventory_collection.updated_records.count}, "\
  #           "deleted=#{inventory_collection.deleted_records.count}...Complete")
rescue => e
  #_log.error("Error when saving #{inventory_collection} with #{inventory_collection_details}. Message: #{e.message}")
  raise e
end
134
+
135
# Batch updates existing records that are in the DB using attributes_index. And delete the ones that were not
# present in inventory_objects_index.
#
# @param records_batch_iterator [ActiveRecord::Relation, InventoryRefresh::ApplicationRecordIterator] iterator or
#   relation, both responding to :find_in_batches method
# @param inventory_objects_index [Hash{String => InventoryRefresh::InventoryObject}] Hash of InventoryObject objects
# @param attributes_index [Hash{String => Hash}] Hash of data hashes with only keys that are column names of the
#   models's table
# @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
def update_or_destroy_records!(records_batch_iterator, inventory_objects_index, attributes_index, all_attribute_keys)
  hashes_for_update = []
  records_for_destroy = []
  indexed_inventory_objects = {}

  records_batch_iterator.find_in_batches(:batch_size => batch_size) do |batch|
    update_time = time_now

    batch.each do |record|
      primary_key_value = record_key(record, primary_key)

      next unless assert_distinct_relation(primary_key_value)

      index = db_columns_index(record)

      # Entries removed here are consumed; whatever remains after this pass is created later.
      inventory_object = inventory_objects_index.delete(index)
      hash = attributes_index.delete(index)

      if inventory_object.nil?
        # Record was found in the DB but not sent for saving, that means it doesn't exist anymore and we should
        # delete it from the DB.
        if inventory_collection.delete_allowed?
          records_for_destroy << record
        end
      else
        # Record was found in the DB and sent for saving, we will be updating the DB.
        next unless assert_referential_integrity(hash)
        inventory_object.id = primary_key_value

        if inventory_collection.parallel_safe? &&
           (supports_remote_data_timestamp?(all_attribute_keys) || supports_remote_data_version?(all_attribute_keys))

          version_attr, max_version_attr = if supports_remote_data_timestamp?(all_attribute_keys)
                                             [:resource_timestamp, :resource_timestamps_max]
                                           elsif supports_remote_data_version?(all_attribute_keys)
                                             [:resource_version, :resource_versions_max]
                                           end

          # record may be an AR object (try(version_attr)) or a raw SQL row (try(:[], version_attr)).
          next if skeletonize_or_skip_record(record.try(version_attr) || record.try(:[], version_attr),
                                             hash[version_attr],
                                             record.try(max_version_attr) || record.try(:[], max_version_attr),
                                             inventory_object)
        end

        hash_for_update = if inventory_collection.use_ar_object?
                            record.assign_attributes(hash.except(:id))
                            values_for_database!(all_attribute_keys,
                                                 record.attributes.symbolize_keys)
                          elsif serializable_keys?
                            # TODO(lsmola) hash data with current DB data to allow subset of data being sent,
                            # otherwise we would nullify the not sent attributes. Test e.g. on disks in cloud
                            values_for_database!(all_attribute_keys,
                                                 hash)
                          else
                            # TODO(lsmola) hash data with current DB data to allow subset of data being sent,
                            # otherwise we would nullify the not sent attributes. Test e.g. on disks in cloud
                            hash
                          end
        assign_attributes_for_update!(hash_for_update, update_time)

        hash_for_update[:id] = primary_key_value
        indexed_inventory_objects[index] = inventory_object
        hashes_for_update << hash_for_update
      end
    end

    # Update in batches
    if hashes_for_update.size >= batch_size_for_persisting
      update_records!(all_attribute_keys, hashes_for_update, indexed_inventory_objects)

      hashes_for_update = []
      indexed_inventory_objects = {}
    end

    # Destroy in batches
    if records_for_destroy.size >= batch_size_for_persisting
      destroy_records!(records_for_destroy)
      records_for_destroy = []
    end
  end

  # Update the last batch
  update_records!(all_attribute_keys, hashes_for_update, indexed_inventory_objects)
  hashes_for_update = [] # Cleanup so GC can release it sooner

  # Destroy the last batch
  destroy_records!(records_for_destroy)
  records_for_destroy = [] # Cleanup so GC can release it sooner
end
233
+
234
# Builds the stringified unique-index key ("col1__col2__...") for a DB record, so it can be
# matched against the indexes built from incoming InventoryObjects.
#
# @param record [ApplicationRecord, Hash, Array] record in AR or raw SQL row form
# @param pure_sql [Boolean] true when record is a raw SQL result row keyed by column name
# @return [String] values of the unique index columns joined by "__"
def db_columns_index(record, pure_sql: false)
  # Incoming values are in SQL string form.
  # TODO(lsmola) unify this behavior with object_index_with_keys method in InventoryCollection
  # TODO(lsmola) maybe we can drop the whole pure sql fetching, since everything will be targeted refresh
  # with streaming refresh? Maybe just metrics and events will not be, but those should be upsert only
  # TODO(lsmola) taking ^ in account, we can't drop pure sql, since that is returned by batch insert and
  # update queries
  unique_index_keys_to_s.map do |attribute|
    value = if pure_sql
              record[attribute]
            else
              record_key(record, attribute)
            end

    format_value(attribute, value)
  end.join("__")
end
251
+
252
# Normalizes a single unique-index column value to its canonical string form, so values coming
# from AR objects and from raw SQL rows compare equal.
#
# @param attribute [String] column name
# @param value [Object] raw value read from the record
# @return [String] normalized string representation of the value
def format_value(attribute, value)
  if attribute == "timestamp"
    # TODO: can this be covered by @deserializable_keys?
    model_class.type_for_attribute(attribute).cast(value).utc.iso8601.to_s
  else
    type = deserializable_keys[attribute.to_sym]
    type ? type.deserialize(value).to_s : value.to_s
  end
end
263
+
264
# Deletes or soft-deletes records. If the model_class supports a custom class delete method, we will use it for
# batch soft-delete.
#
# @param records [Array<ApplicationRecord, Hash>] Records we want to delete. If we have only hashes, we need to
#   to fetch ApplicationRecord objects from the DB
def destroy_records!(records)
  return false unless inventory_collection.delete_allowed?
  return if records.blank?

  # Is the delete_method rails standard deleting method?
  rails_delete = %i(destroy delete).include?(inventory_collection.delete_method)
  if !rails_delete && inventory_collection.model_class.respond_to?(inventory_collection.delete_method)
    # We have custom delete method defined on a class, that means it supports batch destroy
    inventory_collection.store_deleted_records(records.map { |x| {:id => record_key(x, primary_key)} })
    inventory_collection.model_class.public_send(inventory_collection.delete_method, records.map { |x| record_key(x, primary_key) })
  else
    # We have either standard :destroy and :delete rails method, or custom instance level delete method
    # Note: The standard :destroy and :delete rails method can't be batched because of the hooks and cascade destroy
    ActiveRecord::Base.transaction do
      if pure_sql_records_fetching
        # For pure SQL fetching, we need to get the AR objects again, so we can call destroy
        inventory_collection.model_class.where(:id => records.map { |x| record_key(x, primary_key) }).find_each do |record|
          delete_record!(record)
        end
      else
        records.each do |record|
          delete_record!(record)
        end
      end
    end
  end
end
296
+
297
# Batch updates existing records
#
# @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
# @param hashes [Array<Hash>] data used for building a batch update sql query
# @param indexed_inventory_objects [Hash{String => InventoryRefresh::InventoryObject}] InventoryObjects indexed
#   by their stringified unique-index value, used to skeletonize rows the update skipped
# @return [Object] raw result of the update query execution
def update_records!(all_attribute_keys, hashes, indexed_inventory_objects)
  return if hashes.blank?

  unless inventory_collection.parallel_safe?
    # We need to update the stored records before we save it, since hashes are modified
    inventory_collection.store_updated_records(hashes)
  end

  query = build_update_query(all_attribute_keys, hashes)
  result = get_connection.execute(query)

  if inventory_collection.parallel_safe?
    # We will check for timestamp clashes of full row update and we will fallback to skeletal update
    inventory_collection.store_updated_records(result)

    skeletonize_ignored_records!(indexed_inventory_objects, result)
  end

  result
end
321
+
322
# Taking result from update or upsert of the row. The records that were not saved will be turned into skeletal
# records and we will save them attribute by attribute.
#
# @param hash [Hash{String => InventoryObject}>] Hash with indexed data we want to save
# @param result [Array<Hash>] Result from the DB containing the data that were actually saved
# @param all_unique_columns [Boolean] True if index is consisted from all columns of the unique index. False if
#   index is just made from manager_ref turned in DB column names.
def skeletonize_ignored_records!(hash, result, all_unique_columns: false)
  # Rebuild the index keys of the rows the DB reports as saved, in the same format used for hash's keys.
  updated = if all_unique_columns
              result.map { |x| unique_index_columns_to_s.map { |key| x[key] } }
            else
              result.map { |x| db_columns_index(x, :pure_sql => true) }
            end

  # Whatever remains in hash after this was NOT saved by the update/upsert.
  updated.each { |x| hash.delete(x) }

  # Now lets skeletonize all inventory_objects that were not saved by update or upsert. Old rows that can't be
  # saved are not being sent here. We have only rows that are new, but become old as we send the query (so other
  # parallel process saved the data in the meantime). Or if some attributes are newer than the whole row
  # being sent.
  hash.each_key do |db_index|
    inventory_collection.skeletal_primary_index.skeletonize_primary_index(hash[db_index].manager_uuid)
  end
end
346
+
347
# Saves partial records using upsert, taking records from skeletal_primary_index. This is used both for
# skeletal precreate as well as for saving partial rows.
#
# @param all_attribute_keys [Set] Superset of all keys of all records being saved
def create_or_update_partial_records(all_attribute_keys)
  skeletal_attributes_index = {}
  skeletal_inventory_objects_index = {}

  inventory_collection.skeletal_primary_index.each_value do |inventory_object|
    attributes = inventory_object.attributes_with_keys(inventory_collection, all_attribute_keys)
    index = build_stringified_reference(attributes, unique_index_keys)

    skeletal_attributes_index[index] = attributes
    skeletal_inventory_objects_index[index] = inventory_object
  end

  if supports_remote_data_timestamp?(all_attribute_keys)
    all_attribute_keys << :resource_timestamps
    all_attribute_keys << :resource_timestamps_max
  elsif supports_remote_data_version?(all_attribute_keys)
    all_attribute_keys << :resource_versions
    all_attribute_keys << :resource_versions_max
  end

  indexed_inventory_objects = {}
  hashes = []
  create_time = time_now

  skeletal_inventory_objects_index.each do |index, inventory_object|
    hash = skeletal_attributes_index.delete(index)
    # Partial create or update must never set a timestamp for the whole row
    timestamps = if supports_remote_data_timestamp?(all_attribute_keys) && supports_resource_timestamps_max?
                   assign_partial_row_version_attributes!(:resource_timestamp,
                                                          :resource_timestamps,
                                                          :resource_timestamps_max,
                                                          hash,
                                                          all_attribute_keys)
                 elsif supports_remote_data_version?(all_attribute_keys) && supports_resource_versions_max?
                   assign_partial_row_version_attributes!(:resource_version,
                                                          :resource_versions,
                                                          :resource_versions_max,
                                                          hash,
                                                          all_attribute_keys)
                 end
    # Transform hash to DB format
    hash = transform_to_hash!(all_attribute_keys, hash)

    assign_attributes_for_create!(hash, create_time)

    next unless assert_referential_integrity(hash)

    hash[:__non_serialized_versions] = timestamps # store non serialized timestamps for the partial updates
    hashes << hash
    # Index on Unique Columns values, so we can easily fill in the :id later
    indexed_inventory_objects[unique_index_columns.map { |x| hash[x] }] = inventory_object
  end

  return if hashes.blank?

  # First, lets try to create all partial records
  hashes.each_slice(batch_size_for_persisting) do |batch|
    result = create_partial!(all_attribute_keys,
                             batch,
                             :on_conflict => :do_nothing)
    inventory_collection.store_created_records(result)
  end

  # We need only skeletal records with timestamp. We can't save the ones without timestamp, because e.g. skeletal
  # precreate would be updating records with default values, that are not correct.
  pre_filtered = hashes.select { |x| x[:resource_timestamps_max] || x[:resource_versions_max] }

  results = {}
  # TODO(lsmola) we don't need to process rows that were saved by the create -> on conflict do nothing
  (all_attribute_keys - inventory_collection.base_columns).each do |column_name|
    filtered = pre_filtered.select { |x| x.key?(column_name) }

    filtered.each_slice(batch_size_for_persisting) do |batch|
      # We need to set correct timestamps_max for this particular attribute, based on what is in timestamps
      if supports_remote_data_timestamp?(all_attribute_keys)
        batch.each { |x| x[:resource_timestamps_max] = x[:__non_serialized_versions][column_name] if x[:__non_serialized_versions][column_name] }
      elsif supports_remote_data_version?(all_attribute_keys)
        batch.each { |x| x[:resource_versions_max] = x[:__non_serialized_versions][column_name] if x[:__non_serialized_versions][column_name] }
      end

      # Upsert only this one data column (plus the base columns) per batch.
      result = create_partial!(inventory_collection.base_columns + [column_name],
                               batch,
                               :on_conflict => :do_update,
                               :column_name => column_name)
      result.each do |res|
        results[res["id"]] = res
      end
    end
  end

  inventory_collection.store_updated_records(results.values)

  # TODO(lsmola) we need to move here the hash loading ar object etc. otherwise the lazy_find with key will not
  # be correct
  if inventory_collection.dependees.present?
    # We need to get primary keys of the created objects, but only if there are dependees that would use them
    map_ids_to_inventory_objects(indexed_inventory_objects,
                                 all_attribute_keys,
                                 hashes,
                                 nil,
                                 :on_conflict => :do_nothing)
  end
end
454
+
455
# Batch upserts 1 data column of the row, plus the internal columns
#
# @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
# @param hashes [Array<InventoryRefresh::InventoryObject>] Array of InventoryObject objects we will be inserting
#   into the DB
# @param on_conflict [Symbol, NilClass] defines behavior on conflict with unique index constraint, allowed values
#   are :do_update, :do_nothing, nil
# @param column_name [Symbol] Name of the data column we will be upserting
def create_partial!(all_attribute_keys, hashes, on_conflict: nil, column_name: nil)
  query = build_insert_query(all_attribute_keys, hashes,
                             :on_conflict => on_conflict, :mode => :partial, :column_name => column_name)
  get_connection.execute(query)
end
468
+
469
# Batch inserts records using attributes_index data. With on_conflict option using :do_update, this method
# does atomic upsert.
#
# @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
# @param batch [Array<InventoryRefresh::InventoryObject>] Array of InventoryObject object we will be inserting into
#   the DB
# @param attributes_index [Hash{String => Hash}] Hash of data hashes with only keys that are column names of the
#   models's table
# @param on_conflict [Symbol, NilClass] defines behavior on conflict with unique index constraint, allowed values
#   are :do_update, :do_nothing, nil
def create_records!(all_attribute_keys, batch, attributes_index, on_conflict: nil)
  indexed_inventory_objects = {}
  hashes = []
  create_time = time_now
  batch.each do |index, inventory_object|
    hash = if inventory_collection.use_ar_object?
             record = inventory_collection.model_class.new(attributes_index[index])
             values_for_database!(all_attribute_keys,
                                  record.attributes.symbolize_keys)
           elsif serializable_keys?
             values_for_database!(all_attribute_keys,
                                  attributes_index[index])
           else
             attributes_index[index]
           end

    assign_attributes_for_create!(hash, create_time)

    next unless assert_referential_integrity(hash)

    hashes << hash
    # Index on Unique Columns values, so we can easily fill in the :id later
    indexed_inventory_objects[unique_index_columns.map { |x| hash[x] }] = inventory_object
  end

  return if hashes.blank?

  result = get_connection.execute(
    build_insert_query(all_attribute_keys, hashes, :on_conflict => on_conflict, :mode => :full)
  )

  if inventory_collection.parallel_safe?
    # We've done upsert, so records were either created or updated. We can recognize that by checking if
    # created and updated timestamps are the same
    created_attr = "created_on" if inventory_collection.supports_created_on?
    created_attr ||= "created_at" if inventory_collection.supports_created_at?
    updated_attr = "updated_on" if inventory_collection.supports_updated_on?
    updated_attr ||= "updated_at" if inventory_collection.supports_updated_at?

    if created_attr && updated_attr
      created, updated = result.to_a.partition { |x| x[created_attr] == x[updated_attr] }
      inventory_collection.store_created_records(created)
      inventory_collection.store_updated_records(updated)
    else
      # The record doesn't have both created and updated attrs, so we'll take all as created
      inventory_collection.store_created_records(result)
    end
  else
    # We've done just insert, so all records were created
    inventory_collection.store_created_records(result)
  end

  if inventory_collection.dependees.present?
    # We need to get primary keys of the created objects, but only if there are dependees that would use them
    map_ids_to_inventory_objects(indexed_inventory_objects,
                                 all_attribute_keys,
                                 hashes,
                                 result,
                                 :on_conflict => on_conflict)
  end

  if inventory_collection.parallel_safe?
    # Rows skipped by the upsert (lost the timestamp race) fall back to skeletal per-attribute saving.
    skeletonize_ignored_records!(indexed_inventory_objects, result, :all_unique_columns => true)
  end
end
544
+
545
# Stores primary_key values of created records into associated InventoryObject objects.
#
# @param indexed_inventory_objects [Hash{Array => InventoryRefresh::InventoryObject}] inventory objects indexed
#   by an Array made from their unique index column values
# @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
# @param hashes [Array<Hash>] Array of hashes that were used for inserting of the data
# @param result [Array<Hash>] Array of hashes that are a result of the batch insert query, each result
#   contains a primary key value plus all columns that are a part of the unique index
# @param on_conflict [Symbol, NilClass] defines behavior on conflict with unique index constraint, allowed values
#   are :do_update, :do_nothing, nil
def map_ids_to_inventory_objects(indexed_inventory_objects, all_attribute_keys, hashes, result, on_conflict:)
  if on_conflict == :do_nothing
    # TODO(lsmola) is the comment below still accurate? We will update some partial rows, the actual skeletal
    # precreate will still do nothing.
    # For ON CONFLICT DO NOTHING, we need to always fetch the records plus the attribute_references. This path
    # applies only for skeletal precreate.
    inventory_collection.model_class.where(
      build_multi_selection_query(hashes)
    ).select(unique_index_columns + [:id] + attribute_references.to_a).each do |record|
      key              = unique_index_columns.map { |x| record.public_send(x) }
      inventory_object = indexed_inventory_objects[key]

      # Bug fix: a fetched record can be absent from indexed_inventory_objects (the trailing
      # `if inventory_object` guard in the original showed nil is expected here), but the
      # attribute_references loop below dereferenced it unguarded and raised NoMethodError on nil.
      next unless inventory_object

      # Load also attribute_references, so lazy_find with :key pointing to skeletal reference works
      attributes = record.attributes.symbolize_keys
      attribute_references.each do |ref|
        inventory_object[ref] = attributes[ref]

        # Polymorphic/foreign-key associations are stored as lightweight references instead of
        # loading the full ActiveRecord object.
        next unless (foreign_key = association_to_foreign_key_mapping[ref])

        base_class_name = attributes[association_to_foreign_type_mapping[ref].try(:to_sym)] || association_to_base_class_mapping[ref]
        id              = attributes[foreign_key.to_sym]
        inventory_object[ref] = InventoryRefresh::ApplicationRecordReference.new(base_class_name, id)
      end

      inventory_object.id = record.id
    end
  elsif !supports_remote_data_timestamp?(all_attribute_keys) || result.count == batch_size_for_persisting
    # We can use the insert query result to fetch all primary_key values, which makes this the most effective
    # path.
    result.each do |inserted_record|
      key = unique_index_columns.map do |x|
        value = inserted_record[x.to_s]
        type  = deserializable_keys[x]
        # Raw query results come back serialized; deserialize columns that have a registered type.
        type ? type.deserialize(value) : value
      end
      inventory_object    = indexed_inventory_objects[key]
      inventory_object.id = inserted_record[primary_key] if inventory_object
    end
  else
    # The remote_data_timestamp is adding a WHERE condition to ON CONFLICT UPDATE. As a result, the RETURNING
    # clause is not guaranteed to return all ids of the inserted/updated records in the result. In that case
    # we test if the number of results matches the expected batch size. Then if the counts do not match, the only
    # safe option is to query all the data from the DB, using the unique_indexes. The batch size will also not match
    # for every remainders(a last batch in a stream of batches)
    inventory_collection.model_class.where(
      build_multi_selection_query(hashes)
    ).select(unique_index_columns + [:id]).each do |inserted_record|
      key              = unique_index_columns.map { |x| inserted_record.public_send(x) }
      inventory_object = indexed_inventory_objects[key]
      inventory_object.id = inserted_record.id if inventory_object
    end
  end
end
607
+
608
# Decides whether a DB row should be skipped or stored only partially (skeletonized).
#
# @param record_version [Object, NilClass] version of the whole row currently in the DB
# @param hash_version [Object, NilClass] version of the incoming data hash
# @param record_versions_max [Object, NilClass] highest per-column version present on the DB row
# @param inventory_object [InventoryRefresh::InventoryObject] object being saved
# @return [Boolean] true when the full-row update must NOT be applied, false otherwise
def skeletonize_or_skip_record(record_version, hash_version, record_versions_max, inventory_object)
  # Without a version on the incoming hash there is nothing to compare against.
  if hash_version
    # The stored row is at least as new as the incoming data => skip the update entirely.
    return true if record_version && record_version >= hash_version

    # Some individual column is newer than the incoming row => store the row partially.
    if record_versions_max && record_versions_max > hash_version
      inventory_collection.skeletal_primary_index.skeletonize_primary_index(inventory_object.manager_uuid)
      return true
    end
  end

  false
end
620
+
621
# Mutates +hash+ in place: moves the full-row version value under the "max" key and trims the
# per-column (partial) versions down to only the columns being saved.
#
# @param full_row_version_attr [Symbol] key holding the full-row version (removed from hash)
# @param partial_row_version_attr [Symbol] key holding the per-column versions hash
# @param partial_row_version_attr_max [Symbol] key that receives the former full-row version
# @param hash [Hash] row data being prepared for a partial save (mutated)
# @param all_attribute_keys [Array<Symbol>] columns that will actually be written
def assign_partial_row_version_attributes!(full_row_version_attr, partial_row_version_attr,
                                           partial_row_version_attr_max, hash, all_attribute_keys)
  # The full-row version becomes the per-row maximum for the partial update.
  hash[partial_row_version_attr_max] = hash.delete(full_row_version_attr)

  partial_versions = hash[partial_row_version_attr]
  return unless partial_versions.present?

  # Lets clean to only what we save, since when we build the skeletal object, we can set more
  hash[partial_row_version_attr] = partial_versions.slice(*all_attribute_keys)
end
630
+ end
631
+ end
632
+ end