inventory_refresh 0.0.1

Sign up to get free protection for your applications and access to all of the features.
Files changed (54)
  1. checksums.yaml +7 -0
  2. data/.codeclimate.yml +47 -0
  3. data/.gitignore +13 -0
  4. data/.rspec +4 -0
  5. data/.rspec_ci +4 -0
  6. data/.rubocop.yml +4 -0
  7. data/.rubocop_cc.yml +5 -0
  8. data/.rubocop_local.yml +2 -0
  9. data/.travis.yml +12 -0
  10. data/.yamllint +12 -0
  11. data/CHANGELOG.md +0 -0
  12. data/Gemfile +6 -0
  13. data/LICENSE +202 -0
  14. data/README.md +35 -0
  15. data/Rakefile +47 -0
  16. data/bin/console +14 -0
  17. data/bin/setup +8 -0
  18. data/inventory_refresh.gemspec +34 -0
  19. data/lib/inventory_refresh.rb +11 -0
  20. data/lib/inventory_refresh/application_record_iterator.rb +56 -0
  21. data/lib/inventory_refresh/application_record_reference.rb +15 -0
  22. data/lib/inventory_refresh/graph.rb +157 -0
  23. data/lib/inventory_refresh/graph/topological_sort.rb +66 -0
  24. data/lib/inventory_refresh/inventory_collection.rb +1175 -0
  25. data/lib/inventory_refresh/inventory_collection/data_storage.rb +178 -0
  26. data/lib/inventory_refresh/inventory_collection/graph.rb +170 -0
  27. data/lib/inventory_refresh/inventory_collection/index/proxy.rb +230 -0
  28. data/lib/inventory_refresh/inventory_collection/index/type/base.rb +80 -0
  29. data/lib/inventory_refresh/inventory_collection/index/type/data.rb +26 -0
  30. data/lib/inventory_refresh/inventory_collection/index/type/local_db.rb +286 -0
  31. data/lib/inventory_refresh/inventory_collection/index/type/skeletal.rb +116 -0
  32. data/lib/inventory_refresh/inventory_collection/reference.rb +96 -0
  33. data/lib/inventory_refresh/inventory_collection/references_storage.rb +106 -0
  34. data/lib/inventory_refresh/inventory_collection/scanner.rb +117 -0
  35. data/lib/inventory_refresh/inventory_collection/serialization.rb +140 -0
  36. data/lib/inventory_refresh/inventory_object.rb +303 -0
  37. data/lib/inventory_refresh/inventory_object_lazy.rb +151 -0
  38. data/lib/inventory_refresh/save_collection/base.rb +38 -0
  39. data/lib/inventory_refresh/save_collection/recursive.rb +52 -0
  40. data/lib/inventory_refresh/save_collection/saver/base.rb +390 -0
  41. data/lib/inventory_refresh/save_collection/saver/batch.rb +17 -0
  42. data/lib/inventory_refresh/save_collection/saver/concurrent_safe.rb +71 -0
  43. data/lib/inventory_refresh/save_collection/saver/concurrent_safe_batch.rb +632 -0
  44. data/lib/inventory_refresh/save_collection/saver/default.rb +57 -0
  45. data/lib/inventory_refresh/save_collection/saver/sql_helper.rb +85 -0
  46. data/lib/inventory_refresh/save_collection/saver/sql_helper_update.rb +120 -0
  47. data/lib/inventory_refresh/save_collection/saver/sql_helper_upsert.rb +196 -0
  48. data/lib/inventory_refresh/save_collection/topological_sort.rb +38 -0
  49. data/lib/inventory_refresh/save_inventory.rb +38 -0
  50. data/lib/inventory_refresh/target.rb +73 -0
  51. data/lib/inventory_refresh/target_collection.rb +80 -0
  52. data/lib/inventory_refresh/version.rb +3 -0
  53. data/tools/ci/create_db_user.sh +3 -0
  54. metadata +207 -0
require "inventory_refresh/save_collection/saver/concurrent_safe_batch"

module InventoryRefresh::SaveCollection
  module Saver
    # Batch saver strategy for models that do not have unique DB indexes backing
    # the manager_ref. Reuses the concurrent-safe batch implementation, only
    # redefining which columns act as the "unique index".
    class Batch < InventoryRefresh::SaveCollection::Saver::ConcurrentSafeBatch
      private

      # Returns the manager ref transformed to column names, for strategies that
      # do not expect unique DB indexes to be present.
      #
      # @return [Array<Symbol>] manager ref transformed to column names
      def unique_index_columns
        inventory_collection.manager_ref_to_cols.map(&:to_sym)
      end
    end
  end
end
require "inventory_refresh/save_collection/saver/base"

module InventoryRefresh::SaveCollection
  module Saver
    # Concurrency-safe saver that persists records one at a time, guarding full
    # row updates with a remote_data_timestamp condition where available.
    class ConcurrentSafe < InventoryRefresh::SaveCollection::Saver::Base
      # TODO(lsmola) this strategy does not make much sense, it's better to use
      # concurrent_safe_batch and make the batch size configurable
      private

      # Updates the passed record with hash data and stores the primary key
      # value into inventory_object.
      #
      # @param record [ApplicationRecord] record we want to update in DB
      # @param hash [Hash] data we want to update the record with
      # @param inventory_object [InventoryRefresh::InventoryObject] InventoryObject
      #   instance where we will store the primary key value
      def update_record!(record, hash, inventory_object)
        assign_attributes_for_update!(hash, time_now)
        record.assign_attributes(hash.except(:id))

        if !inventory_object.inventory_collection.check_changed? || record.changed?
          scope = inventory_object.inventory_collection.model_class.where(:id => record.id)
          if hash[:remote_data_timestamp]
            # Only overwrite rows that carry an older remote timestamp, so a
            # racing, newer write is never clobbered.
            timestamp_field = inventory_collection.model_class.arel_table[:remote_data_timestamp]
            scope = scope.where(timestamp_field.lt(hash[:remote_data_timestamp]))
          end

          scope.update_all(hash)
          inventory_collection.store_updated_records(record)
        end

        inventory_object.id = record.id
      end

      # Creates a new record in the DB using the passed hash data.
      #
      # @param hash [Hash] hash with data we want to persist to DB
      # @param inventory_object [InventoryRefresh::InventoryObject] InventoryObject
      #   instance where we will store the primary key value
      def create_record!(hash, inventory_object)
        all_attribute_keys = hash.keys
        data = inventory_collection.model_class.new(hash).attributes.symbolize_keys

        # TODO(lsmola) abstract common behavior into base class
        all_attribute_keys << :type       if supports_sti?
        all_attribute_keys << :created_at if supports_created_at?
        all_attribute_keys << :updated_at if supports_updated_at?
        all_attribute_keys << :created_on if supports_created_on?
        all_attribute_keys << :updated_on if supports_updated_on?

        hash_for_creation = if inventory_collection.use_ar_object?
                              ar_record = inventory_collection.model_class.new(data)
                              values_for_database!(all_attribute_keys,
                                                   ar_record.attributes.symbolize_keys)
                            elsif serializable_keys?
                              values_for_database!(all_attribute_keys,
                                                   data)
                            else
                              data
                            end

        assign_attributes_for_create!(hash_for_creation, time_now)

        result_id = ActiveRecord::Base.connection.execute(
          build_insert_query(all_attribute_keys, [hash_for_creation])
        )

        inventory_object.id = result_id.to_a.try(:first).try(:[], "id")
        inventory_collection.store_created_records(inventory_object)
      end
    end
  end
end
require "inventory_refresh/save_collection/saver/base"
require "active_support/core_ext/module/delegation"

module InventoryRefresh::SaveCollection
  module Saver
    # Batched, concurrency-safe saver. Diffs the DB state against the data held
    # by the InventoryCollection and creates/updates/deletes rows in batches,
    # using upsert (ON CONFLICT) queries when the collection is parallel_safe?.
    class ConcurrentSafeBatch < InventoryRefresh::SaveCollection::Saver::Base
      private

      delegate :association_to_base_class_mapping,
               :association_to_foreign_key_mapping,
               :association_to_foreign_type_mapping,
               :attribute_references,
               :to => :inventory_collection

      # Attribute accessor working for both ApplicationRecord objects and raw
      # Hash rows; dispatches to the right reader via record_key_method.
      #
      # @param record [Hash, ApplicationRecord] record or hash
      # @param key [Symbol] key pointing to attribute of the record
      # @return [Object] value of the record on the key
      def record_key(record, key)
        send(record_key_method, record, key)
      end

      # Attribute accessor for ApplicationRecord objects
      #
      # @param record [ApplicationRecord] record
      # @param key [Symbol] key pointing to attribute of the record
      # @return [Object] value of the record on the key
      def ar_record_key(record, key)
        record.public_send(key)
      end

      # Attribute accessor for raw SQL result rows
      #
      # @param record [Hash] hash
      # @param key [Symbol] key pointing to attribute of the record
      # @return [Object] value of the record on the key
      def pure_sql_record_key(record, key)
        record[select_keys_indexes[key]]
      end

      # Returns an iterator or relation based on settings.
      #
      # @param association [Symbol] An existing association on manager
      # @return [ActiveRecord::Relation, InventoryRefresh::ApplicationRecordIterator]
      #   iterator or relation based on settings
      def batch_iterator(association)
        # Without pure SQL fetching we just hand back the normal AR relation (or
        # an ApplicationRecordIterator passed in from targeted refresh) — both
        # respond to find_in_batches.
        return association unless pure_sql_records_fetching

        # Fast iterator issuing pure SQL queries, avoiding the redundant cost of
        # building AR objects. It responds to find_in_batches, so it acts like
        # an AR relation.
        sql_iterator = lambda do |&block|
          last_id = nil
          loop do
            relation = association.select(*select_keys)
                                  .reorder("#{primary_key} ASC")
                                  .limit(batch_size)
            # Keyset pagination on the primary key instead of OFFSET
            relation = relation.where(arel_primary_key.gt(last_id)) if last_id
            rows = get_connection.query(relation.to_sql)
            final_row = rows.last
            block.call(rows)

            break if rows.size < batch_size
            last_id = record_key(final_row, primary_key)
          end
        end

        InventoryRefresh::ApplicationRecordIterator.new(:iterator => sql_iterator)
      end

      # Saves the InventoryCollection
      #
      # @param association [Symbol] An existing association on manager
      def save!(association)
        attributes_index        = {}
        inventory_objects_index = {}
        all_attribute_keys      = Set.new + inventory_collection.batch_extra_attributes

        inventory_collection.each do |inventory_object|
          attributes = inventory_object.attributes_with_keys(inventory_collection, all_attribute_keys)
          index      = build_stringified_reference(attributes, unique_index_keys)

          # Interesting fact: not building attributes_index and using only
          # inventory_objects_index doesn't make much of a difference, since
          # most of the inner objects are shared.
          attributes_index[index]        = attributes
          inventory_objects_index[index] = inventory_object
        end

        all_attribute_keys << :created_at if supports_created_at?
        all_attribute_keys << :updated_at if supports_updated_at?
        all_attribute_keys << :created_on if supports_created_on?
        all_attribute_keys << :updated_on if supports_updated_on?
        all_attribute_keys << :type       if supports_sti?

        unless inventory_collection.create_only?
          update_or_destroy_records!(batch_iterator(association), inventory_objects_index, attributes_index, all_attribute_keys)
          inventory_collection.custom_reconnect_block&.call(inventory_collection, inventory_objects_index, attributes_index)
        end

        # Records that were not found in the DB but were sent for saving: we
        # will be creating these in the DB.
        if inventory_collection.create_allowed?
          on_conflict = inventory_collection.parallel_safe? ? :do_update : nil

          inventory_objects_index.each_slice(batch_size_for_persisting) do |batch|
            create_records!(all_attribute_keys, batch, attributes_index, :on_conflict => on_conflict)
          end

          # Let the GC clean these up
          inventory_objects_index = nil
          attributes_index        = nil

          create_or_update_partial_records(all_attribute_keys) if inventory_collection.parallel_safe?
        end
      rescue => e
        raise e
      end

      # Batch-updates existing records that are in the DB using attributes_index
      # and deletes the ones that were not present in inventory_objects_index.
      #
      # @param records_batch_iterator [ActiveRecord::Relation, InventoryRefresh::ApplicationRecordIterator]
      #   iterator or relation, both responding to :find_in_batches method
      # @param inventory_objects_index [Hash{String => InventoryRefresh::InventoryObject}] indexed InventoryObjects
      # @param attributes_index [Hash{String => Hash}] data hashes with only keys that are column names of the
      #   model's table
      # @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
      def update_or_destroy_records!(records_batch_iterator, inventory_objects_index, attributes_index, all_attribute_keys)
        hashes_for_update         = []
        records_for_destroy       = []
        indexed_inventory_objects = {}

        records_batch_iterator.find_in_batches(:batch_size => batch_size) do |batch|
          update_time = time_now

          batch.each do |record|
            primary_key_value = record_key(record, primary_key)

            next unless assert_distinct_relation(primary_key_value)

            index = db_columns_index(record)

            inventory_object = inventory_objects_index.delete(index)
            hash             = attributes_index.delete(index)

            if inventory_object.nil?
              # Record was found in the DB but not sent for saving: it no
              # longer exists and should be deleted from the DB.
              records_for_destroy << record if inventory_collection.delete_allowed?
            else
              # Record was found in the DB and sent for saving: update the row.
              next unless assert_referential_integrity(hash)
              inventory_object.id = primary_key_value

              if inventory_collection.parallel_safe? &&
                 (supports_remote_data_timestamp?(all_attribute_keys) || supports_remote_data_version?(all_attribute_keys))

                version_attr, max_version_attr = if supports_remote_data_timestamp?(all_attribute_keys)
                                                   [:resource_timestamp, :resource_timestamps_max]
                                                 elsif supports_remote_data_version?(all_attribute_keys)
                                                   [:resource_version, :resource_versions_max]
                                                 end

                next if skeletonize_or_skip_record(record.try(version_attr) || record.try(:[], version_attr),
                                                   hash[version_attr],
                                                   record.try(max_version_attr) || record.try(:[], max_version_attr),
                                                   inventory_object)
              end

              hash_for_update = if inventory_collection.use_ar_object?
                                  record.assign_attributes(hash.except(:id))
                                  values_for_database!(all_attribute_keys,
                                                       record.attributes.symbolize_keys)
                                elsif serializable_keys?
                                  # TODO(lsmola) merge hash data with current DB data to allow a subset of data
                                  # being sent, otherwise we would nullify the not-sent attributes. Test e.g. on
                                  # disks in cloud
                                  values_for_database!(all_attribute_keys,
                                                       hash)
                                else
                                  # TODO(lsmola) merge hash data with current DB data, as above
                                  hash
                                end
              assign_attributes_for_update!(hash_for_update, update_time)

              hash_for_update[:id]             = primary_key_value
              indexed_inventory_objects[index] = inventory_object
              hashes_for_update << hash_for_update
            end
          end

          # Update in batches
          if hashes_for_update.size >= batch_size_for_persisting
            update_records!(all_attribute_keys, hashes_for_update, indexed_inventory_objects)

            hashes_for_update         = []
            indexed_inventory_objects = {}
          end

          # Destroy in batches
          if records_for_destroy.size >= batch_size_for_persisting
            destroy_records!(records_for_destroy)
            records_for_destroy = []
          end
        end

        # Update the last batch
        update_records!(all_attribute_keys, hashes_for_update, indexed_inventory_objects)
        hashes_for_update = [] # Cleanup so GC can release it sooner

        # Destroy the last batch
        destroy_records!(records_for_destroy)
        records_for_destroy = [] # Cleanup so GC can release it sooner
      end

      # Builds a "__"-joined index string out of the unique-index column values
      # of the record. Incoming values are in SQL string form.
      def db_columns_index(record, pure_sql: false)
        # TODO(lsmola) unify this behavior with object_index_with_keys method in InventoryCollection
        # TODO(lsmola) maybe we can drop the whole pure sql fetching, since everything will be targeted refresh
        # with streaming refresh? Maybe just metrics and events will not be, but those should be upsert only
        # TODO(lsmola) taking ^ into account, we can't drop pure sql, since that is returned by batch insert and
        # update queries
        unique_index_keys_to_s.map do |attribute|
          value = pure_sql ? record[attribute] : record_key(record, attribute)

          format_value(attribute, value)
        end.join("__")
      end

      # Normalizes a single attribute value to its canonical string form so
      # DB-sourced and collection-sourced index strings compare equal.
      def format_value(attribute, value)
        if attribute == "timestamp"
          # TODO: can this be covered by @deserializable_keys?
          type = model_class.type_for_attribute(attribute)
          type.cast(value).utc.iso8601.to_s
        elsif (type = deserializable_keys[attribute.to_sym])
          type.deserialize(value).to_s
        else
          value.to_s
        end
      end

      # Deletes or soft-deletes records. If the model_class supports a custom
      # class-level delete method, we will use it for batch soft-delete.
      #
      # @param records [Array<ApplicationRecord, Hash>] Records we want to delete. If we only have hashes, we
      #   need to fetch ApplicationRecord objects from the DB first
      def destroy_records!(records)
        return false unless inventory_collection.delete_allowed?
        return if records.blank?

        # Is the delete_method a standard Rails deleting method?
        rails_delete = %i(destroy delete).include?(inventory_collection.delete_method)
        if !rails_delete && inventory_collection.model_class.respond_to?(inventory_collection.delete_method)
          # A custom delete method defined on the class means it supports batch destroy
          inventory_collection.store_deleted_records(records.map { |x| {:id => record_key(x, primary_key)} })
          inventory_collection.model_class.public_send(inventory_collection.delete_method, records.map { |x| record_key(x, primary_key) })
        else
          # Either the standard :destroy/:delete Rails methods, or a custom instance-level delete method.
          # Note: the standard :destroy and :delete Rails methods can't be batched because of hooks and cascade destroy
          ActiveRecord::Base.transaction do
            if pure_sql_records_fetching
              # For pure SQL fetching we need to get the AR objects again, so we can call destroy
              inventory_collection.model_class.where(:id => records.map { |x| record_key(x, primary_key) }).find_each do |record|
                delete_record!(record)
              end
            else
              records.each { |record| delete_record!(record) }
            end
          end
        end
      end

      # Batch-updates existing records.
      #
      # @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
      # @param hashes [Array<Hash>] data used for building a batch update SQL query
      # @param indexed_inventory_objects [Hash{String => InventoryRefresh::InventoryObject}] indexed InventoryObjects
      def update_records!(all_attribute_keys, hashes, indexed_inventory_objects)
        return if hashes.blank?

        unless inventory_collection.parallel_safe?
          # We need to store the records before saving, since the hashes get modified
          inventory_collection.store_updated_records(hashes)
        end

        query  = build_update_query(all_attribute_keys, hashes)
        result = get_connection.execute(query)

        if inventory_collection.parallel_safe?
          # Check for timestamp clashes of the full-row update; fall back to skeletal update
          inventory_collection.store_updated_records(result)

          skeletonize_ignored_records!(indexed_inventory_objects, result)
        end

        result
      end

      # Takes the result of an update or upsert; records that were not actually
      # saved are turned into skeletal records and saved attribute by attribute.
      #
      # @param hash [Hash{String => InventoryObject}] indexed data we want to save
      # @param result [Array<Hash>] DB result containing the rows that were actually saved
      # @param all_unique_columns [Boolean] true if the index consists of all columns of the unique index;
      #   false if it is built just from manager_ref turned into DB column names
      def skeletonize_ignored_records!(hash, result, all_unique_columns: false)
        saved = if all_unique_columns
                  result.map { |x| unique_index_columns_to_s.map { |key| x[key] } }
                else
                  result.map { |x| db_columns_index(x, :pure_sql => true) }
                end

        saved.each { |x| hash.delete(x) }

        # Skeletonize all inventory_objects that were not saved by the update or upsert. Old rows that can't be
        # saved are not sent here; we only have rows that are new but became old as the query was sent (another
        # parallel process saved the data in the meantime), or rows where some attributes are newer than the
        # whole row being sent.
        hash.each_key do |db_index|
          inventory_collection.skeletal_primary_index.skeletonize_primary_index(hash[db_index].manager_uuid)
        end
      end

      # Saves partial records using upsert, taking records from skeletal_primary_index. Used both for skeletal
      # precreate and for saving partial rows.
      #
      # @param all_attribute_keys [Set] superset of all keys of all records being saved
      def create_or_update_partial_records(all_attribute_keys)
        skeletal_attributes_index        = {}
        skeletal_inventory_objects_index = {}

        inventory_collection.skeletal_primary_index.each_value do |inventory_object|
          attributes = inventory_object.attributes_with_keys(inventory_collection, all_attribute_keys)
          index      = build_stringified_reference(attributes, unique_index_keys)

          skeletal_attributes_index[index]        = attributes
          skeletal_inventory_objects_index[index] = inventory_object
        end

        if supports_remote_data_timestamp?(all_attribute_keys)
          all_attribute_keys << :resource_timestamps
          all_attribute_keys << :resource_timestamps_max
        elsif supports_remote_data_version?(all_attribute_keys)
          all_attribute_keys << :resource_versions
          all_attribute_keys << :resource_versions_max
        end

        indexed_inventory_objects = {}
        hashes                    = []
        create_time               = time_now

        skeletal_inventory_objects_index.each do |index, inventory_object|
          hash = skeletal_attributes_index.delete(index)
          # A partial create or update must never set a timestamp for the whole row
          timestamps = if supports_remote_data_timestamp?(all_attribute_keys) && supports_resource_timestamps_max?
                         assign_partial_row_version_attributes!(:resource_timestamp,
                                                                :resource_timestamps,
                                                                :resource_timestamps_max,
                                                                hash,
                                                                all_attribute_keys)
                       elsif supports_remote_data_version?(all_attribute_keys) && supports_resource_versions_max?
                         assign_partial_row_version_attributes!(:resource_version,
                                                                :resource_versions,
                                                                :resource_versions_max,
                                                                hash,
                                                                all_attribute_keys)
                       end
          # Transform the hash to DB format
          hash = transform_to_hash!(all_attribute_keys, hash)

          assign_attributes_for_create!(hash, create_time)

          next unless assert_referential_integrity(hash)

          hash[:__non_serialized_versions] = timestamps # store non-serialized timestamps for the partial updates
          hashes << hash
          # Index on unique-column values, so we can easily fill in the :id later
          indexed_inventory_objects[unique_index_columns.map { |x| hash[x] }] = inventory_object
        end

        return if hashes.blank?

        # First, try to create all partial records
        hashes.each_slice(batch_size_for_persisting) do |batch|
          result = create_partial!(all_attribute_keys,
                                   batch,
                                   :on_conflict => :do_nothing)
          inventory_collection.store_created_records(result)
        end

        # We only need skeletal records with a timestamp. The ones without can't be saved, because e.g. skeletal
        # precreate would be updating records with default values, which are not correct.
        pre_filtered = hashes.select { |x| x[:resource_timestamps_max] || x[:resource_versions_max] }

        results = {}
        # TODO(lsmola) we don't need to process rows that were saved by the create -> on conflict do nothing
        (all_attribute_keys - inventory_collection.base_columns).each do |column_name|
          filtered = pre_filtered.select { |x| x.key?(column_name) }

          filtered.each_slice(batch_size_for_persisting) do |batch|
            # Set the correct timestamps_max for this particular attribute, based on what is in timestamps
            if supports_remote_data_timestamp?(all_attribute_keys)
              batch.each { |x| x[:resource_timestamps_max] = x[:__non_serialized_versions][column_name] if x[:__non_serialized_versions][column_name] }
            elsif supports_remote_data_version?(all_attribute_keys)
              batch.each { |x| x[:resource_versions_max] = x[:__non_serialized_versions][column_name] if x[:__non_serialized_versions][column_name] }
            end

            result = create_partial!(inventory_collection.base_columns + [column_name],
                                     batch,
                                     :on_conflict => :do_update,
                                     :column_name => column_name)
            result.each do |res|
              results[res["id"]] = res
            end
          end
        end

        inventory_collection.store_updated_records(results.values)

        # TODO(lsmola) we need to move the hash-loading AR object etc. here, otherwise lazy_find with key will
        # not be correct
        if inventory_collection.dependees.present?
          # We need the primary keys of the created objects, but only if there are dependees that would use them
          map_ids_to_inventory_objects(indexed_inventory_objects,
                                       all_attribute_keys,
                                       hashes,
                                       nil,
                                       :on_conflict => :do_nothing)
        end
      end

      # Batch-upserts 1 data column of the row, plus the internal columns.
      #
      # @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
      # @param hashes [Array<InventoryRefresh::InventoryObject>] Array of InventoryObject objects we will be
      #   inserting into the DB
      # @param on_conflict [Symbol, NilClass] behavior on unique-index conflict; allowed values are
      #   :do_update, :do_nothing, nil
      # @param column_name [Symbol] name of the data column we will be upserting
      def create_partial!(all_attribute_keys, hashes, on_conflict: nil, column_name: nil)
        get_connection.execute(
          build_insert_query(all_attribute_keys, hashes, :on_conflict => on_conflict, :mode => :partial, :column_name => column_name)
        )
      end

      # Batch-inserts records using attributes_index data. With on_conflict set to :do_update, this method does
      # an atomic upsert.
      #
      # @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
      # @param batch [Array<InventoryRefresh::InventoryObject>] Array of InventoryObject objects we will be
      #   inserting into the DB
      # @param attributes_index [Hash{String => Hash}] data hashes with only keys that are column names of the
      #   model's table
      # @param on_conflict [Symbol, NilClass] behavior on unique-index conflict; allowed values are
      #   :do_update, :do_nothing, nil
      def create_records!(all_attribute_keys, batch, attributes_index, on_conflict: nil)
        indexed_inventory_objects = {}
        hashes                    = []
        create_time               = time_now
        batch.each do |index, inventory_object|
          hash = if inventory_collection.use_ar_object?
                   new_record = inventory_collection.model_class.new(attributes_index[index])
                   values_for_database!(all_attribute_keys,
                                        new_record.attributes.symbolize_keys)
                 elsif serializable_keys?
                   values_for_database!(all_attribute_keys,
                                        attributes_index[index])
                 else
                   attributes_index[index]
                 end

          assign_attributes_for_create!(hash, create_time)

          next unless assert_referential_integrity(hash)

          hashes << hash
          # Index on unique-column values, so we can easily fill in the :id later
          indexed_inventory_objects[unique_index_columns.map { |x| hash[x] }] = inventory_object
        end

        return if hashes.blank?

        result = get_connection.execute(
          build_insert_query(all_attribute_keys, hashes, :on_conflict => on_conflict, :mode => :full)
        )

        if inventory_collection.parallel_safe?
          # We've done an upsert, so records were either created or updated. Recognize which by checking whether
          # the created and updated timestamps are equal
          created_attr   = "created_on" if inventory_collection.supports_created_on?
          created_attr ||= "created_at" if inventory_collection.supports_created_at?
          updated_attr   = "updated_on" if inventory_collection.supports_updated_on?
          updated_attr ||= "updated_at" if inventory_collection.supports_updated_at?

          if created_attr && updated_attr
            created, updated = result.to_a.partition { |x| x[created_attr] == x[updated_attr] }
            inventory_collection.store_created_records(created)
            inventory_collection.store_updated_records(updated)
          else
            # The record lacks both created and updated attrs, so treat all as created
            inventory_collection.store_created_records(result)
          end
        else
          # We've done a plain insert, so all records were created
          inventory_collection.store_created_records(result)
        end

        if inventory_collection.dependees.present?
          # We need the primary keys of the created objects, but only if there are dependees that would use them
          map_ids_to_inventory_objects(indexed_inventory_objects,
                                       all_attribute_keys,
                                       hashes,
                                       result,
                                       :on_conflict => on_conflict)
        end

        if inventory_collection.parallel_safe?
          skeletonize_ignored_records!(indexed_inventory_objects, result, :all_unique_columns => true)
        end
      end

      # Stores primary-key values of created records into the associated InventoryObject objects.
      #
      # @param indexed_inventory_objects [Hash{String => InventoryRefresh::InventoryObject}] inventory objects
      #   indexed by a stringified value made from db_columns
      # @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
      # @param hashes [Array<Hash>] hashes that were used for inserting the data
      # @param result [Array<Hash>] result rows of the batch insert query; each contains a primary key value
      #   plus all columns that are part of the unique index
      # @param on_conflict [Symbol, NilClass] behavior on unique-index conflict; allowed values are
      #   :do_update, :do_nothing, nil
      def map_ids_to_inventory_objects(indexed_inventory_objects, all_attribute_keys, hashes, result, on_conflict:)
        if on_conflict == :do_nothing
          # TODO(lsmola) is the comment below still accurate? We will update some partial rows, the actual
          # skeletal precreate will still do nothing.
          # For ON CONFLICT DO NOTHING, we need to always fetch the records plus the attribute_references.
          # This path applies only to skeletal precreate.
          inventory_collection.model_class.where(
            build_multi_selection_query(hashes)
          ).select(unique_index_columns + [:id] + attribute_references.to_a).each do |record|
            key              = unique_index_columns.map { |x| record.public_send(x) }
            inventory_object = indexed_inventory_objects[key]

            # Load also attribute_references, so lazy_find with :key pointing to a skeletal reference works
            attributes = record.attributes.symbolize_keys
            attribute_references.each do |ref|
              inventory_object[ref] = attributes[ref]

              next unless (foreign_key = association_to_foreign_key_mapping[ref])
              base_class_name = attributes[association_to_foreign_type_mapping[ref].try(:to_sym)] || association_to_base_class_mapping[ref]
              id = attributes[foreign_key.to_sym]
              inventory_object[ref] = InventoryRefresh::ApplicationRecordReference.new(base_class_name, id)
            end

            inventory_object.id = record.id if inventory_object
          end
        elsif !supports_remote_data_timestamp?(all_attribute_keys) || result.count == batch_size_for_persisting
          # We can use the insert query result to fetch all primary-key values, which makes this the most
          # effective path.
          result.each do |inserted_record|
            key = unique_index_columns.map do |x|
              value = inserted_record[x.to_s]
              type  = deserializable_keys[x]
              type ? type.deserialize(value) : value
            end
            inventory_object    = indexed_inventory_objects[key]
            inventory_object.id = inserted_record[primary_key] if inventory_object
          end
        else
          # remote_data_timestamp adds a WHERE condition to ON CONFLICT UPDATE. As a result, the RETURNING
          # clause is not guaranteed to return all ids of the inserted/updated records. We test whether the
          # number of results matches the expected batch size; when the counts do not match, the only safe
          # option is to query all the data from the DB using the unique indexes. The batch size will also not
          # match for every remainder (the last batch in a stream of batches).
          inventory_collection.model_class.where(
            build_multi_selection_query(hashes)
          ).select(unique_index_columns + [:id]).each do |inserted_record|
            key                 = unique_index_columns.map { |x| inserted_record.public_send(x) }
            inventory_object    = indexed_inventory_objects[key]
            inventory_object.id = inserted_record.id if inventory_object
          end
        end
      end

      # Decides whether a row should be skipped (its DB version is newer) or
      # stored partially via the skeletal index (some column is newer than the
      # whole incoming row). Returns true in either case.
      def skeletonize_or_skip_record(record_version, hash_version, record_versions_max, inventory_object)
        # Skip updating this record, because it is old
        return true if record_version && hash_version && record_version >= hash_version

        # Some column has a bigger version than the whole row: store the row partially
        if record_versions_max && hash_version && record_versions_max > hash_version
          inventory_collection.skeletal_primary_index.skeletonize_primary_index(inventory_object.manager_uuid)
          return true
        end

        false
      end

      # Moves the full-row version attribute into the per-partial-row max slot
      # and trims the per-column versions down to the attributes being saved.
      def assign_partial_row_version_attributes!(full_row_version_attr, partial_row_version_attr,
                                                 partial_row_version_attr_max, hash, all_attribute_keys)
        hash[partial_row_version_attr_max] = hash.delete(full_row_version_attr)

        if hash[partial_row_version_attr].present?
          # Keep only what we save, since when we build the skeletal object more can be set
          hash[partial_row_version_attr] = hash[partial_row_version_attr].slice(*all_attribute_keys)
        end
      end
    end
  end
end