inventory_refresh 0.3.3 → 1.0.0
- checksums.yaml +4 -4
- data/.codeclimate.yml +25 -30
- data/.github/workflows/ci.yaml +47 -0
- data/.rubocop.yml +3 -3
- data/.rubocop_cc.yml +3 -4
- data/.rubocop_local.yml +5 -2
- data/.whitesource +3 -0
- data/CHANGELOG.md +19 -0
- data/Gemfile +10 -4
- data/README.md +1 -2
- data/Rakefile +2 -2
- data/inventory_refresh.gemspec +9 -10
- data/lib/inventory_refresh/application_record_iterator.rb +25 -12
- data/lib/inventory_refresh/graph/topological_sort.rb +24 -26
- data/lib/inventory_refresh/graph.rb +2 -2
- data/lib/inventory_refresh/inventory_collection/builder.rb +37 -15
- data/lib/inventory_refresh/inventory_collection/data_storage.rb +9 -0
- data/lib/inventory_refresh/inventory_collection/helpers/initialize_helper.rb +147 -38
- data/lib/inventory_refresh/inventory_collection/helpers/questions_helper.rb +49 -5
- data/lib/inventory_refresh/inventory_collection/index/proxy.rb +35 -3
- data/lib/inventory_refresh/inventory_collection/index/type/base.rb +8 -0
- data/lib/inventory_refresh/inventory_collection/index/type/local_db.rb +2 -0
- data/lib/inventory_refresh/inventory_collection/index/type/skeletal.rb +1 -0
- data/lib/inventory_refresh/inventory_collection/reference.rb +1 -0
- data/lib/inventory_refresh/inventory_collection/references_storage.rb +17 -0
- data/lib/inventory_refresh/inventory_collection/scanner.rb +91 -3
- data/lib/inventory_refresh/inventory_collection/serialization.rb +16 -10
- data/lib/inventory_refresh/inventory_collection.rb +122 -64
- data/lib/inventory_refresh/inventory_object.rb +74 -40
- data/lib/inventory_refresh/inventory_object_lazy.rb +17 -10
- data/lib/inventory_refresh/null_logger.rb +2 -2
- data/lib/inventory_refresh/persister.rb +31 -65
- data/lib/inventory_refresh/save_collection/base.rb +4 -2
- data/lib/inventory_refresh/save_collection/saver/base.rb +114 -15
- data/lib/inventory_refresh/save_collection/saver/batch.rb +17 -0
- data/lib/inventory_refresh/save_collection/saver/concurrent_safe_batch.rb +129 -51
- data/lib/inventory_refresh/save_collection/saver/default.rb +57 -0
- data/lib/inventory_refresh/save_collection/saver/partial_upsert_helper.rb +2 -19
- data/lib/inventory_refresh/save_collection/saver/retention_helper.rb +68 -3
- data/lib/inventory_refresh/save_collection/saver/sql_helper.rb +125 -0
- data/lib/inventory_refresh/save_collection/saver/sql_helper_update.rb +10 -6
- data/lib/inventory_refresh/save_collection/saver/sql_helper_upsert.rb +28 -16
- data/lib/inventory_refresh/save_collection/sweeper.rb +17 -93
- data/lib/inventory_refresh/save_collection/topological_sort.rb +5 -5
- data/lib/inventory_refresh/save_inventory.rb +5 -12
- data/lib/inventory_refresh/target.rb +73 -0
- data/lib/inventory_refresh/target_collection.rb +92 -0
- data/lib/inventory_refresh/version.rb +1 -1
- data/lib/inventory_refresh.rb +2 -0
- metadata +42 -39
- data/.travis.yml +0 -23
- data/lib/inventory_refresh/exception.rb +0 -8
Selected diff hunks follow.

data/lib/inventory_refresh/save_collection/saver/concurrent_safe_batch.rb

@@ -46,6 +46,41 @@ module InventoryRefresh::SaveCollection
         record[select_keys_indexes[key]]
       end
 
+      # Returns iterator or relation based on settings
+      #
+      # @param association [Symbol] An existing association on manager
+      # @return [ActiveRecord::Relation, InventoryRefresh::ApplicationRecordIterator] iterator or relation based on settings
+      def batch_iterator(association)
+        if pure_sql_records_fetching
+          # Building fast iterator doing pure SQL query and therefore avoiding redundant creation of AR objects. The
+          # iterator responds to find_in_batches, so it acts like the AR relation. For targeted refresh, the association
+          # can already be ApplicationRecordIterator, so we will skip that.
+          pure_sql_iterator = lambda do |&block|
+            primary_key_offset = nil
+            loop do
+              relation = association.select(*select_keys)
+                                    .reorder("#{primary_key} ASC")
+                                    .limit(batch_size)
+              # Using rails way of comparing primary key instead of offset
+              relation = relation.where(arel_primary_key.gt(primary_key_offset)) if primary_key_offset
+              records = get_connection.query(relation.to_sql)
+              last_record = records.last
+              block.call(records)
+
+              break if records.size < batch_size
+
+              primary_key_offset = record_key(last_record, primary_key)
+            end
+          end
+
+          InventoryRefresh::ApplicationRecordIterator.new(:iterator => pure_sql_iterator)
+        else
+          # Normal Rails ActiveRecord::Relation where we can call find_in_batches or
+          # InventoryRefresh::ApplicationRecordIterator passed from targeted refresh
+          association
+        end
+      end
+
       # Saves the InventoryCollection
       #
       # @param association [Symbol] An existing association on manager
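The pure-SQL iterator above pages by comparing the primary key (WHERE id > last_seen_id ORDER BY id LIMIT n) instead of using OFFSET, so every page is a cheap index range scan. A minimal standalone sketch of the same keyset-pagination pattern against plain ActiveRecord; the Widget model and the helper name are illustrative, not part of the gem:

    require "active_record"

    # Hypothetical model for the sketch; any AR model with an :id primary key works.
    class Widget < ActiveRecord::Base; end

    # Yields raw row hashes in primary-key order, batch_size rows at a time.
    def each_batch_by_pk(model, batch_size: 1_000)
      offset = nil
      loop do
        scope = model.reorder(:id).limit(batch_size)
        scope = scope.where(model.arel_table[:id].gt(offset)) if offset
        rows  = model.connection.select_all(scope.to_sql).to_a
        break if rows.empty?

        yield rows
        break if rows.size < batch_size # last, short page

        offset = rows.last["id"] # keyset: next page starts after this id
      end
    end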
@@ -55,7 +90,7 @@ module InventoryRefresh::SaveCollection
         all_attribute_keys = Set.new + inventory_collection.batch_extra_attributes
 
         inventory_collection.each do |inventory_object|
-          attributes = inventory_object.
+          attributes = inventory_object.attributes_with_keys(inventory_collection, all_attribute_keys, inventory_object)
           index = build_stringified_reference(attributes, unique_index_keys)
 
           # Interesting fact: not building attributes_index and using only inventory_objects_index doesn't do much
@@ -69,7 +104,7 @@ module InventoryRefresh::SaveCollection
         logger.debug("Processing #{inventory_collection} of size #{inventory_collection.size}...")
 
         unless inventory_collection.create_only?
-
+          update_or_destroy_records!(batch_iterator(association), inventory_objects_index, attributes_index, all_attribute_keys)
         end
 
         unless inventory_collection.create_only?
@@ -78,11 +113,15 @@ module InventoryRefresh::SaveCollection
 
         # Records that were not found in the DB but sent for saving, we will be creating these in the DB.
         if inventory_collection.create_allowed?
+          on_conflict = inventory_collection.parallel_safe? ? :do_update : nil
+
           inventory_objects_index.each_slice(batch_size_for_persisting) do |batch|
-            create_records!(all_attribute_keys, batch, attributes_index, :on_conflict =>
+            create_records!(all_attribute_keys, batch, attributes_index, :on_conflict => on_conflict)
           end
 
-
+          if inventory_collection.parallel_safe?
+            create_or_update_partial_records(all_attribute_keys)
+          end
         end
 
         logger.debug("Marking :last_seen_at of #{inventory_collection} of size #{inventory_collection.size}...")
@@ -103,7 +142,7 @@ module InventoryRefresh::SaveCollection
       end
 
       def expand_all_attribute_keys!(all_attribute_keys)
-        %i
+        %i[created_at updated_at created_on updated_on].each do |col|
           all_attribute_keys << col if supports_column?(col)
         end
         all_attribute_keys << :type if supports_sti?
@@ -111,7 +150,7 @@ module InventoryRefresh::SaveCollection
       end
 
       def mark_last_seen_at(attributes_index)
-        return unless supports_column?(:last_seen_at)
+        return unless supports_column?(:last_seen_at) && inventory_collection.parallel_safe?
         return if attributes_index.blank?
 
         all_attribute_keys = [:last_seen_at]
@@ -124,7 +163,8 @@ module InventoryRefresh::SaveCollection
         get_connection.execute(query)
       end
 
-      # Batch updates existing records that are in the DB using attributes_index.
+      # Batch updates existing records that are in the DB using attributes_index. And delete the ones that were not
+      # present in inventory_objects_index.
       #
       # @param records_batch_iterator [ActiveRecord::Relation, InventoryRefresh::ApplicationRecordIterator] iterator or
       #   relation, both responding to :find_in_batches method
@@ -132,11 +172,12 @@ module InventoryRefresh::SaveCollection
       # @param attributes_index [Hash{String => Hash}] Hash of data hashes with only keys that are column names of the
       #   models's table
       # @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
-      def
-        hashes_for_update
+      def update_or_destroy_records!(records_batch_iterator, inventory_objects_index, attributes_index, all_attribute_keys)
+        hashes_for_update   = []
+        records_for_destroy = []
         indexed_inventory_objects = {}
 
-        records_batch_iterator.find_in_batches(:batch_size => batch_size
+        records_batch_iterator.find_in_batches(:batch_size => batch_size) do |batch|
           update_time = time_now
 
           batch.each do |record|
@@ -149,14 +190,20 @@ module InventoryRefresh::SaveCollection
             inventory_object = inventory_objects_index.delete(index)
             hash             = attributes_index[index]
 
-            if inventory_object
+            if inventory_object.nil?
+              # Record was found in the DB but not sent for saving, that means it doesn't exist anymore and we should
+              # delete it from the DB.
+              if inventory_collection.delete_allowed?
+                records_for_destroy << record
+              end
+            else
               # Record was found in the DB and sent for saving, we will be updating the DB.
               inventory_object.id = primary_key_value
               next unless assert_referential_integrity(hash)
+              next unless changed?(record, hash, all_attribute_keys)
 
-
-
-              if supports_remote_data_timestamp?(all_attribute_keys) || supports_remote_data_version?(all_attribute_keys)
+              if inventory_collection.parallel_safe? &&
+                 (supports_remote_data_timestamp?(all_attribute_keys) || supports_remote_data_version?(all_attribute_keys))
 
                 version_attr, max_version_attr = if supports_remote_data_timestamp?(all_attribute_keys)
                                                    [:resource_timestamp, :resource_timestamps_max]
@@ -164,16 +211,16 @@ module InventoryRefresh::SaveCollection
                                                    [:resource_counter, :resource_counters_max]
                                                  end
 
-
-
+                next if skeletonize_or_skip_record(record_key(record, version_attr),
+                                                   hash[version_attr],
+                                                   record_key(record, max_version_attr),
+                                                   inventory_object)
               end
 
               hash_for_update = if inventory_collection.use_ar_object?
                                   record.assign_attributes(hash.except(:id))
-                                  next unless changed?(record)
-
                                   values_for_database!(all_attribute_keys,
-
+                                                       record.attributes.symbolize_keys)
                                 elsif serializable_keys?
                                   # TODO(lsmola) hash data with current DB data to allow subset of data being sent,
                                   # otherwise we would nullify the not sent attributes. Test e.g. on disks in cloud
@@ -184,14 +231,6 @@ module InventoryRefresh::SaveCollection
                                   # otherwise we would nullify the not sent attributes. Test e.g. on disks in cloud
                                   hash
                                 end
-
-              if supports_remote_data_timestamp?(all_attribute_keys) || supports_remote_data_version?(all_attribute_keys)
-                next if skeletonize_or_skip_record(record_version,
-                                                   hash[version_attr],
-                                                   record_version_max,
-                                                   inventory_object)
-              end
-
               assign_attributes_for_update!(hash_for_update, update_time)
 
               hash_for_update[:id] = primary_key_value
@@ -207,22 +246,39 @@ module InventoryRefresh::SaveCollection
             hashes_for_update         = []
             indexed_inventory_objects = {}
           end
+
+          # Destroy in batches
+          if records_for_destroy.size >= batch_size_for_persisting
+            destroy_records!(records_for_destroy)
+            records_for_destroy = []
+          end
         end
 
         # Update the last batch
         update_records!(all_attribute_keys, hashes_for_update, indexed_inventory_objects)
         hashes_for_update = [] # Cleanup so GC can release it sooner
+
+        # Destroy the last batch
+        destroy_records!(records_for_destroy)
+        records_for_destroy = [] # Cleanup so GC can release it sooner
       end
 
-      def changed?(
+      def changed?(_record, _hash, _all_attribute_keys)
         return true unless inventory_collection.check_changed?
 
-        #
-
-        #
-
-        #
-
+        # TODO(lsmola) this check needs to be disabled now, because it doesn't work with lazy_find having secondary
+        # indexes. Examples: we save a pod before we save a project, that means the project lazy_find won't evaluate,
+        # because we load it with secondary index and can't do skeletal precreate. Then when the object is being saved
+        # again, the lazy_find is evaluated, but the resource version is not changed, so the row is not saved.
+        #
+        # To keep this quick .changed? check, we might need to extend this, so the resource_version doesn't save until
+        # all lazy_links of the row are evaluated.
+        #
+        # if supports_resource_version?(all_attribute_keys) && supports_column?(resource_version_column)
+        #   record_resource_version = record_key(record, resource_version_column.to_s)
+        #
+        #   return record_resource_version != hash[resource_version_column]
+        # end
 
         true
       end
@@ -230,7 +286,10 @@ module InventoryRefresh::SaveCollection
       def db_columns_index(record, pure_sql: false)
         # Incoming values are in SQL string form.
         # TODO(lsmola) unify this behavior with object_index_with_keys method in InventoryCollection
+        # TODO(lsmola) maybe we can drop the whole pure sql fetching, since everything will be targeted refresh
         #   with streaming refresh? Maybe just metrics and events will not be, but those should be upsert only
+        # TODO(lsmola) taking ^ in account, we can't drop pure sql, since that is returned by batch insert and
+        #   update queries
         unique_index_keys_to_s.map do |attribute|
           value = if pure_sql
                     record[attribute]
@@ -261,13 +320,20 @@ module InventoryRefresh::SaveCollection
       def update_records!(all_attribute_keys, hashes, indexed_inventory_objects)
         return if hashes.blank?
 
+        unless inventory_collection.parallel_safe?
+          # We need to update the stored records before we save it, since hashes are modified
+          inventory_collection.store_updated_records(hashes)
+        end
+
         query = build_update_query(all_attribute_keys, hashes)
         result = get_connection.execute(query)
 
-
-
+        if inventory_collection.parallel_safe?
+          # We will check for timestamp clashes of full row update and we will fallback to skeletal update
+          inventory_collection.store_updated_records(result)
 
-
+          skeletonize_ignored_records!(indexed_inventory_objects, result)
+        end
 
         result
       end
@@ -287,7 +353,11 @@ module InventoryRefresh::SaveCollection
         hashes = []
         create_time = time_now
         batch.each do |index, inventory_object|
-          hash = if
+          hash = if inventory_collection.use_ar_object?
+                   record = inventory_collection.model_class.new(attributes_index[index])
+                   values_for_database!(all_attribute_keys,
+                                        record.attributes.symbolize_keys)
+                 elsif serializable_keys?
                    values_for_database!(all_attribute_keys,
                                         attributes_index[index])
                  else
@@ -309,19 +379,24 @@ module InventoryRefresh::SaveCollection
           build_insert_query(all_attribute_keys, hashes, :on_conflict => on_conflict, :mode => :full)
         )
 
-
-
-
-
-
-
-
-
-
-
-
+        if inventory_collection.parallel_safe?
+          # We've done upsert, so records were either created or updated. We can recognize that by checking if
+          # created and updated timestamps are the same
+          created_attr = "created_on" if inventory_collection.supports_column?(:created_on)
+          created_attr ||= "created_at" if inventory_collection.supports_column?(:created_at)
+          updated_attr = "updated_on" if inventory_collection.supports_column?(:updated_on)
+          updated_attr ||= "updated_at" if inventory_collection.supports_column?(:updated_at)
+
+          if created_attr && updated_attr
+            created, updated = result.to_a.partition { |x| x[created_attr] == x[updated_attr] }
+            inventory_collection.store_created_records(created)
+            inventory_collection.store_updated_records(updated)
+          else
+            # The record doesn't have both created and updated attrs, so we'll take all as created
+            inventory_collection.store_created_records(result)
+          end
         else
-          #
+          # We've done just insert, so all records were created
           inventory_collection.store_created_records(result)
         end
 
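The partition above relies on the database setting the created and updated timestamps to the same value on INSERT, so any row coming back from the upsert with differing timestamps must have been updated rather than created. A toy illustration of the split, with fabricated row hashes:

    # Rows as returned by an INSERT ... ON CONFLICT DO UPDATE ... RETURNING query.
    rows = [
      {"id" => 1, "created_at" => "2024-01-01 10:00", "updated_at" => "2024-01-01 10:00"}, # inserted now
      {"id" => 2, "created_at" => "2023-12-31 09:00", "updated_at" => "2024-01-01 10:00"}  # pre-existing, updated
    ]
    created, updated = rows.partition { |x| x["created_at"] == x["updated_at"] }
    created.map { |x| x["id"] } # => [1]
    updated.map { |x| x["id"] } # => [2]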
@@ -334,7 +409,9 @@ module InventoryRefresh::SaveCollection
                                          :on_conflict => on_conflict)
         end
 
-
+        if inventory_collection.parallel_safe?
+          skeletonize_ignored_records!(indexed_inventory_objects, result, :all_unique_columns => true)
+        end
       end
 
       # Stores primary_key values of created records into associated InventoryObject objects.
@@ -365,6 +442,7 @@ module InventoryRefresh::SaveCollection
           inventory_object[ref] = attributes[ref]
 
           next unless (foreign_key = association_to_foreign_key_mapping[ref])
+
           base_class_name = attributes[association_to_foreign_type_mapping[ref].try(:to_sym)] || association_to_base_class_mapping[ref]
           id = attributes[foreign_key.to_sym]
           inventory_object[ref] = InventoryRefresh::ApplicationRecordReference.new(base_class_name, id)
data/lib/inventory_refresh/save_collection/saver/default.rb (new file)

@@ -0,0 +1,57 @@
+require "inventory_refresh/save_collection/saver/base"
+
+module InventoryRefresh::SaveCollection
+  module Saver
+    class Default < InventoryRefresh::SaveCollection::Saver::Base
+      private
+
+      # Updates the passed record with hash data and stores primary key value into inventory_object.
+      #
+      # @param record [ApplicationRecord] record we want to update in DB
+      # @param hash [Hash] data we want to update the record with
+      # @param inventory_object [InventoryRefresh::InventoryObject] InventoryObject instance where we will store primary
+      #   key value
+      def update_record!(record, hash, inventory_object)
+        record.assign_attributes(hash.except(:id))
+        if !inventory_collection.check_changed? || record.changed?
+          record.save
+          inventory_collection.store_updated_records(record)
+        end
+
+        inventory_object.id = record.id
+      end
+
+      # Creates a new record in the DB using the passed hash data
+      #
+      # @param hash [Hash] hash with data we want to persist to DB
+      # @param inventory_object [InventoryRefresh::InventoryObject] InventoryObject instance where we will store primary
+      #   key value
+      def create_record!(hash, inventory_object)
+        record = inventory_collection.model_class.create!(hash.except(:id))
+        inventory_collection.store_created_records(record)
+
+        inventory_object.id = record.id
+      end
+
+      # Asserts we do not have duplicate records in the DB. If the record is duplicate we will destroy it.
+      #
+      # @param record [ApplicationRecord] record we want to update in DB
+      # @param index [String] manager_uuid of the record
+      # @return [Boolean] false if the record is duplicate
+      def assert_unique_record(record, index)
+        # TODO(lsmola) can go away once we indexed our DB with unique indexes
+        if unique_db_indexes.include?(index) # Include on Set is O(1)
+          # We have a duplicate in the DB, destroy it. A find_each method does automatically .order(:id => :asc)
+          # so we always keep the oldest record in the case of duplicates.
+          logger.warn("A duplicate record was detected and destroyed, inventory_collection: "\
+                      "'#{inventory_collection}', record: '#{record}', duplicate_index: '#{index}'")
+          record.destroy
+          return false
+        else
+          unique_db_indexes << index
+        end
+        true
+      end
+    end
+  end
+end
data/lib/inventory_refresh/save_collection/saver/partial_upsert_helper.rb

@@ -59,7 +59,7 @@ module InventoryRefresh::SaveCollection
         skeletal_inventory_objects_index = {}
 
         inventory_collection.skeletal_primary_index.each_value do |inventory_object|
-          attributes = inventory_object.
+          attributes = inventory_object.attributes_with_keys(inventory_collection, all_attribute_keys, inventory_object)
           index = build_stringified_reference(attributes, unique_index_keys)
 
           skeletal_attributes_index[index] = attributes
@@ -135,6 +135,7 @@ module InventoryRefresh::SaveCollection
           # We need to set correct timestamps_max for this particular attribute, based on what is in timestamps
           batch.each do |x|
             next unless x[:__non_serialized_versions][column_name]
+
             x[comparables_max_name] = x[:__non_serialized_versions][column_name]
           end
         end
@@ -200,25 +201,7 @@ module InventoryRefresh::SaveCollection
         )
       end
 
-      def comparable_timestamp(timestamp)
-        # Lets cast all timestamps to to_f, rounding the time comparing precision to miliseconds, that should be
-        # enough, since we are the ones setting the record version in collector. Otherwise we will have hard time with
-        # doing equality, since the value changes going through DB (DB. cuts it at 5 decimal places)
-
-        if timestamp.kind_of?(String)
-          Time.use_zone('UTC') { Time.zone.parse(timestamp) }.to_f.round(3)
-        elsif timestamp.kind_of?(Time)
-          timestamp.in_time_zone('UTC').to_f.round(3)
-        else
-          timestamp
-        end
-      end
-
       def skeletonize_or_skip_record(record_version, hash_version, record_versions_max, inventory_object)
-        record_version      = comparable_timestamp(record_version)
-        record_versions_max = comparable_timestamp(record_versions_max) if record_versions_max
-        hash_version        = comparable_timestamp(hash_version)
-
         # Skip updating this record, because it is old
         return true if record_version && hash_version && record_version >= hash_version
 
data/lib/inventory_refresh/save_collection/saver/retention_helper.rb

@@ -3,6 +3,28 @@ module InventoryRefresh::SaveCollection
   module RetentionHelper
     private
 
+    # Deletes a complement of referenced data
+    def delete_complement
+      return unless inventory_collection.delete_allowed?
+
+      all_manager_uuids_size = inventory_collection.all_manager_uuids.size
+
+      logger.debug("Processing :delete_complement of #{inventory_collection} of size "\
+                   "#{all_manager_uuids_size}...")
+
+      query = complement_of!(inventory_collection.all_manager_uuids,
+                             inventory_collection.all_manager_uuids_scope,
+                             inventory_collection.all_manager_uuids_timestamp)
+
+      ids_of_non_active_entities = ActiveRecord::Base.connection.execute(query.to_sql).to_a
+      ids_of_non_active_entities.each_slice(10_000) do |batch|
+        destroy_records!(batch)
+      end
+
+      logger.debug("Processing :delete_complement of #{inventory_collection} of size "\
+                   "#{all_manager_uuids_size}, deleted=#{inventory_collection.deleted_records.size}...Complete")
+    end
+
     # Applies strategy based on :retention_strategy parameter, or fallbacks to legacy_destroy_records.
     #
     # @param records [Array<ApplicationRecord, Hash, Array>] Records we want to delete or archive
@@ -13,9 +35,13 @@ module InventoryRefresh::SaveCollection
       return false unless inventory_collection.delete_allowed?
       return if records.blank?
 
-
-
-
+      if inventory_collection.retention_strategy
+        ids = ids_array(records)
+        inventory_collection.store_deleted_records(ids)
+        send("#{inventory_collection.retention_strategy}_all_records!", ids)
+      else
+        legacy_destroy_records!(records)
+      end
     end
 
     # Convert records to list of ids in format [{:id => X}, {:id => Y}...]
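The send("#{inventory_collection.retention_strategy}_all_records!", ids) call above dispatches by naming convention: a :destroy strategy resolves to destroy_all_records! (defined below in this file) and, by the same pattern, an :archive strategy would resolve to archive_all_records!. A stripped-down sketch of that convention; the class and method bodies here are illustrative only, not the gem's API:

    # Illustrative dynamic dispatch keyed on a retention strategy symbol.
    class Retainer
      def initialize(strategy)
        @strategy = strategy # e.g. :destroy or :archive
      end

      def apply(ids)
        send("#{@strategy}_all_records!", ids)
      end

      def destroy_all_records!(ids)
        puts "hard-deleting #{ids.size} rows"
      end

      def archive_all_records!(ids)
        puts "setting archived_on for #{ids.size} rows"
      end
    end

    Retainer.new(:archive).apply([{:id => 1}, {:id => 2}])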
@@ -45,6 +71,45 @@ module InventoryRefresh::SaveCollection
     def destroy_all_records!(records)
       inventory_collection.model_class.where(:id => records.map { |x| x[:id] }).delete_all
     end
+
+    # Deletes or sof-deletes records. If the model_class supports a custom class delete method, we will use it for
+    # batch soft-delete. This is the legacy method doing either ineffective deletion/archiving or requiring a method
+    # on a class.
+    #
+    # @param records [Array<ApplicationRecord, Hash>] Records we want to delete. If we have only hashes, we need to
+    #   to fetch ApplicationRecord objects from the DB
+    def legacy_destroy_records!(records)
+      # Is the delete_method rails standard deleting method?
+      rails_delete = %i[destroy delete].include?(inventory_collection.delete_method)
+      if !rails_delete && inventory_collection.model_class.respond_to?(inventory_collection.delete_method)
+        # We have custom delete method defined on a class, that means it supports batch destroy
+        inventory_collection.store_deleted_records(records.map { |x| {:id => record_key(x, primary_key)} })
+        inventory_collection.model_class.public_send(inventory_collection.delete_method, records.map { |x| record_key(x, primary_key) })
+      else
+        legacy_ineffective_destroy_records(records)
+      end
+    end
+
+    # Very ineffective way of deleting records, but is needed if we want to invoke hooks.
+    #
+    # @param records [Array<ApplicationRecord, Hash>] Records we want to delete. If we have only hashes, we need to
+    #   to fetch ApplicationRecord objects from the DB
+    def legacy_ineffective_destroy_records(records)
+      # We have either standard :destroy and :delete rails method, or custom instance level delete method
+      # Note: The standard :destroy and :delete rails method can't be batched because of the hooks and cascade destroy
+      ActiveRecord::Base.transaction do
+        if pure_sql_records_fetching
+          # For pure SQL fetching, we need to get the AR objects again, so we can call destroy
+          inventory_collection.model_class.where(:id => records.map { |x| record_key(x, primary_key) }).find_each do |record|
+            delete_record!(record)
+          end
+        else
+          records.each do |record|
+            delete_record!(record)
+          end
+        end
+      end
+    end
   end
 end
 end
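The two paths above mirror standard ActiveRecord semantics: delete_all issues a single SQL DELETE and skips callbacks, while per-record destroy instantiates each row and runs its hooks and :dependent cascades, which is why it cannot be batched. A quick contrast; the Disk model is hypothetical:

    ids = [1, 2, 3]
    # One DELETE statement; :destroy callbacks and :dependent cascades are skipped.
    Disk.where(:id => ids).delete_all
    # One record at a time; callbacks and cascades run, so it is much slower.
    Disk.where(:id => ids).find_each { |disk| disk.destroy }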
data/lib/inventory_refresh/save_collection/saver/sql_helper.rb

@@ -8,6 +8,9 @@ module InventoryRefresh::SaveCollection
   module SqlHelper
     include InventoryRefresh::Logging
 
+    # TODO(lsmola) all below methods should be rewritten to arel, but we need to first extend arel to be able to do
+    # this
+
     extend ActiveSupport::Concern
 
     included do
@@ -80,6 +83,128 @@ module InventoryRefresh::SaveCollection
         "#{value}::#{sql_type}"
       end
     end
+
+    # Effective way of doing multiselect
+    #
+    # If we use "(col1, col2) IN [(a,e), (b,f), (b,e)]" it's not great, just with 10k batch, we see
+    # *** ActiveRecord::StatementInvalid Exception: PG::StatementTooComplex: ERROR: stack depth limit exceeded
+    # HINT: Increase the configuration parameter "max_stack_depth" (currently 2048kB), after ensuring the
+    # platform's stack depth limit is adequate.
+    #
+    # If we use "(col1 = a AND col2 = e) OR (col1 = b AND col2 = f) OR (col1 = b AND col2 = e)" with 10k batch, it
+    # takes about 6s and consumes 300MB, with 100k it takes ~1h and consume 3GB in Postgre process
+    #
+    # The best way seems to be using CTE, where the list of values we want to map is turned to 'table' and we just
+    # do RIGHT OUTER JOIN to get the complement of given identifiers. Tested on getting complement of 100k items,
+    # using 2 cols (:ems_ref and :uid_ems) from total 150k rows. It takes ~1s and 350MB in Postgre process
+    #
+    # @param manager_uuids [Array<String>, Array[Hash]] Array with manager_uuids of entities. The keys have to match
+    #   inventory_collection.manager_ref. We allow passing just array of strings, if manager_ref.size ==1, to
+    #   spare some memory
+    # @return [Arel::SelectManager] Arel for getting complement of uuids. This method modifies the passed
+    #   manager_uuids to spare some memory
+    def complement_of!(manager_uuids, all_manager_uuids_scope, all_manager_uuids_timestamp)
+      all_attribute_keys = inventory_collection.manager_ref
+      all_attribute_keys_array = inventory_collection.manager_ref.map(&:to_s)
+
+      active_entities = Arel::Table.new(:active_entities)
+      active_entities_cte = Arel::Nodes::As.new(
+        active_entities,
+        Arel.sql("(#{active_entities_query(all_attribute_keys_array, manager_uuids)})")
+      )
+
+      all_entities = Arel::Table.new(:all_entities)
+      all_entities_cte = Arel::Nodes::As.new(
+        all_entities,
+        Arel.sql("(#{all_entities_query(all_manager_uuids_scope, all_manager_uuids_timestamp).select(:id, *all_attribute_keys_array).to_sql})")
+      )
+      join_condition = all_attribute_keys.map { |key| active_entities[key].eq(all_entities[key]) }.inject(:and)
+      where_condition = all_attribute_keys.map { |key| active_entities[key].eq(nil) }.inject(:and)
+
+      active_entities
+        .project(all_entities[:id])
+        .join(all_entities, Arel::Nodes::RightOuterJoin)
+        .on(join_condition)
+        .with(active_entities_cte, all_entities_cte)
+        .where(where_condition)
+    end
+
+    private
+
+    def all_entities_query(all_manager_uuids_scope, all_manager_uuids_timestamp)
+      all_entities_query = inventory_collection.full_collection_for_comparison
+      all_entities_query = all_entities_query.active if inventory_collection.retention_strategy == :archive
+
+      if all_manager_uuids_scope
+        scope_keys = all_manager_uuids_scope.first.keys.map { |x| association_to_foreign_key_mapping[x.to_sym] }.map(&:to_s)
+        scope = load_scope(all_manager_uuids_scope)
+        condition = inventory_collection.build_multi_selection_condition(scope, scope_keys)
+        all_entities_query = all_entities_query.where(condition)
+      end
+
+      if all_manager_uuids_timestamp && supports_column?(:resource_timestamp)
+        all_manager_uuids_timestamp = Time.parse(all_manager_uuids_timestamp).utc
+
+        date_field = model_class.arel_table[:resource_timestamp]
+        all_entities_query = all_entities_query.where(date_field.lt(all_manager_uuids_timestamp))
+      end
+      all_entities_query
+    end
+
+    def load_scope(all_manager_uuids_scope)
+      scope_keys = all_manager_uuids_scope.first.keys.to_set
+
+      all_manager_uuids_scope.map do |cond|
+        assert_scope!(scope_keys, cond)
+
+        cond.map do |key, value|
+          foreign_key = association_to_foreign_key_mapping[key.to_sym]
+          foreign_key_value = value.load&.id
+
+          assert_foreign_keys!(key, value, foreign_key, foreign_key_value)
+
+          [foreign_key, foreign_key_value]
+        end.to_h
+      end
+    end
+
+    def assert_scope!(scope_keys, cond)
+      if cond.keys.to_set != scope_keys
+        raise "'#{inventory_collection}' expected keys for :all_manager_uuids_scope are #{scope_keys.to_a}, got"\
+              " #{cond.keys}. Keys must be the same for all scopes provided."
+      end
+    end
+
+    def assert_foreign_keys!(key, value, foreign_key, foreign_key_value)
+      unless foreign_key
+        raise "'#{inventory_collection}' doesn't have relation :#{key} provided in :all_manager_uuids_scope."
+      end
+
+      unless foreign_key_value
+        raise "'#{inventory_collection}' couldn't load scope value :#{key} => #{value.inspect} provided in :all_manager_uuids_scope"
+      end
+    end
+
+    def active_entities_query(all_attribute_keys_array, manager_uuids)
+      connection = ActiveRecord::Base.connection
+
+      all_attribute_keys_array_q = all_attribute_keys_array.map { |x| quote_column_name(x) }
+      # For Postgre, only first set of values should contain the type casts
+      first_value = manager_uuids.shift.to_h
+      first_value = "(#{all_attribute_keys_array.map { |x| quote(connection, first_value[x], x, true) }.join(",")})"
+
+      # Rest of the values, without the type cast
+      values = manager_uuids.map! do |hash|
+        "(#{all_attribute_keys_array.map { |x| quote(connection, hash[x], x, false) }.join(",")})"
+      end.join(",")
+
+      values = values.blank? ? first_value : [first_value, values].join(",")
+
+      <<-SQL
+        SELECT *
+        FROM (VALUES #{values}) AS active_entities_table(#{all_attribute_keys_array_q.join(",")})
+      SQL
+    end
   end
 end
 end
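For reference, the complement query assembled by complement_of! has roughly the following shape. The SQL below is a hand-written illustration for a two-column manager_ref of ems_ref/uid_ems against a hypothetical vms table, not actual generated output:

    # Hand-written illustration of the CTE + RIGHT OUTER JOIN complement query;
    # table and column names are examples matching the comment above.
    example_sql = <<-SQL
      WITH active_entities AS (
             SELECT * FROM (VALUES ('ref-1', 'uid-1'), ('ref-2', 'uid-2'))
               AS active_entities_table(ems_ref, uid_ems)
           ),
           all_entities AS (
             SELECT id, ems_ref, uid_ems FROM vms
           )
      SELECT all_entities.id
      FROM active_entities
      RIGHT OUTER JOIN all_entities
        ON active_entities.ems_ref = all_entities.ems_ref
       AND active_entities.uid_ems = all_entities.uid_ems
      WHERE active_entities.ems_ref IS NULL
        AND active_entities.uid_ems IS NULL
    SQL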