inventory_refresh 0.3.3 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.codeclimate.yml +25 -30
- data/.github/workflows/ci.yaml +47 -0
- data/.rubocop.yml +3 -3
- data/.rubocop_cc.yml +3 -4
- data/.rubocop_local.yml +5 -2
- data/.whitesource +3 -0
- data/CHANGELOG.md +19 -0
- data/Gemfile +10 -4
- data/README.md +1 -2
- data/Rakefile +2 -2
- data/inventory_refresh.gemspec +9 -10
- data/lib/inventory_refresh/application_record_iterator.rb +25 -12
- data/lib/inventory_refresh/graph/topological_sort.rb +24 -26
- data/lib/inventory_refresh/graph.rb +2 -2
- data/lib/inventory_refresh/inventory_collection/builder.rb +37 -15
- data/lib/inventory_refresh/inventory_collection/data_storage.rb +9 -0
- data/lib/inventory_refresh/inventory_collection/helpers/initialize_helper.rb +147 -38
- data/lib/inventory_refresh/inventory_collection/helpers/questions_helper.rb +49 -5
- data/lib/inventory_refresh/inventory_collection/index/proxy.rb +35 -3
- data/lib/inventory_refresh/inventory_collection/index/type/base.rb +8 -0
- data/lib/inventory_refresh/inventory_collection/index/type/local_db.rb +2 -0
- data/lib/inventory_refresh/inventory_collection/index/type/skeletal.rb +1 -0
- data/lib/inventory_refresh/inventory_collection/reference.rb +1 -0
- data/lib/inventory_refresh/inventory_collection/references_storage.rb +17 -0
- data/lib/inventory_refresh/inventory_collection/scanner.rb +91 -3
- data/lib/inventory_refresh/inventory_collection/serialization.rb +16 -10
- data/lib/inventory_refresh/inventory_collection.rb +122 -64
- data/lib/inventory_refresh/inventory_object.rb +74 -40
- data/lib/inventory_refresh/inventory_object_lazy.rb +17 -10
- data/lib/inventory_refresh/null_logger.rb +2 -2
- data/lib/inventory_refresh/persister.rb +31 -65
- data/lib/inventory_refresh/save_collection/base.rb +4 -2
- data/lib/inventory_refresh/save_collection/saver/base.rb +114 -15
- data/lib/inventory_refresh/save_collection/saver/batch.rb +17 -0
- data/lib/inventory_refresh/save_collection/saver/concurrent_safe_batch.rb +129 -51
- data/lib/inventory_refresh/save_collection/saver/default.rb +57 -0
- data/lib/inventory_refresh/save_collection/saver/partial_upsert_helper.rb +2 -19
- data/lib/inventory_refresh/save_collection/saver/retention_helper.rb +68 -3
- data/lib/inventory_refresh/save_collection/saver/sql_helper.rb +125 -0
- data/lib/inventory_refresh/save_collection/saver/sql_helper_update.rb +10 -6
- data/lib/inventory_refresh/save_collection/saver/sql_helper_upsert.rb +28 -16
- data/lib/inventory_refresh/save_collection/sweeper.rb +17 -93
- data/lib/inventory_refresh/save_collection/topological_sort.rb +5 -5
- data/lib/inventory_refresh/save_inventory.rb +5 -12
- data/lib/inventory_refresh/target.rb +73 -0
- data/lib/inventory_refresh/target_collection.rb +92 -0
- data/lib/inventory_refresh/version.rb +1 -1
- data/lib/inventory_refresh.rb +2 -0
- metadata +42 -39
- data/.travis.yml +0 -23
- data/lib/inventory_refresh/exception.rb +0 -8
@@ -46,6 +46,41 @@ module InventoryRefresh::SaveCollection
|
|
46
46
|
record[select_keys_indexes[key]]
|
47
47
|
end
|
48
48
|
|
49
|
+
# Returns iterator or relation based on settings
|
50
|
+
#
|
51
|
+
# @param association [Symbol] An existing association on manager
|
52
|
+
# @return [ActiveRecord::Relation, InventoryRefresh::ApplicationRecordIterator] iterator or relation based on settings
|
53
|
+
def batch_iterator(association)
|
54
|
+
if pure_sql_records_fetching
|
55
|
+
# Building fast iterator doing pure SQL query and therefore avoiding redundant creation of AR objects. The
|
56
|
+
# iterator responds to find_in_batches, so it acts like the AR relation. For targeted refresh, the association
|
57
|
+
# can already be ApplicationRecordIterator, so we will skip that.
|
58
|
+
pure_sql_iterator = lambda do |&block|
|
59
|
+
primary_key_offset = nil
|
60
|
+
loop do
|
61
|
+
relation = association.select(*select_keys)
|
62
|
+
.reorder("#{primary_key} ASC")
|
63
|
+
.limit(batch_size)
|
64
|
+
# Using rails way of comparing primary key instead of offset
|
65
|
+
relation = relation.where(arel_primary_key.gt(primary_key_offset)) if primary_key_offset
|
66
|
+
records = get_connection.query(relation.to_sql)
|
67
|
+
last_record = records.last
|
68
|
+
block.call(records)
|
69
|
+
|
70
|
+
break if records.size < batch_size
|
71
|
+
|
72
|
+
primary_key_offset = record_key(last_record, primary_key)
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
InventoryRefresh::ApplicationRecordIterator.new(:iterator => pure_sql_iterator)
|
77
|
+
else
|
78
|
+
# Normal Rails ActiveRecord::Relation where we can call find_in_batches or
|
79
|
+
# InventoryRefresh::ApplicationRecordIterator passed from targeted refresh
|
80
|
+
association
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
49
84
|
# Saves the InventoryCollection
|
50
85
|
#
|
51
86
|
# @param association [Symbol] An existing association on manager
|
@@ -55,7 +90,7 @@ module InventoryRefresh::SaveCollection
|
|
55
90
|
all_attribute_keys = Set.new + inventory_collection.batch_extra_attributes
|
56
91
|
|
57
92
|
inventory_collection.each do |inventory_object|
|
58
|
-
attributes = inventory_object.
|
93
|
+
attributes = inventory_object.attributes_with_keys(inventory_collection, all_attribute_keys, inventory_object)
|
59
94
|
index = build_stringified_reference(attributes, unique_index_keys)
|
60
95
|
|
61
96
|
# Interesting fact: not building attributes_index and using only inventory_objects_index doesn't do much
|
@@ -69,7 +104,7 @@ module InventoryRefresh::SaveCollection
|
|
69
104
|
logger.debug("Processing #{inventory_collection} of size #{inventory_collection.size}...")
|
70
105
|
|
71
106
|
unless inventory_collection.create_only?
|
72
|
-
|
107
|
+
update_or_destroy_records!(batch_iterator(association), inventory_objects_index, attributes_index, all_attribute_keys)
|
73
108
|
end
|
74
109
|
|
75
110
|
unless inventory_collection.create_only?
|
@@ -78,11 +113,15 @@ module InventoryRefresh::SaveCollection
|
|
78
113
|
|
79
114
|
# Records that were not found in the DB but sent for saving, we will be creating these in the DB.
|
80
115
|
if inventory_collection.create_allowed?
|
116
|
+
on_conflict = inventory_collection.parallel_safe? ? :do_update : nil
|
117
|
+
|
81
118
|
inventory_objects_index.each_slice(batch_size_for_persisting) do |batch|
|
82
|
-
create_records!(all_attribute_keys, batch, attributes_index, :on_conflict =>
|
119
|
+
create_records!(all_attribute_keys, batch, attributes_index, :on_conflict => on_conflict)
|
83
120
|
end
|
84
121
|
|
85
|
-
|
122
|
+
if inventory_collection.parallel_safe?
|
123
|
+
create_or_update_partial_records(all_attribute_keys)
|
124
|
+
end
|
86
125
|
end
|
87
126
|
|
88
127
|
logger.debug("Marking :last_seen_at of #{inventory_collection} of size #{inventory_collection.size}...")
|
@@ -103,7 +142,7 @@ module InventoryRefresh::SaveCollection
|
|
103
142
|
end
|
104
143
|
|
105
144
|
def expand_all_attribute_keys!(all_attribute_keys)
|
106
|
-
%i
|
145
|
+
%i[created_at updated_at created_on updated_on].each do |col|
|
107
146
|
all_attribute_keys << col if supports_column?(col)
|
108
147
|
end
|
109
148
|
all_attribute_keys << :type if supports_sti?
|
@@ -111,7 +150,7 @@ module InventoryRefresh::SaveCollection
|
|
111
150
|
end
|
112
151
|
|
113
152
|
def mark_last_seen_at(attributes_index)
|
114
|
-
return unless supports_column?(:last_seen_at)
|
153
|
+
return unless supports_column?(:last_seen_at) && inventory_collection.parallel_safe?
|
115
154
|
return if attributes_index.blank?
|
116
155
|
|
117
156
|
all_attribute_keys = [:last_seen_at]
|
@@ -124,7 +163,8 @@ module InventoryRefresh::SaveCollection
|
|
124
163
|
get_connection.execute(query)
|
125
164
|
end
|
126
165
|
|
127
|
-
# Batch updates existing records that are in the DB using attributes_index.
|
166
|
+
# Batch updates existing records that are in the DB using attributes_index and deletes the ones that were not
|
167
|
+
# present in inventory_objects_index.
|
128
168
|
#
|
129
169
|
# @param records_batch_iterator [ActiveRecord::Relation, InventoryRefresh::ApplicationRecordIterator] iterator or
|
130
170
|
# relation, both responding to :find_in_batches method
|
@@ -132,11 +172,12 @@ module InventoryRefresh::SaveCollection
|
|
132
172
|
# @param attributes_index [Hash{String => Hash}] Hash of data hashes with only keys that are column names of the
|
133
173
|
# models's table
|
134
174
|
# @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
|
135
|
-
def
|
136
|
-
hashes_for_update
|
175
|
+
def update_or_destroy_records!(records_batch_iterator, inventory_objects_index, attributes_index, all_attribute_keys)
|
176
|
+
hashes_for_update = []
|
177
|
+
records_for_destroy = []
|
137
178
|
indexed_inventory_objects = {}
|
138
179
|
|
139
|
-
records_batch_iterator.find_in_batches(:batch_size => batch_size
|
180
|
+
records_batch_iterator.find_in_batches(:batch_size => batch_size) do |batch|
|
140
181
|
update_time = time_now
|
141
182
|
|
142
183
|
batch.each do |record|
|
@@ -149,14 +190,20 @@ module InventoryRefresh::SaveCollection
|
|
149
190
|
inventory_object = inventory_objects_index.delete(index)
|
150
191
|
hash = attributes_index[index]
|
151
192
|
|
152
|
-
if inventory_object
|
193
|
+
if inventory_object.nil?
|
194
|
+
# Record was found in the DB but not sent for saving, that means it doesn't exist anymore and we should
|
195
|
+
# delete it from the DB.
|
196
|
+
if inventory_collection.delete_allowed?
|
197
|
+
records_for_destroy << record
|
198
|
+
end
|
199
|
+
else
|
153
200
|
# Record was found in the DB and sent for saving, we will be updating the DB.
|
154
201
|
inventory_object.id = primary_key_value
|
155
202
|
next unless assert_referential_integrity(hash)
|
203
|
+
next unless changed?(record, hash, all_attribute_keys)
|
156
204
|
|
157
|
-
|
158
|
-
|
159
|
-
if supports_remote_data_timestamp?(all_attribute_keys) || supports_remote_data_version?(all_attribute_keys)
|
205
|
+
if inventory_collection.parallel_safe? &&
|
206
|
+
(supports_remote_data_timestamp?(all_attribute_keys) || supports_remote_data_version?(all_attribute_keys))
|
160
207
|
|
161
208
|
version_attr, max_version_attr = if supports_remote_data_timestamp?(all_attribute_keys)
|
162
209
|
[:resource_timestamp, :resource_timestamps_max]
|
@@ -164,16 +211,16 @@ module InventoryRefresh::SaveCollection
|
|
164
211
|
[:resource_counter, :resource_counters_max]
|
165
212
|
end
|
166
213
|
|
167
|
-
|
168
|
-
|
214
|
+
next if skeletonize_or_skip_record(record_key(record, version_attr),
|
215
|
+
hash[version_attr],
|
216
|
+
record_key(record, max_version_attr),
|
217
|
+
inventory_object)
|
169
218
|
end
|
170
219
|
|
171
220
|
hash_for_update = if inventory_collection.use_ar_object?
|
172
221
|
record.assign_attributes(hash.except(:id))
|
173
|
-
next unless changed?(record)
|
174
|
-
|
175
222
|
values_for_database!(all_attribute_keys,
|
176
|
-
|
223
|
+
record.attributes.symbolize_keys)
|
177
224
|
elsif serializable_keys?
|
178
225
|
# TODO(lsmola) hash data with current DB data to allow subset of data being sent,
|
179
226
|
# otherwise we would nullify the not sent attributes. Test e.g. on disks in cloud
|
@@ -184,14 +231,6 @@ module InventoryRefresh::SaveCollection
|
|
184
231
|
# otherwise we would nullify the not sent attributes. Test e.g. on disks in cloud
|
185
232
|
hash
|
186
233
|
end
|
187
|
-
|
188
|
-
if supports_remote_data_timestamp?(all_attribute_keys) || supports_remote_data_version?(all_attribute_keys)
|
189
|
-
next if skeletonize_or_skip_record(record_version,
|
190
|
-
hash[version_attr],
|
191
|
-
record_version_max,
|
192
|
-
inventory_object)
|
193
|
-
end
|
194
|
-
|
195
234
|
assign_attributes_for_update!(hash_for_update, update_time)
|
196
235
|
|
197
236
|
hash_for_update[:id] = primary_key_value
|
@@ -207,22 +246,39 @@ module InventoryRefresh::SaveCollection
|
|
207
246
|
hashes_for_update = []
|
208
247
|
indexed_inventory_objects = {}
|
209
248
|
end
|
249
|
+
|
250
|
+
# Destroy in batches
|
251
|
+
if records_for_destroy.size >= batch_size_for_persisting
|
252
|
+
destroy_records!(records_for_destroy)
|
253
|
+
records_for_destroy = []
|
254
|
+
end
|
210
255
|
end
|
211
256
|
|
212
257
|
# Update the last batch
|
213
258
|
update_records!(all_attribute_keys, hashes_for_update, indexed_inventory_objects)
|
214
259
|
hashes_for_update = [] # Cleanup so GC can release it sooner
|
260
|
+
|
261
|
+
# Destroy the last batch
|
262
|
+
destroy_records!(records_for_destroy)
|
263
|
+
records_for_destroy = [] # Cleanup so GC can release it sooner
|
215
264
|
end
|
216
265
|
|
217
|
-
def changed?(
|
266
|
+
def changed?(_record, _hash, _all_attribute_keys)
|
218
267
|
return true unless inventory_collection.check_changed?
|
219
268
|
|
220
|
-
#
|
221
|
-
|
222
|
-
#
|
223
|
-
|
224
|
-
#
|
225
|
-
|
269
|
+
# TODO(lsmola) this check needs to be disabled now, because it doesn't work with lazy_find having secondary
|
270
|
+
# indexes. Examples: we save a pod before we save a project, that means the project lazy_find won't evaluate,
|
271
|
+
# because we load it with secondary index and can't do skeletal precreate. Then when the object is being saved
|
272
|
+
# again, the lazy_find is evaluated, but the resource version is not changed, so the row is not saved.
|
273
|
+
#
|
274
|
+
# To keep this quick .changed? check, we might need to extend this, so the resource_version doesn't save until
|
275
|
+
# all lazy_links of the row are evaluated.
|
276
|
+
#
|
277
|
+
# if supports_resource_version?(all_attribute_keys) && supports_column?(resource_version_column)
|
278
|
+
# record_resource_version = record_key(record, resource_version_column.to_s)
|
279
|
+
#
|
280
|
+
# return record_resource_version != hash[resource_version_column]
|
281
|
+
# end
|
226
282
|
|
227
283
|
true
|
228
284
|
end
|
@@ -230,7 +286,10 @@ module InventoryRefresh::SaveCollection
|
|
230
286
|
def db_columns_index(record, pure_sql: false)
|
231
287
|
# Incoming values are in SQL string form.
|
232
288
|
# TODO(lsmola) unify this behavior with object_index_with_keys method in InventoryCollection
|
289
|
+
# TODO(lsmola) maybe we can drop the whole pure sql fetching, since everything will be targeted refresh
|
233
290
|
# with streaming refresh? Maybe just metrics and events will not be, but those should be upsert only
|
291
|
+
# TODO(lsmola) taking ^ into account, we can't drop pure sql, since that is returned by batch insert and
|
292
|
+
# update queries
|
234
293
|
unique_index_keys_to_s.map do |attribute|
|
235
294
|
value = if pure_sql
|
236
295
|
record[attribute]
|
@@ -261,13 +320,20 @@ module InventoryRefresh::SaveCollection
|
|
261
320
|
def update_records!(all_attribute_keys, hashes, indexed_inventory_objects)
|
262
321
|
return if hashes.blank?
|
263
322
|
|
323
|
+
unless inventory_collection.parallel_safe?
|
324
|
+
# We need to update the stored records before we save it, since hashes are modified
|
325
|
+
inventory_collection.store_updated_records(hashes)
|
326
|
+
end
|
327
|
+
|
264
328
|
query = build_update_query(all_attribute_keys, hashes)
|
265
329
|
result = get_connection.execute(query)
|
266
330
|
|
267
|
-
|
268
|
-
|
331
|
+
if inventory_collection.parallel_safe?
|
332
|
+
# We will check for timestamp clashes of full row update and we will fallback to skeletal update
|
333
|
+
inventory_collection.store_updated_records(result)
|
269
334
|
|
270
|
-
|
335
|
+
skeletonize_ignored_records!(indexed_inventory_objects, result)
|
336
|
+
end
|
271
337
|
|
272
338
|
result
|
273
339
|
end
|
@@ -287,7 +353,11 @@ module InventoryRefresh::SaveCollection
|
|
287
353
|
hashes = []
|
288
354
|
create_time = time_now
|
289
355
|
batch.each do |index, inventory_object|
|
290
|
-
hash = if
|
356
|
+
hash = if inventory_collection.use_ar_object?
|
357
|
+
record = inventory_collection.model_class.new(attributes_index[index])
|
358
|
+
values_for_database!(all_attribute_keys,
|
359
|
+
record.attributes.symbolize_keys)
|
360
|
+
elsif serializable_keys?
|
291
361
|
values_for_database!(all_attribute_keys,
|
292
362
|
attributes_index[index])
|
293
363
|
else
|
@@ -309,19 +379,24 @@ module InventoryRefresh::SaveCollection
|
|
309
379
|
build_insert_query(all_attribute_keys, hashes, :on_conflict => on_conflict, :mode => :full)
|
310
380
|
)
|
311
381
|
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
382
|
+
if inventory_collection.parallel_safe?
|
383
|
+
# We've done upsert, so records were either created or updated. We can recognize that by checking if
|
384
|
+
# created and updated timestamps are the same
|
385
|
+
created_attr = "created_on" if inventory_collection.supports_column?(:created_on)
|
386
|
+
created_attr ||= "created_at" if inventory_collection.supports_column?(:created_at)
|
387
|
+
updated_attr = "updated_on" if inventory_collection.supports_column?(:updated_on)
|
388
|
+
updated_attr ||= "updated_at" if inventory_collection.supports_column?(:updated_at)
|
389
|
+
|
390
|
+
if created_attr && updated_attr
|
391
|
+
created, updated = result.to_a.partition { |x| x[created_attr] == x[updated_attr] }
|
392
|
+
inventory_collection.store_created_records(created)
|
393
|
+
inventory_collection.store_updated_records(updated)
|
394
|
+
else
|
395
|
+
# The record doesn't have both created and updated attrs, so we'll take all as created
|
396
|
+
inventory_collection.store_created_records(result)
|
397
|
+
end
|
323
398
|
else
|
324
|
-
#
|
399
|
+
# We've done just insert, so all records were created
|
325
400
|
inventory_collection.store_created_records(result)
|
326
401
|
end
|
327
402
|
|
@@ -334,7 +409,9 @@ module InventoryRefresh::SaveCollection
|
|
334
409
|
:on_conflict => on_conflict)
|
335
410
|
end
|
336
411
|
|
337
|
-
|
412
|
+
if inventory_collection.parallel_safe?
|
413
|
+
skeletonize_ignored_records!(indexed_inventory_objects, result, :all_unique_columns => true)
|
414
|
+
end
|
338
415
|
end
|
339
416
|
|
340
417
|
# Stores primary_key values of created records into associated InventoryObject objects.
|
@@ -365,6 +442,7 @@ module InventoryRefresh::SaveCollection
|
|
365
442
|
inventory_object[ref] = attributes[ref]
|
366
443
|
|
367
444
|
next unless (foreign_key = association_to_foreign_key_mapping[ref])
|
445
|
+
|
368
446
|
base_class_name = attributes[association_to_foreign_type_mapping[ref].try(:to_sym)] || association_to_base_class_mapping[ref]
|
369
447
|
id = attributes[foreign_key.to_sym]
|
370
448
|
inventory_object[ref] = InventoryRefresh::ApplicationRecordReference.new(base_class_name, id)
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require "inventory_refresh/save_collection/saver/base"
|
2
|
+
|
3
|
+
module InventoryRefresh::SaveCollection
|
4
|
+
module Saver
|
5
|
+
class Default < InventoryRefresh::SaveCollection::Saver::Base
|
6
|
+
private
|
7
|
+
|
8
|
+
# Updates the passed record with hash data and stores primary key value into inventory_object.
|
9
|
+
#
|
10
|
+
# @param record [ApplicationRecord] record we want to update in DB
|
11
|
+
# @param hash [Hash] data we want to update the record with
|
12
|
+
# @param inventory_object [InventoryRefresh::InventoryObject] InventoryObject instance where we will store primary
|
13
|
+
# key value
|
14
|
+
def update_record!(record, hash, inventory_object)
|
15
|
+
record.assign_attributes(hash.except(:id))
|
16
|
+
if !inventory_collection.check_changed? || record.changed?
|
17
|
+
record.save
|
18
|
+
inventory_collection.store_updated_records(record)
|
19
|
+
end
|
20
|
+
|
21
|
+
inventory_object.id = record.id
|
22
|
+
end
|
23
|
+
|
24
|
+
# Creates a new record in the DB using the passed hash data
|
25
|
+
#
|
26
|
+
# @param hash [Hash] hash with data we want to persist to DB
|
27
|
+
# @param inventory_object [InventoryRefresh::InventoryObject] InventoryObject instance where we will store primary
|
28
|
+
# key value
|
29
|
+
def create_record!(hash, inventory_object)
|
30
|
+
record = inventory_collection.model_class.create!(hash.except(:id))
|
31
|
+
inventory_collection.store_created_records(record)
|
32
|
+
|
33
|
+
inventory_object.id = record.id
|
34
|
+
end
|
35
|
+
|
36
|
+
# Asserts we do not have duplicate records in the DB. If the record is duplicate we will destroy it.
|
37
|
+
#
|
38
|
+
# @param record [ApplicationRecord] record we want to update in DB
|
39
|
+
# @param index [String] manager_uuid of the record
|
40
|
+
# @return [Boolean] false if the record is duplicate
|
41
|
+
def assert_unique_record(record, index)
|
42
|
+
# TODO(lsmola) can go away once we indexed our DB with unique indexes
|
43
|
+
if unique_db_indexes.include?(index) # Include on Set is O(1)
|
44
|
+
# We have a duplicate in the DB, destroy it. The find_each method automatically applies .order(:id => :asc)
|
45
|
+
# so we always keep the oldest record in the case of duplicates.
|
46
|
+
logger.warn("A duplicate record was detected and destroyed, inventory_collection: "\
|
47
|
+
"'#{inventory_collection}', record: '#{record}', duplicate_index: '#{index}'")
|
48
|
+
record.destroy
|
49
|
+
return false
|
50
|
+
else
|
51
|
+
unique_db_indexes << index
|
52
|
+
end
|
53
|
+
true
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
@@ -59,7 +59,7 @@ module InventoryRefresh::SaveCollection
|
|
59
59
|
skeletal_inventory_objects_index = {}
|
60
60
|
|
61
61
|
inventory_collection.skeletal_primary_index.each_value do |inventory_object|
|
62
|
-
attributes = inventory_object.
|
62
|
+
attributes = inventory_object.attributes_with_keys(inventory_collection, all_attribute_keys, inventory_object)
|
63
63
|
index = build_stringified_reference(attributes, unique_index_keys)
|
64
64
|
|
65
65
|
skeletal_attributes_index[index] = attributes
|
@@ -135,6 +135,7 @@ module InventoryRefresh::SaveCollection
|
|
135
135
|
# We need to set correct timestamps_max for this particular attribute, based on what is in timestamps
|
136
136
|
batch.each do |x|
|
137
137
|
next unless x[:__non_serialized_versions][column_name]
|
138
|
+
|
138
139
|
x[comparables_max_name] = x[:__non_serialized_versions][column_name]
|
139
140
|
end
|
140
141
|
end
|
@@ -200,25 +201,7 @@ module InventoryRefresh::SaveCollection
|
|
200
201
|
)
|
201
202
|
end
|
202
203
|
|
203
|
-
def comparable_timestamp(timestamp)
|
204
|
-
# Lets cast all timestamps to to_f, rounding the time comparing precision to miliseconds, that should be
|
205
|
-
# enough, since we are the ones setting the record version in collector. Otherwise we will have hard time with
|
206
|
-
# doing equality, since the value changes going through DB (DB. cuts it at 5 decimal places)
|
207
|
-
|
208
|
-
if timestamp.kind_of?(String)
|
209
|
-
Time.use_zone('UTC') { Time.zone.parse(timestamp) }.to_f.round(3)
|
210
|
-
elsif timestamp.kind_of?(Time)
|
211
|
-
timestamp.in_time_zone('UTC').to_f.round(3)
|
212
|
-
else
|
213
|
-
timestamp
|
214
|
-
end
|
215
|
-
end
|
216
|
-
|
217
204
|
def skeletonize_or_skip_record(record_version, hash_version, record_versions_max, inventory_object)
|
218
|
-
record_version = comparable_timestamp(record_version)
|
219
|
-
record_versions_max = comparable_timestamp(record_versions_max) if record_versions_max
|
220
|
-
hash_version = comparable_timestamp(hash_version)
|
221
|
-
|
222
205
|
# Skip updating this record, because it is old
|
223
206
|
return true if record_version && hash_version && record_version >= hash_version
|
224
207
|
|
@@ -3,6 +3,28 @@ module InventoryRefresh::SaveCollection
|
|
3
3
|
module RetentionHelper
|
4
4
|
private
|
5
5
|
|
6
|
+
# Deletes a complement of referenced data
|
7
|
+
def delete_complement
|
8
|
+
return unless inventory_collection.delete_allowed?
|
9
|
+
|
10
|
+
all_manager_uuids_size = inventory_collection.all_manager_uuids.size
|
11
|
+
|
12
|
+
logger.debug("Processing :delete_complement of #{inventory_collection} of size "\
|
13
|
+
"#{all_manager_uuids_size}...")
|
14
|
+
|
15
|
+
query = complement_of!(inventory_collection.all_manager_uuids,
|
16
|
+
inventory_collection.all_manager_uuids_scope,
|
17
|
+
inventory_collection.all_manager_uuids_timestamp)
|
18
|
+
|
19
|
+
ids_of_non_active_entities = ActiveRecord::Base.connection.execute(query.to_sql).to_a
|
20
|
+
ids_of_non_active_entities.each_slice(10_000) do |batch|
|
21
|
+
destroy_records!(batch)
|
22
|
+
end
|
23
|
+
|
24
|
+
logger.debug("Processing :delete_complement of #{inventory_collection} of size "\
|
25
|
+
"#{all_manager_uuids_size}, deleted=#{inventory_collection.deleted_records.size}...Complete")
|
26
|
+
end
|
27
|
+
|
6
28
|
# Applies strategy based on :retention_strategy parameter, or fallbacks to legacy_destroy_records.
|
7
29
|
#
|
8
30
|
# @param records [Array<ApplicationRecord, Hash, Array>] Records we want to delete or archive
|
@@ -13,9 +35,13 @@ module InventoryRefresh::SaveCollection
|
|
13
35
|
return false unless inventory_collection.delete_allowed?
|
14
36
|
return if records.blank?
|
15
37
|
|
16
|
-
|
17
|
-
|
18
|
-
|
38
|
+
if inventory_collection.retention_strategy
|
39
|
+
ids = ids_array(records)
|
40
|
+
inventory_collection.store_deleted_records(ids)
|
41
|
+
send("#{inventory_collection.retention_strategy}_all_records!", ids)
|
42
|
+
else
|
43
|
+
legacy_destroy_records!(records)
|
44
|
+
end
|
19
45
|
end
|
20
46
|
|
21
47
|
# Convert records to list of ids in format [{:id => X}, {:id => Y}...]
|
@@ -45,6 +71,45 @@ module InventoryRefresh::SaveCollection
|
|
45
71
|
def destroy_all_records!(records)
|
46
72
|
inventory_collection.model_class.where(:id => records.map { |x| x[:id] }).delete_all
|
47
73
|
end
|
74
|
+
|
75
|
+
# Deletes or soft-deletes records. If the model_class supports a custom class delete method, we will use it for
|
76
|
+
# batch soft-delete. This is the legacy method doing either ineffective deletion/archiving or requiring a method
|
77
|
+
# on a class.
|
78
|
+
#
|
79
|
+
# @param records [Array<ApplicationRecord, Hash>] Records we want to delete. If we have only hashes, we need to
|
80
|
+
# fetch ApplicationRecord objects from the DB
|
81
|
+
def legacy_destroy_records!(records)
|
82
|
+
# Is the delete_method rails standard deleting method?
|
83
|
+
rails_delete = %i[destroy delete].include?(inventory_collection.delete_method)
|
84
|
+
if !rails_delete && inventory_collection.model_class.respond_to?(inventory_collection.delete_method)
|
85
|
+
# We have custom delete method defined on a class, that means it supports batch destroy
|
86
|
+
inventory_collection.store_deleted_records(records.map { |x| {:id => record_key(x, primary_key)} })
|
87
|
+
inventory_collection.model_class.public_send(inventory_collection.delete_method, records.map { |x| record_key(x, primary_key) })
|
88
|
+
else
|
89
|
+
legacy_ineffective_destroy_records(records)
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
# Very ineffective way of deleting records, but is needed if we want to invoke hooks.
|
94
|
+
#
|
95
|
+
# @param records [Array<ApplicationRecord, Hash>] Records we want to delete. If we have only hashes, we need to
|
96
|
+
# fetch ApplicationRecord objects from the DB
|
97
|
+
def legacy_ineffective_destroy_records(records)
|
98
|
+
# We have either standard :destroy and :delete rails method, or custom instance level delete method
|
99
|
+
# Note: The standard :destroy and :delete rails method can't be batched because of the hooks and cascade destroy
|
100
|
+
ActiveRecord::Base.transaction do
|
101
|
+
if pure_sql_records_fetching
|
102
|
+
# For pure SQL fetching, we need to get the AR objects again, so we can call destroy
|
103
|
+
inventory_collection.model_class.where(:id => records.map { |x| record_key(x, primary_key) }).find_each do |record|
|
104
|
+
delete_record!(record)
|
105
|
+
end
|
106
|
+
else
|
107
|
+
records.each do |record|
|
108
|
+
delete_record!(record)
|
109
|
+
end
|
110
|
+
end
|
111
|
+
end
|
112
|
+
end
|
48
113
|
end
|
49
114
|
end
|
50
115
|
end
|
@@ -8,6 +8,9 @@ module InventoryRefresh::SaveCollection
|
|
8
8
|
module SqlHelper
|
9
9
|
include InventoryRefresh::Logging
|
10
10
|
|
11
|
+
# TODO(lsmola) all below methods should be rewritten to arel, but we need to first extend arel to be able to do
|
12
|
+
# this
|
13
|
+
|
11
14
|
extend ActiveSupport::Concern
|
12
15
|
|
13
16
|
included do
|
@@ -80,6 +83,128 @@ module InventoryRefresh::SaveCollection
|
|
80
83
|
"#{value}::#{sql_type}"
|
81
84
|
end
|
82
85
|
end
|
86
|
+
|
87
|
+
# Effective way of doing multiselect
|
88
|
+
#
|
89
|
+
# If we use "(col1, col2) IN [(a,e), (b,f), (b,e)]" it's not great, just with 10k batch, we see
|
90
|
+
# *** ActiveRecord::StatementInvalid Exception: PG::StatementTooComplex: ERROR: stack depth limit exceeded
|
91
|
+
# HINT: Increase the configuration parameter "max_stack_depth" (currently 2048kB), after ensuring the
|
92
|
+
# platform's stack depth limit is adequate.
|
93
|
+
#
|
94
|
+
# If we use "(col1 = a AND col2 = e) OR (col1 = b AND col2 = f) OR (col1 = b AND col2 = e)" with 10k batch, it
|
95
|
+
# takes about 6s and consumes 300MB, with 100k it takes ~1h and consumes 3GB in the PostgreSQL process
|
96
|
+
#
|
97
|
+
# The best way seems to be using CTE, where the list of values we want to map is turned to 'table' and we just
|
98
|
+
# do RIGHT OUTER JOIN to get the complement of given identifiers. Tested on getting complement of 100k items,
|
99
|
+
# using 2 cols (:ems_ref and :uid_ems) from total 150k rows. It takes ~1s and 350MB in the PostgreSQL process
|
100
|
+
#
|
101
|
+
# @param manager_uuids [Array<String>, Array<Hash>] Array with manager_uuids of entities. The keys have to match
|
102
|
+
# inventory_collection.manager_ref. We allow passing just array of strings, if manager_ref.size == 1, to
|
103
|
+
# spare some memory
|
104
|
+
# @return [Arel::SelectManager] Arel for getting complement of uuids. This method modifies the passed
|
105
|
+
# manager_uuids to spare some memory
|
106
|
+
def complement_of!(manager_uuids, all_manager_uuids_scope, all_manager_uuids_timestamp)
|
107
|
+
all_attribute_keys = inventory_collection.manager_ref
|
108
|
+
all_attribute_keys_array = inventory_collection.manager_ref.map(&:to_s)
|
109
|
+
|
110
|
+
active_entities = Arel::Table.new(:active_entities)
|
111
|
+
active_entities_cte = Arel::Nodes::As.new(
|
112
|
+
active_entities,
|
113
|
+
Arel.sql("(#{active_entities_query(all_attribute_keys_array, manager_uuids)})")
|
114
|
+
)
|
115
|
+
|
116
|
+
all_entities = Arel::Table.new(:all_entities)
|
117
|
+
all_entities_cte = Arel::Nodes::As.new(
|
118
|
+
all_entities,
|
119
|
+
Arel.sql("(#{all_entities_query(all_manager_uuids_scope, all_manager_uuids_timestamp).select(:id, *all_attribute_keys_array).to_sql})")
|
120
|
+
)
|
121
|
+
join_condition = all_attribute_keys.map { |key| active_entities[key].eq(all_entities[key]) }.inject(:and)
|
122
|
+
where_condition = all_attribute_keys.map { |key| active_entities[key].eq(nil) }.inject(:and)
|
123
|
+
|
124
|
+
active_entities
|
125
|
+
.project(all_entities[:id])
|
126
|
+
.join(all_entities, Arel::Nodes::RightOuterJoin)
|
127
|
+
.on(join_condition)
|
128
|
+
.with(active_entities_cte, all_entities_cte)
|
129
|
+
.where(where_condition)
|
130
|
+
end
|
131
|
+
|
132
|
+
private
|
133
|
+
|
134
|
+
def all_entities_query(all_manager_uuids_scope, all_manager_uuids_timestamp)
|
135
|
+
all_entities_query = inventory_collection.full_collection_for_comparison
|
136
|
+
all_entities_query = all_entities_query.active if inventory_collection.retention_strategy == :archive
|
137
|
+
|
138
|
+
if all_manager_uuids_scope
|
139
|
+
scope_keys = all_manager_uuids_scope.first.keys.map { |x| association_to_foreign_key_mapping[x.to_sym] }.map(&:to_s)
|
140
|
+
scope = load_scope(all_manager_uuids_scope)
|
141
|
+
condition = inventory_collection.build_multi_selection_condition(scope, scope_keys)
|
142
|
+
all_entities_query = all_entities_query.where(condition)
|
143
|
+
end
|
144
|
+
|
145
|
+
if all_manager_uuids_timestamp && supports_column?(:resource_timestamp)
|
146
|
+
all_manager_uuids_timestamp = Time.parse(all_manager_uuids_timestamp).utc
|
147
|
+
|
148
|
+
date_field = model_class.arel_table[:resource_timestamp]
|
149
|
+
all_entities_query = all_entities_query.where(date_field.lt(all_manager_uuids_timestamp))
|
150
|
+
end
|
151
|
+
all_entities_query
|
152
|
+
end
|
153
|
+
|
154
|
+
def load_scope(all_manager_uuids_scope)
|
155
|
+
scope_keys = all_manager_uuids_scope.first.keys.to_set
|
156
|
+
|
157
|
+
all_manager_uuids_scope.map do |cond|
|
158
|
+
assert_scope!(scope_keys, cond)
|
159
|
+
|
160
|
+
cond.map do |key, value|
|
161
|
+
foreign_key = association_to_foreign_key_mapping[key.to_sym]
|
162
|
+
foreign_key_value = value.load&.id
|
163
|
+
|
164
|
+
assert_foreign_keys!(key, value, foreign_key, foreign_key_value)
|
165
|
+
|
166
|
+
[foreign_key, foreign_key_value]
|
167
|
+
end.to_h
|
168
|
+
end
|
169
|
+
end
|
170
|
+
|
171
|
+
def assert_scope!(scope_keys, cond)
|
172
|
+
if cond.keys.to_set != scope_keys
|
173
|
+
raise "'#{inventory_collection}' expected keys for :all_manager_uuids_scope are #{scope_keys.to_a}, got"\
|
174
|
+
" #{cond.keys}. Keys must be the same for all scopes provided."
|
175
|
+
end
|
176
|
+
end
|
177
|
+
|
178
|
+
def assert_foreign_keys!(key, value, foreign_key, foreign_key_value)
|
179
|
+
unless foreign_key
|
180
|
+
raise "'#{inventory_collection}' doesn't have relation :#{key} provided in :all_manager_uuids_scope."
|
181
|
+
end
|
182
|
+
|
183
|
+
unless foreign_key_value
|
184
|
+
raise "'#{inventory_collection}' couldn't load scope value :#{key} => #{value.inspect} provided in :all_manager_uuids_scope"
|
185
|
+
end
|
186
|
+
end
|
187
|
+
|
188
|
+
def active_entities_query(all_attribute_keys_array, manager_uuids)
|
189
|
+
connection = ActiveRecord::Base.connection
|
190
|
+
|
191
|
+
all_attribute_keys_array_q = all_attribute_keys_array.map { |x| quote_column_name(x) }
|
192
|
+
# For PostgreSQL, only the first set of values should contain the type casts
|
193
|
+
first_value = manager_uuids.shift.to_h
|
194
|
+
first_value = "(#{all_attribute_keys_array.map { |x| quote(connection, first_value[x], x, true) }.join(",")})"
|
195
|
+
|
196
|
+
# Rest of the values, without the type cast
|
197
|
+
values = manager_uuids.map! do |hash|
|
198
|
+
"(#{all_attribute_keys_array.map { |x| quote(connection, hash[x], x, false) }.join(",")})"
|
199
|
+
end.join(",")
|
200
|
+
|
201
|
+
values = values.blank? ? first_value : [first_value, values].join(",")
|
202
|
+
|
203
|
+
<<-SQL
|
204
|
+
SELECT *
|
205
|
+
FROM (VALUES #{values}) AS active_entities_table(#{all_attribute_keys_array_q.join(",")})
|
206
|
+
SQL
|
207
|
+
end
|
83
208
|
end
|
84
209
|
end
|
85
210
|
end
|