inventory_refresh 0.3.5 → 1.1.0
- checksums.yaml +4 -4
- data/.codeclimate.yml +25 -30
- data/.github/workflows/ci.yaml +58 -0
- data/.rubocop.yml +3 -3
- data/.rubocop_cc.yml +3 -4
- data/.rubocop_local.yml +5 -2
- data/.whitesource +3 -0
- data/CHANGELOG.md +19 -0
- data/Gemfile +10 -4
- data/README.md +1 -2
- data/Rakefile +2 -2
- data/inventory_refresh.gemspec +9 -10
- data/lib/inventory_refresh/application_record_iterator.rb +25 -12
- data/lib/inventory_refresh/graph/topological_sort.rb +24 -26
- data/lib/inventory_refresh/graph.rb +2 -2
- data/lib/inventory_refresh/inventory_collection/builder.rb +37 -15
- data/lib/inventory_refresh/inventory_collection/data_storage.rb +9 -0
- data/lib/inventory_refresh/inventory_collection/helpers/initialize_helper.rb +147 -38
- data/lib/inventory_refresh/inventory_collection/helpers/questions_helper.rb +48 -4
- data/lib/inventory_refresh/inventory_collection/index/proxy.rb +35 -3
- data/lib/inventory_refresh/inventory_collection/index/type/base.rb +8 -0
- data/lib/inventory_refresh/inventory_collection/index/type/local_db.rb +2 -0
- data/lib/inventory_refresh/inventory_collection/index/type/skeletal.rb +1 -0
- data/lib/inventory_refresh/inventory_collection/reference.rb +1 -0
- data/lib/inventory_refresh/inventory_collection/references_storage.rb +17 -0
- data/lib/inventory_refresh/inventory_collection/scanner.rb +91 -3
- data/lib/inventory_refresh/inventory_collection/serialization.rb +16 -10
- data/lib/inventory_refresh/inventory_collection.rb +122 -64
- data/lib/inventory_refresh/inventory_object.rb +74 -40
- data/lib/inventory_refresh/inventory_object_lazy.rb +17 -10
- data/lib/inventory_refresh/null_logger.rb +2 -2
- data/lib/inventory_refresh/persister.rb +43 -93
- data/lib/inventory_refresh/save_collection/base.rb +4 -2
- data/lib/inventory_refresh/save_collection/saver/base.rb +114 -15
- data/lib/inventory_refresh/save_collection/saver/batch.rb +17 -0
- data/lib/inventory_refresh/save_collection/saver/concurrent_safe_batch.rb +129 -51
- data/lib/inventory_refresh/save_collection/saver/default.rb +57 -0
- data/lib/inventory_refresh/save_collection/saver/partial_upsert_helper.rb +2 -19
- data/lib/inventory_refresh/save_collection/saver/retention_helper.rb +68 -3
- data/lib/inventory_refresh/save_collection/saver/sql_helper.rb +125 -0
- data/lib/inventory_refresh/save_collection/saver/sql_helper_update.rb +10 -6
- data/lib/inventory_refresh/save_collection/saver/sql_helper_upsert.rb +28 -16
- data/lib/inventory_refresh/save_collection/sweeper.rb +17 -93
- data/lib/inventory_refresh/save_collection/topological_sort.rb +5 -5
- data/lib/inventory_refresh/save_inventory.rb +5 -12
- data/lib/inventory_refresh/target.rb +73 -0
- data/lib/inventory_refresh/target_collection.rb +92 -0
- data/lib/inventory_refresh/version.rb +1 -1
- data/lib/inventory_refresh.rb +2 -0
- metadata +42 -39
- data/.travis.yml +0 -23
- data/lib/inventory_refresh/exception.rb +0 -8
data/lib/inventory_refresh/save_collection/saver/base.rb

@@ -12,6 +12,8 @@ module InventoryRefresh::SaveCollection
       # @param inventory_collection [InventoryRefresh::InventoryCollection] InventoryCollection object we will be saving
       def initialize(inventory_collection)
         @inventory_collection = inventory_collection
+        # TODO(lsmola) do I need to reload every time? Also it should be enough to clear the associations.
+        inventory_collection.parent&.reload
         @association = inventory_collection.db_collection_for_comparison

         # Private attrs
@@ -19,17 +21,21 @@ module InventoryRefresh::SaveCollection
         @table_name = @model_class.table_name
         @q_table_name = get_connection.quote_table_name(@table_name)
         @primary_key = @model_class.primary_key
-        @arel_primary_key = @model_class.
+        @arel_primary_key = @model_class.arel_table[@primary_key]
         @unique_index_keys = inventory_collection.unique_index_keys
         @unique_index_keys_to_s = inventory_collection.manager_ref_to_cols.map(&:to_s)
         @select_keys = [@primary_key] + @unique_index_keys_to_s + internal_columns.map(&:to_s)
         @unique_db_primary_keys = Set.new
         @unique_db_indexes = Set.new

+        # Right now ApplicationRecordIterator in association is used for targeted refresh. Given the small amount of
+        # records flowing through there, we probably don't need to optimize that association to fetch a pure SQL.
+        @pure_sql_records_fetching = !inventory_collection.use_ar_object? && !@association.kind_of?(InventoryRefresh::ApplicationRecordIterator)
+
         @batch_size_for_persisting = inventory_collection.batch_size_pure_sql
-        @batch_size = inventory_collection.use_ar_object? ? @batch_size_for_persisting : inventory_collection.batch_size

-        @
+        @batch_size = @pure_sql_records_fetching ? @batch_size_for_persisting : inventory_collection.batch_size
+        @record_key_method = @pure_sql_records_fetching ? :pure_sql_record_key : :ar_record_key
         @select_keys_indexes = @select_keys.each_with_object({}).with_index { |(key, obj), index| obj[key.to_s] = index }
         @pg_types = @model_class.attribute_names.each_with_object({}) do |key, obj|
           obj[key.to_sym] = inventory_collection.model_class.columns_hash[key]
@@ -69,8 +75,14 @@ module InventoryRefresh::SaveCollection

       # Saves the InventoryCollection
       def save_inventory_collection!
+        # If we have a targeted InventoryCollection that wouldn't do anything, quickly skip it
+        return if inventory_collection.noop?
+
+        # Delete_complement strategy using :all_manager_uuids attribute
+        delete_complement unless inventory_collection.delete_complement_noop?
+
         # Create/Update/Archive/Delete records based on InventoryCollection data and scope
-        save!(association)
+        save!(association) unless inventory_collection.saving_noop?
       end

       protected
@@ -89,8 +101,6 @@ module InventoryRefresh::SaveCollection
       # @param attributes [Hash] attributes hash
       # @return [Hash] modified hash from parameter attributes with casted values
       def values_for_database!(all_attribute_keys, attributes)
-        # TODO(lsmola) we'll need to fill default value from the DB to the NOT_NULL columns here, since sending NULL
-        # to column with NOT_NULL constraint always fails, even if there is a default value
         all_attribute_keys.each do |key|
           next unless attributes.key?(key)

@@ -102,7 +112,11 @@ module InventoryRefresh::SaveCollection
       end

       def transform_to_hash!(all_attribute_keys, hash)
-        if
+        if inventory_collection.use_ar_object?
+          record = inventory_collection.model_class.new(hash)
+          values_for_database!(all_attribute_keys,
+                               record.attributes.slice(*record.changed_attributes.keys).symbolize_keys)
+        elsif serializable_keys?
           values_for_database!(all_attribute_keys,
                                hash)
         else
@@ -113,15 +127,99 @@ module InventoryRefresh::SaveCollection
       private

       attr_reader :unique_index_keys, :unique_index_keys_to_s, :select_keys, :unique_db_primary_keys, :unique_db_indexes,
-                  :primary_key, :arel_primary_key, :record_key_method, :select_keys_indexes,
+                  :primary_key, :arel_primary_key, :record_key_method, :pure_sql_records_fetching, :select_keys_indexes,
                   :batch_size, :batch_size_for_persisting, :model_class, :serializable_keys, :deserializable_keys, :pg_types, :table_name,
                   :q_table_name

       delegate :supports_column?, :to => :inventory_collection

+      # Saves the InventoryCollection
+      #
+      # @param association [Symbol] An existing association on manager
+      def save!(association)
+        attributes_index = {}
+        inventory_objects_index = {}
+        inventory_collection.each do |inventory_object|
+          attributes = inventory_object.attributes(inventory_collection)
+          index = build_stringified_reference(attributes, unique_index_keys)
+
+          attributes_index[index] = attributes
+          inventory_objects_index[index] = inventory_object
+        end
+
+        logger.debug("Processing #{inventory_collection} of size #{inventory_collection.size}...")
+        # Records that are in the DB, we will be updating or deleting them.
+        ActiveRecord::Base.transaction do
+          association.find_each do |record|
+            index = build_stringified_reference_for_record(record, unique_index_keys)
+
+            next unless assert_distinct_relation(record.id)
+            next unless assert_unique_record(record, index)
+
+            inventory_object = inventory_objects_index.delete(index)
+            hash = attributes_index.delete(index)
+
+            if inventory_object.nil?
+              # Record was found in the DB but not sent for saving, that means it doesn't exist anymore and we should
+              # delete it from the DB.
+              delete_record!(record) if inventory_collection.delete_allowed?
+            elsif assert_referential_integrity(hash)
+              # Record was found in the DB and sent for saving, we will be updating the DB.
+              update_record!(record, hash, inventory_object)
+            end
+          end
+        end
+
+        inventory_collection.custom_reconnect_block&.call(inventory_collection, inventory_objects_index, attributes_index)
+
+        # Records that were not found in the DB but sent for saving, we will be creating these in the DB.
+        if inventory_collection.create_allowed?
+          ActiveRecord::Base.transaction do
+            inventory_objects_index.each do |index, inventory_object|
+              hash = attributes_index.delete(index)
+
+              create_record!(hash, inventory_object) if assert_referential_integrity(hash)
+            end
+          end
+        end
+        logger.debug("Processing #{inventory_collection}, "\
+                     "created=#{inventory_collection.created_records.count}, "\
+                     "updated=#{inventory_collection.updated_records.count}, "\
+                     "deleted=#{inventory_collection.deleted_records.count}...Complete")
+      rescue => e
+        logger.error("Error when saving #{inventory_collection} with #{inventory_collection_details}. Message: #{e.message}")
+        raise e
+      end
+
       # @return [String] a string for logging purposes
       def inventory_collection_details
-        "strategy: #{inventory_collection.strategy}, saver_strategy: #{inventory_collection.saver_strategy}"
+        "strategy: #{inventory_collection.strategy}, saver_strategy: #{inventory_collection.saver_strategy}, targeted: #{inventory_collection.targeted?}"
+      end
+
+      # @param record [ApplicationRecord] ApplicationRecord object
+      # @param key [Symbol] A key that is an attribute of the AR object
+      # @return [Object] Value of attribute name :key on the :record
+      def record_key(record, key)
+        record.public_send(key)
+      end
+
+      # Deletes a complement of referenced data
+      def delete_complement
+        raise(":delete_complement method is supported only for :saver_strategy => [:batch, :concurrent_safe_batch]")
+      end
+
+      # Deletes/soft-deletes a given record
+      #
+      # @param [ApplicationRecord] record we want to delete
+      def delete_record!(record)
+        record.public_send(inventory_collection.delete_method)
+        inventory_collection.store_deleted_records(record)
+      end
+
+      # @return [TrueClass] always return true, this method is redefined in default saver
+      def assert_unique_record(_record, _index)
+        # TODO(lsmola) can go away once we indexed our DB with unique indexes
+        true
       end

       # Check if relation provided is distinct, i.e. the relation should not return the same primary key value twice.
@@ -134,12 +232,12 @@ module InventoryRefresh::SaveCollection
           # Change the InventoryCollection's :association or :arel parameter to return distinct results. The :through
           # relations can return the same record multiple times. We don't want to do SELECT DISTINCT by default, since
           # it can be very slow.
-
+          if inventory_collection.assert_graph_integrity
+            raise("Please update :association or :arel for #{inventory_collection} to return a DISTINCT result. ")
+          else
            logger.warn("Please update :association or :arel for #{inventory_collection} to return a DISTINCT result. "\
                        " The duplicate value is being ignored.")
            return false
-          else
-            raise("Please update :association or :arel for #{inventory_collection} to return a DISTINCT result. ")
           end
         else
           unique_db_primary_keys << primary_key_value
@@ -157,14 +255,15 @@ module InventoryRefresh::SaveCollection
       def assert_referential_integrity(hash)
         inventory_collection.fixed_foreign_keys.each do |x|
           next unless hash[x].nil?
+
           subject = "#{hash} of #{inventory_collection} because of missing foreign key #{x} for "\
                     "#{inventory_collection.parent.class.name}:"\
                     "#{inventory_collection.parent.try(:id)}"
-
+          if inventory_collection.assert_graph_integrity
+            raise("Referential integrity check violated for #{subject}")
+          else
            logger.warn("Referential integrity check violated, ignoring #{subject}")
            return false
-          else
-            raise("Referential integrity check violated for #{subject}")
           end
         end
         true
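For orientation, these saver classes are not called directly: an InventoryCollection is normally handed to the gem's top-level save entry point, which picks a saver (Default, Batch or ConcurrentSafeBatch) from the collection's :saver_strategy and eventually reaches save_inventory_collection! above. A minimal, hedged sketch of that flow, assuming the gem's usual InventoryCollection/SaveInventory API; the model, parent and attribute values are illustrative, not taken from this diff:

    require "inventory_refresh"

    # Hypothetical Vm model and manager record; any ActiveRecord model with an :ems_ref column would do.
    collection = InventoryRefresh::InventoryCollection.new(
      :model_class => Vm,
      :parent      => manager,
      :association => :vms,
      :manager_ref => [:ems_ref]
    )
    collection.build(:ems_ref => "vm-1", :name => "example-vm")

    # Dispatches to the configured saver, which runs save_inventory_collection! for each collection
    InventoryRefresh::SaveInventory.save_inventory(manager, [collection])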
data/lib/inventory_refresh/save_collection/saver/batch.rb

@@ -0,0 +1,17 @@
+require "inventory_refresh/save_collection/saver/concurrent_safe_batch"
+
+module InventoryRefresh::SaveCollection
+  module Saver
+    class Batch < InventoryRefresh::SaveCollection::Saver::ConcurrentSafeBatch
+      private
+
+      # Just returning manager ref transformed to column names, for strategies that do not expect to have unique DB
+      # indexes.
+      #
+      # @return [Array<Symbol>] manager ref transformed to column names
+      def unique_index_columns
+        inventory_collection.manager_ref_to_cols.map(&:to_sym)
+      end
+    end
+  end
+end
data/lib/inventory_refresh/save_collection/saver/concurrent_safe_batch.rb

@@ -46,6 +46,41 @@ module InventoryRefresh::SaveCollection
         record[select_keys_indexes[key]]
       end

+      # Returns iterator or relation based on settings
+      #
+      # @param association [Symbol] An existing association on manager
+      # @return [ActiveRecord::Relation, InventoryRefresh::ApplicationRecordIterator] iterator or relation based on settings
+      def batch_iterator(association)
+        if pure_sql_records_fetching
+          # Building fast iterator doing pure SQL query and therefore avoiding redundant creation of AR objects. The
+          # iterator responds to find_in_batches, so it acts like the AR relation. For targeted refresh, the association
+          # can already be ApplicationRecordIterator, so we will skip that.
+          pure_sql_iterator = lambda do |&block|
+            primary_key_offset = nil
+            loop do
+              relation = association.select(*select_keys)
+                                    .reorder("#{primary_key} ASC")
+                                    .limit(batch_size)
+              # Using rails way of comparing primary key instead of offset
+              relation = relation.where(arel_primary_key.gt(primary_key_offset)) if primary_key_offset
+              records = get_connection.query(relation.to_sql)
+              last_record = records.last
+              block.call(records)
+
+              break if records.size < batch_size
+
+              primary_key_offset = record_key(last_record, primary_key)
+            end
+          end
+
+          InventoryRefresh::ApplicationRecordIterator.new(:iterator => pure_sql_iterator)
+        else
+          # Normal Rails ActiveRecord::Relation where we can call find_in_batches or
+          # InventoryRefresh::ApplicationRecordIterator passed from targeted refresh
+          association
+        end
+      end
+
       # Saves the InventoryCollection
       #
       # @param association [Symbol] An existing association on manager
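The pure SQL iterator above pages by primary key (keyset pagination) instead of using OFFSET, so each batch query stays cheap even deep into a large table. A standalone sketch of the same idea, assuming a hypothetical Vm model with an integer id primary key and a placeholder handle method:

    batch_size = 1_000
    last_id    = nil

    loop do
      scope = Vm.select(:id, :ems_ref).reorder(:id).limit(batch_size)
      # Resume after the last primary key we saw, instead of OFFSETting past already-read rows
      scope = scope.where(Vm.arel_table[:id].gt(last_id)) if last_id

      rows = scope.to_a
      break if rows.empty?

      rows.each { |row| handle(row) } # handle is a stand-in for per-batch processing
      last_id = rows.last.id

      break if rows.size < batch_size
    end

Compared to OFFSET-based paging, the WHERE id > last_id form lets the database start each batch from an index seek rather than scanning and discarding the skipped rows.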
@@ -55,7 +90,7 @@ module InventoryRefresh::SaveCollection
         all_attribute_keys = Set.new + inventory_collection.batch_extra_attributes

         inventory_collection.each do |inventory_object|
-          attributes = inventory_object.
+          attributes = inventory_object.attributes_with_keys(inventory_collection, all_attribute_keys, inventory_object)
           index = build_stringified_reference(attributes, unique_index_keys)

           # Interesting fact: not building attributes_index and using only inventory_objects_index doesn't do much
@@ -69,7 +104,7 @@ module InventoryRefresh::SaveCollection
         logger.debug("Processing #{inventory_collection} of size #{inventory_collection.size}...")

         unless inventory_collection.create_only?
-
+          update_or_destroy_records!(batch_iterator(association), inventory_objects_index, attributes_index, all_attribute_keys)
         end

         unless inventory_collection.create_only?
@@ -78,11 +113,15 @@ module InventoryRefresh::SaveCollection

         # Records that were not found in the DB but sent for saving, we will be creating these in the DB.
         if inventory_collection.create_allowed?
+          on_conflict = inventory_collection.parallel_safe? ? :do_update : nil
+
           inventory_objects_index.each_slice(batch_size_for_persisting) do |batch|
-            create_records!(all_attribute_keys, batch, attributes_index, :on_conflict =>
+            create_records!(all_attribute_keys, batch, attributes_index, :on_conflict => on_conflict)
           end

-
+          if inventory_collection.parallel_safe?
+            create_or_update_partial_records(all_attribute_keys)
+          end
         end

         logger.debug("Marking :last_seen_at of #{inventory_collection} of size #{inventory_collection.size}...")
@@ -103,7 +142,7 @@ module InventoryRefresh::SaveCollection
       end

       def expand_all_attribute_keys!(all_attribute_keys)
-        %i
+        %i[created_at updated_at created_on updated_on].each do |col|
           all_attribute_keys << col if supports_column?(col)
         end
         all_attribute_keys << :type if supports_sti?
@@ -111,7 +150,7 @@ module InventoryRefresh::SaveCollection
       end

       def mark_last_seen_at(attributes_index)
-        return unless supports_column?(:last_seen_at)
+        return unless supports_column?(:last_seen_at) && inventory_collection.parallel_safe?
         return if attributes_index.blank?

         all_attribute_keys = [:last_seen_at]
@@ -124,7 +163,8 @@ module InventoryRefresh::SaveCollection
         get_connection.execute(query)
       end

-      # Batch updates existing records that are in the DB using attributes_index.
+      # Batch updates existing records that are in the DB using attributes_index. And delete the ones that were not
+      # present in inventory_objects_index.
       #
       # @param records_batch_iterator [ActiveRecord::Relation, InventoryRefresh::ApplicationRecordIterator] iterator or
       #   relation, both responding to :find_in_batches method
@@ -132,11 +172,12 @@ module InventoryRefresh::SaveCollection
       # @param attributes_index [Hash{String => Hash}] Hash of data hashes with only keys that are column names of the
       #   models's table
       # @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
-      def
-        hashes_for_update
+      def update_or_destroy_records!(records_batch_iterator, inventory_objects_index, attributes_index, all_attribute_keys)
+        hashes_for_update = []
+        records_for_destroy = []
         indexed_inventory_objects = {}

-        records_batch_iterator.find_in_batches(:batch_size => batch_size
+        records_batch_iterator.find_in_batches(:batch_size => batch_size) do |batch|
           update_time = time_now

           batch.each do |record|
@@ -149,14 +190,20 @@ module InventoryRefresh::SaveCollection
            inventory_object = inventory_objects_index.delete(index)
            hash = attributes_index[index]

-            if inventory_object
+            if inventory_object.nil?
+              # Record was found in the DB but not sent for saving, that means it doesn't exist anymore and we should
+              # delete it from the DB.
+              if inventory_collection.delete_allowed?
+                records_for_destroy << record
+              end
+            else
              # Record was found in the DB and sent for saving, we will be updating the DB.
              inventory_object.id = primary_key_value
              next unless assert_referential_integrity(hash)
+              next unless changed?(record, hash, all_attribute_keys)

-
-
-            if supports_remote_data_timestamp?(all_attribute_keys) || supports_remote_data_version?(all_attribute_keys)
+              if inventory_collection.parallel_safe? &&
+                 (supports_remote_data_timestamp?(all_attribute_keys) || supports_remote_data_version?(all_attribute_keys))

                version_attr, max_version_attr = if supports_remote_data_timestamp?(all_attribute_keys)
                                                   [:resource_timestamp, :resource_timestamps_max]
@@ -164,16 +211,16 @@ module InventoryRefresh::SaveCollection
                                                   [:resource_counter, :resource_counters_max]
                                                 end

-
-
+                next if skeletonize_or_skip_record(record_key(record, version_attr),
+                                                   hash[version_attr],
+                                                   record_key(record, max_version_attr),
+                                                   inventory_object)
              end

              hash_for_update = if inventory_collection.use_ar_object?
                                  record.assign_attributes(hash.except(:id))
-                                  next unless changed?(record)
-
                                  values_for_database!(all_attribute_keys,
-
+                                                       record.attributes.symbolize_keys)
                                elsif serializable_keys?
                                  # TODO(lsmola) hash data with current DB data to allow subset of data being sent,
                                  # otherwise we would nullify the not sent attributes. Test e.g. on disks in cloud
@@ -184,14 +231,6 @@ module InventoryRefresh::SaveCollection
                                  # otherwise we would nullify the not sent attributes. Test e.g. on disks in cloud
                                  hash
                                end
-
-              if supports_remote_data_timestamp?(all_attribute_keys) || supports_remote_data_version?(all_attribute_keys)
-                next if skeletonize_or_skip_record(record_version,
-                                                   hash[version_attr],
-                                                   record_version_max,
-                                                   inventory_object)
-              end
-
              assign_attributes_for_update!(hash_for_update, update_time)

              hash_for_update[:id] = primary_key_value
@@ -207,22 +246,39 @@ module InventoryRefresh::SaveCollection
            hashes_for_update = []
            indexed_inventory_objects = {}
          end
+
+          # Destroy in batches
+          if records_for_destroy.size >= batch_size_for_persisting
+            destroy_records!(records_for_destroy)
+            records_for_destroy = []
+          end
         end

         # Update the last batch
         update_records!(all_attribute_keys, hashes_for_update, indexed_inventory_objects)
         hashes_for_update = [] # Cleanup so GC can release it sooner
+
+        # Destroy the last batch
+        destroy_records!(records_for_destroy)
+        records_for_destroy = [] # Cleanup so GC can release it sooner
       end

-      def changed?(
+      def changed?(_record, _hash, _all_attribute_keys)
         return true unless inventory_collection.check_changed?

-        #
-
-        #
-
-        #
-
+        # TODO(lsmola) this check needs to be disabled now, because it doesn't work with lazy_find having secondary
+        # indexes. Examples: we save a pod before we save a project, that means the project lazy_find won't evaluate,
+        # because we load it with secondary index and can't do skeletal precreate. Then when the object is being saved
+        # again, the lazy_find is evaluated, but the resource version is not changed, so the row is not saved.
+        #
+        # To keep this quick .changed? check, we might need to extend this, so the resource_version doesn't save until
+        # all lazy_links of the row are evaluated.
+        #
+        # if supports_resource_version?(all_attribute_keys) && supports_column?(resource_version_column)
+        #   record_resource_version = record_key(record, resource_version_column.to_s)
+        #
+        #   return record_resource_version != hash[resource_version_column]
+        # end

         true
       end
@@ -230,7 +286,10 @@ module InventoryRefresh::SaveCollection
       def db_columns_index(record, pure_sql: false)
         # Incoming values are in SQL string form.
         # TODO(lsmola) unify this behavior with object_index_with_keys method in InventoryCollection
+        # TODO(lsmola) maybe we can drop the whole pure sql fetching, since everything will be targeted refresh
         #   with streaming refresh? Maybe just metrics and events will not be, but those should be upsert only
+        # TODO(lsmola) taking ^ in account, we can't drop pure sql, since that is returned by batch insert and
+        #   update queries
         unique_index_keys_to_s.map do |attribute|
           value = if pure_sql
                     record[attribute]
@@ -261,13 +320,20 @@ module InventoryRefresh::SaveCollection
       def update_records!(all_attribute_keys, hashes, indexed_inventory_objects)
         return if hashes.blank?

+        unless inventory_collection.parallel_safe?
+          # We need to update the stored records before we save it, since hashes are modified
+          inventory_collection.store_updated_records(hashes)
+        end
+
         query = build_update_query(all_attribute_keys, hashes)
         result = get_connection.execute(query)

-
-
+        if inventory_collection.parallel_safe?
+          # We will check for timestamp clashes of full row update and we will fallback to skeletal update
+          inventory_collection.store_updated_records(result)

-
+          skeletonize_ignored_records!(indexed_inventory_objects, result)
+        end

         result
       end
@@ -287,7 +353,11 @@ module InventoryRefresh::SaveCollection
         hashes = []
         create_time = time_now
         batch.each do |index, inventory_object|
-          hash = if
+          hash = if inventory_collection.use_ar_object?
+                   record = inventory_collection.model_class.new(attributes_index[index])
+                   values_for_database!(all_attribute_keys,
+                                        record.attributes.symbolize_keys)
+                 elsif serializable_keys?
                    values_for_database!(all_attribute_keys,
                                         attributes_index[index])
                  else
@@ -309,19 +379,24 @@ module InventoryRefresh::SaveCollection
          build_insert_query(all_attribute_keys, hashes, :on_conflict => on_conflict, :mode => :full)
         )

-
-
-
-
-
-
-
-
-
-
-
+        if inventory_collection.parallel_safe?
+          # We've done upsert, so records were either created or updated. We can recognize that by checking if
+          # created and updated timestamps are the same
+          created_attr = "created_on" if inventory_collection.supports_column?(:created_on)
+          created_attr ||= "created_at" if inventory_collection.supports_column?(:created_at)
+          updated_attr = "updated_on" if inventory_collection.supports_column?(:updated_on)
+          updated_attr ||= "updated_at" if inventory_collection.supports_column?(:updated_at)
+
+          if created_attr && updated_attr
+            created, updated = result.to_a.partition { |x| x[created_attr] == x[updated_attr] }
+            inventory_collection.store_created_records(created)
+            inventory_collection.store_updated_records(updated)
+          else
+            # The record doesn't have both created and updated attrs, so we'll take all as created
+            inventory_collection.store_created_records(result)
+          end
         else
-          #
+          # We've done just insert, so all records were created
          inventory_collection.store_created_records(result)
         end

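The created/updated split above works because the upsert is issued with a RETURNING clause: a freshly inserted row gets identical created_at and updated_at values, while a conflicting row keeps its old created_at and only moves updated_at forward. A rough sketch of that SQL shape, with an illustrative table and unique column (not the gem's generated query):

    sql = <<~SQL
      INSERT INTO vms (ems_ref, name, created_at, updated_at)
      VALUES ('vm-1', 'example', NOW(), NOW())
      ON CONFLICT (ems_ref)
      DO UPDATE SET name = EXCLUDED.name, updated_at = NOW()
      RETURNING id, created_at, updated_at
    SQL

    result = ActiveRecord::Base.connection.execute(sql)
    # Rows whose two timestamps match were inserted; the rest hit the conflict branch and were updated
    created, updated = result.to_a.partition { |row| row["created_at"] == row["updated_at"] }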
@@ -334,7 +409,9 @@ module InventoryRefresh::SaveCollection
                                 :on_conflict => on_conflict)
         end

-
+        if inventory_collection.parallel_safe?
+          skeletonize_ignored_records!(indexed_inventory_objects, result, :all_unique_columns => true)
+        end
       end

       # Stores primary_key values of created records into associated InventoryObject objects.
@@ -365,6 +442,7 @@ module InventoryRefresh::SaveCollection
          inventory_object[ref] = attributes[ref]

          next unless (foreign_key = association_to_foreign_key_mapping[ref])
+
          base_class_name = attributes[association_to_foreign_type_mapping[ref].try(:to_sym)] || association_to_base_class_mapping[ref]
          id = attributes[foreign_key.to_sym]
          inventory_object[ref] = InventoryRefresh::ApplicationRecordReference.new(base_class_name, id)
data/lib/inventory_refresh/save_collection/saver/default.rb

@@ -0,0 +1,57 @@
+require "inventory_refresh/save_collection/saver/base"
+
+module InventoryRefresh::SaveCollection
+  module Saver
+    class Default < InventoryRefresh::SaveCollection::Saver::Base
+      private
+
+      # Updates the passed record with hash data and stores primary key value into inventory_object.
+      #
+      # @param record [ApplicationRecord] record we want to update in DB
+      # @param hash [Hash] data we want to update the record with
+      # @param inventory_object [InventoryRefresh::InventoryObject] InventoryObject instance where we will store primary
+      #   key value
+      def update_record!(record, hash, inventory_object)
+        record.assign_attributes(hash.except(:id))
+        if !inventory_collection.check_changed? || record.changed?
+          record.save
+          inventory_collection.store_updated_records(record)
+        end
+
+        inventory_object.id = record.id
+      end
+
+      # Creates a new record in the DB using the passed hash data
+      #
+      # @param hash [Hash] hash with data we want to persist to DB
+      # @param inventory_object [InventoryRefresh::InventoryObject] InventoryObject instance where we will store primary
+      #   key value
+      def create_record!(hash, inventory_object)
+        record = inventory_collection.model_class.create!(hash.except(:id))
+        inventory_collection.store_created_records(record)
+
+        inventory_object.id = record.id
+      end
+
+      # Asserts we do not have duplicate records in the DB. If the record is duplicate we will destroy it.
+      #
+      # @param record [ApplicationRecord] record we want to update in DB
+      # @param index [String] manager_uuid of the record
+      # @return [Boolean] false if the record is duplicate
+      def assert_unique_record(record, index)
+        # TODO(lsmola) can go away once we indexed our DB with unique indexes
+        if unique_db_indexes.include?(index) # Include on Set is O(1)
+          # We have a duplicate in the DB, destroy it. A find_each method does automatically .order(:id => :asc)
+          # so we always keep the oldest record in the case of duplicates.
+          logger.warn("A duplicate record was detected and destroyed, inventory_collection: "\
+                      "'#{inventory_collection}', record: '#{record}', duplicate_index: '#{index}'")
+          record.destroy
+          return false
+        else
+          unique_db_indexes << index
+        end
+        true
+      end
+    end
+  end
+end
data/lib/inventory_refresh/save_collection/saver/partial_upsert_helper.rb

@@ -59,7 +59,7 @@ module InventoryRefresh::SaveCollection
         skeletal_inventory_objects_index = {}

         inventory_collection.skeletal_primary_index.each_value do |inventory_object|
-          attributes = inventory_object.
+          attributes = inventory_object.attributes_with_keys(inventory_collection, all_attribute_keys, inventory_object)
           index = build_stringified_reference(attributes, unique_index_keys)

           skeletal_attributes_index[index] = attributes
@@ -135,6 +135,7 @@ module InventoryRefresh::SaveCollection
           # We need to set correct timestamps_max for this particular attribute, based on what is in timestamps
           batch.each do |x|
             next unless x[:__non_serialized_versions][column_name]
+
             x[comparables_max_name] = x[:__non_serialized_versions][column_name]
           end
         end
@@ -200,25 +201,7 @@ module InventoryRefresh::SaveCollection
         )
       end

-      def comparable_timestamp(timestamp)
-        # Lets cast all timestamps to to_f, rounding the time comparing precision to miliseconds, that should be
-        # enough, since we are the ones setting the record version in collector. Otherwise we will have hard time with
-        # doing equality, since the value changes going through DB (DB. cuts it at 5 decimal places)
-
-        if timestamp.kind_of?(String)
-          Time.use_zone('UTC') { Time.zone.parse(timestamp) }.to_f.round(3)
-        elsif timestamp.kind_of?(Time)
-          timestamp.in_time_zone('UTC').to_f.round(3)
-        else
-          timestamp
-        end
-      end
-
       def skeletonize_or_skip_record(record_version, hash_version, record_versions_max, inventory_object)
-        record_version = comparable_timestamp(record_version)
-        record_versions_max = comparable_timestamp(record_versions_max) if record_versions_max
-        hash_version = comparable_timestamp(hash_version)
-
         # Skip updating this record, because it is old
         return true if record_version && hash_version && record_version >= hash_version
