inventory_refresh 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.codeclimate.yml +47 -0
- data/.gitignore +13 -0
- data/.rspec +4 -0
- data/.rspec_ci +4 -0
- data/.rubocop.yml +4 -0
- data/.rubocop_cc.yml +5 -0
- data/.rubocop_local.yml +2 -0
- data/.travis.yml +12 -0
- data/.yamllint +12 -0
- data/CHANGELOG.md +0 -0
- data/Gemfile +6 -0
- data/LICENSE +202 -0
- data/README.md +35 -0
- data/Rakefile +47 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/inventory_refresh.gemspec +34 -0
- data/lib/inventory_refresh.rb +11 -0
- data/lib/inventory_refresh/application_record_iterator.rb +56 -0
- data/lib/inventory_refresh/application_record_reference.rb +15 -0
- data/lib/inventory_refresh/graph.rb +157 -0
- data/lib/inventory_refresh/graph/topological_sort.rb +66 -0
- data/lib/inventory_refresh/inventory_collection.rb +1175 -0
- data/lib/inventory_refresh/inventory_collection/data_storage.rb +178 -0
- data/lib/inventory_refresh/inventory_collection/graph.rb +170 -0
- data/lib/inventory_refresh/inventory_collection/index/proxy.rb +230 -0
- data/lib/inventory_refresh/inventory_collection/index/type/base.rb +80 -0
- data/lib/inventory_refresh/inventory_collection/index/type/data.rb +26 -0
- data/lib/inventory_refresh/inventory_collection/index/type/local_db.rb +286 -0
- data/lib/inventory_refresh/inventory_collection/index/type/skeletal.rb +116 -0
- data/lib/inventory_refresh/inventory_collection/reference.rb +96 -0
- data/lib/inventory_refresh/inventory_collection/references_storage.rb +106 -0
- data/lib/inventory_refresh/inventory_collection/scanner.rb +117 -0
- data/lib/inventory_refresh/inventory_collection/serialization.rb +140 -0
- data/lib/inventory_refresh/inventory_object.rb +303 -0
- data/lib/inventory_refresh/inventory_object_lazy.rb +151 -0
- data/lib/inventory_refresh/save_collection/base.rb +38 -0
- data/lib/inventory_refresh/save_collection/recursive.rb +52 -0
- data/lib/inventory_refresh/save_collection/saver/base.rb +390 -0
- data/lib/inventory_refresh/save_collection/saver/batch.rb +17 -0
- data/lib/inventory_refresh/save_collection/saver/concurrent_safe.rb +71 -0
- data/lib/inventory_refresh/save_collection/saver/concurrent_safe_batch.rb +632 -0
- data/lib/inventory_refresh/save_collection/saver/default.rb +57 -0
- data/lib/inventory_refresh/save_collection/saver/sql_helper.rb +85 -0
- data/lib/inventory_refresh/save_collection/saver/sql_helper_update.rb +120 -0
- data/lib/inventory_refresh/save_collection/saver/sql_helper_upsert.rb +196 -0
- data/lib/inventory_refresh/save_collection/topological_sort.rb +38 -0
- data/lib/inventory_refresh/save_inventory.rb +38 -0
- data/lib/inventory_refresh/target.rb +73 -0
- data/lib/inventory_refresh/target_collection.rb +80 -0
- data/lib/inventory_refresh/version.rb +3 -0
- data/tools/ci/create_db_user.sh +3 -0
- metadata +207 -0
@@ -0,0 +1,17 @@
|
|
1
|
+
require "inventory_refresh/save_collection/saver/concurrent_safe_batch"

module InventoryRefresh::SaveCollection
  module Saver
    # Saver strategy identical to ConcurrentSafeBatch, except that it does not
    # assume unique DB indexes exist for the collection's manager ref.
    class Batch < InventoryRefresh::SaveCollection::Saver::ConcurrentSafeBatch
      private

      # Just returning manager ref transformed to column names, for strategies that do not expect to have unique DB
      # indexes.
      #
      # @return [Array<Symbol>] manager ref transformed to column names
      def unique_index_columns
        inventory_collection.manager_ref_to_cols.map { |column| column.to_sym }
      end
    end
  end
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
require "inventory_refresh/save_collection/saver/base"

module InventoryRefresh::SaveCollection
  module Saver
    # Saver strategy doing per-record, concurrency-safe writes: updates are
    # guarded by a remote_data_timestamp comparison and creates go through a
    # single-row insert query.
    class ConcurrentSafe < InventoryRefresh::SaveCollection::Saver::Base
      # TODO(lsmola) this strategy does not make much sense, it's better to use concurent_safe_batch and make batch size
      # configurable
      private

      # Updates the passed record with hash data and stores primary key value into inventory_object.
      #
      # @param record [ApplicationRecord] record we want to update in DB
      # @param hash [Hash] data we want to update the record with
      # @param inventory_object [InventoryRefresh::InventoryObject] InventoryObject instance where we will store primary
      #        key value
      def update_record!(record, hash, inventory_object)
        assign_attributes_for_update!(hash, time_now)
        # Assign in-memory first so `record.changed?` below reflects the incoming data
        record.assign_attributes(hash.except(:id))

        # Skip the write entirely when change-checking is on and nothing changed
        if !inventory_object.inventory_collection.check_changed? || record.changed?
          update_query = inventory_object.inventory_collection.model_class.where(:id => record.id)
          if hash[:remote_data_timestamp]
            # Only update if incoming data is newer than what the DB row carries —
            # this is what makes the update safe under concurrent writers
            timestamp_field = inventory_collection.model_class.arel_table[:remote_data_timestamp]
            update_query = update_query.where(timestamp_field.lt(hash[:remote_data_timestamp]))
          end

          # update_all bypasses AR callbacks/validations; NOTE(review): records are
          # stored as updated even if the timestamp guard filtered the row out
          update_query.update_all(hash)
          inventory_collection.store_updated_records(record)
        end

        inventory_object.id = record.id
      end

      # Creates a new record in the DB using the passed hash data
      #
      # @param hash [Hash] hash with data we want to persist to DB
      # @param inventory_object [InventoryRefresh::InventoryObject] InventoryObject instance where we will store primary
      #        key value
      def create_record!(hash, inventory_object)
        all_attribute_keys = hash.keys
        # Round-trip through an AR instance to apply defaults/typecasts
        data = inventory_collection.model_class.new(hash).attributes.symbolize_keys

        # TODO(lsmola) abstract common behavior into base class
        all_attribute_keys << :type if supports_sti?
        all_attribute_keys << :created_at if supports_created_at?
        all_attribute_keys << :updated_at if supports_updated_at?
        all_attribute_keys << :created_on if supports_created_on?
        all_attribute_keys << :updated_on if supports_updated_on?
        hash_for_creation = if inventory_collection.use_ar_object?
                              record = inventory_collection.model_class.new(data)
                              values_for_database!(all_attribute_keys,
                                                   record.attributes.symbolize_keys)
                            elsif serializable_keys?
                              values_for_database!(all_attribute_keys,
                                                   data)
                            else
                              data
                            end

        assign_attributes_for_create!(hash_for_creation, time_now)

        # Single-row insert built by sql_helper; returns the new row so we can read its id
        result_id = ActiveRecord::Base.connection.execute(
          build_insert_query(all_attribute_keys, [hash_for_creation])
        )

        inventory_object.id = result_id.to_a.try(:first).try(:[], "id")
        inventory_collection.store_created_records(inventory_object)
      end
    end
  end
end
|
@@ -0,0 +1,632 @@
|
|
1
|
+
require "inventory_refresh/save_collection/saver/base"
|
2
|
+
require "active_support/core_ext/module/delegation"
|
3
|
+
|
4
|
+
module InventoryRefresh::SaveCollection
|
5
|
+
module Saver
|
6
|
+
class ConcurrentSafeBatch < InventoryRefresh::SaveCollection::Saver::Base
|
7
|
+
private
|
8
|
+
|
9
|
+
delegate :association_to_base_class_mapping,
|
10
|
+
:association_to_foreign_key_mapping,
|
11
|
+
:association_to_foreign_type_mapping,
|
12
|
+
:attribute_references,
|
13
|
+
:to => :inventory_collection
|
14
|
+
|
15
|
+
# Attribute accessor to ApplicationRecord object or Hash
#
# @param record [Hash, ApplicationRecord] record or hash
# @param key [Symbol] key pointing to attribute of the record
# @return [Object] value of the record on the key
def record_key(record, key)
  # Dispatch to ar_record_key or pure_sql_record_key based on fetch mode
  accessor = record_key_method
  send(accessor, record, key)
end
|
23
|
+
|
24
|
+
# Attribute accessor to ApplicationRecord object
#
# @param record [ApplicationRecord] record
# @param key [Symbol] key pointing to attribute of the record
# @return [Object] value of the record on the key
def ar_record_key(record, key)
  value = record.public_send(key)
  value
end
|
32
|
+
|
33
|
+
# Attribute accessor to Hash object
#
# @param record [Hash] hash
# @param key [Symbol] key pointing to attribute of the record
# @return [Object] value of the record on the key
def pure_sql_record_key(record, key)
  # Raw SQL rows are indexed positionally; translate the key to its position first
  position = select_keys_indexes[key]
  record[position]
end
|
41
|
+
|
42
|
+
# Returns iterator or relation based on settings
#
# @param association [Symbol] An existing association on manager
# @return [ActiveRecord::Relation, InventoryRefresh::ApplicationRecordIterator] iterator or relation based on settings
def batch_iterator(association)
  if pure_sql_records_fetching
    # Building fast iterator doing pure SQL query and therefore avoiding redundant creation of AR objects. The
    # iterator responds to find_in_batches, so it acts like the AR relation. For targeted refresh, the association
    # can already be ApplicationRecordIterator, so we will skip that.
    pure_sql_iterator = lambda do |&block|
      primary_key_offset = nil
      loop do
        relation = association.select(*select_keys)
                              .reorder("#{primary_key} ASC")
                              .limit(batch_size)
        # Using rails way of comparing primary key instead of offset
        # (keyset pagination: WHERE pk > last_seen, cheaper than OFFSET on big tables)
        relation = relation.where(arel_primary_key.gt(primary_key_offset)) if primary_key_offset
        records = get_connection.query(relation.to_sql)
        last_record = records.last
        block.call(records)

        # A short batch means we've exhausted the relation
        break if records.size < batch_size
        primary_key_offset = record_key(last_record, primary_key)
      end
    end

    InventoryRefresh::ApplicationRecordIterator.new(:iterator => pure_sql_iterator)
  else
    # Normal Rails ActiveRecord::Relation where we can call find_in_batches or
    # InventoryRefresh::ApplicationRecordIterator passed from targeted refresh
    association
  end
end
|
75
|
+
|
76
|
+
# Saves the InventoryCollection
#
# Builds string-reference indexes of all incoming InventoryObjects, then:
# 1. updates/destroys existing DB rows (unless the collection is create-only),
# 2. batch-creates the remaining (not-found) records,
# 3. for parallel-safe collections, upserts partial/skeletal records.
#
# @param association [Symbol] An existing association on manager
# @raise [StandardError] re-raises any error encountered while saving
def save!(association)
  attributes_index = {}
  inventory_objects_index = {}
  all_attribute_keys = Set.new + inventory_collection.batch_extra_attributes

  inventory_collection.each do |inventory_object|
    # attributes_with_keys also accumulates the union of keys into all_attribute_keys
    attributes = inventory_object.attributes_with_keys(inventory_collection, all_attribute_keys)
    index = build_stringified_reference(attributes, unique_index_keys)

    # Interesting fact: not building attributes_index and using only inventory_objects_index doesn't do much
    # of a difference, since the most objects inside are shared.
    attributes_index[index] = attributes
    inventory_objects_index[index] = inventory_object
  end

  all_attribute_keys << :created_at if supports_created_at?
  all_attribute_keys << :updated_at if supports_updated_at?
  all_attribute_keys << :created_on if supports_created_on?
  all_attribute_keys << :updated_on if supports_updated_on?
  all_attribute_keys << :type if supports_sti?

  #_log.debug("Processing #{inventory_collection} of size #{inventory_collection.size}...")

  unless inventory_collection.create_only?
    # update_or_destroy_records! deletes every processed entry from both indexes,
    # leaving only the records that need to be created
    update_or_destroy_records!(batch_iterator(association), inventory_objects_index, attributes_index, all_attribute_keys)
    inventory_collection.custom_reconnect_block&.call(inventory_collection, inventory_objects_index, attributes_index)
  end

  # Records that were not found in the DB but sent for saving, we will be creating these in the DB.
  if inventory_collection.create_allowed?
    on_conflict = inventory_collection.parallel_safe? ? :do_update : nil

    inventory_objects_index.each_slice(batch_size_for_persisting) do |batch|
      create_records!(all_attribute_keys, batch, attributes_index, :on_conflict => on_conflict)
    end

    # Let the GC clean this up
    inventory_objects_index = nil
    attributes_index = nil

    if inventory_collection.parallel_safe?
      create_or_update_partial_records(all_attribute_keys)
    end
  end
  #_log.debug("Processing #{inventory_collection}, "\
  #           "created=#{inventory_collection.created_records.count}, "\
  #           "updated=#{inventory_collection.updated_records.count}, "\
  #           "deleted=#{inventory_collection.deleted_records.count}...Complete")
rescue => e
  #_log.error("Error when saving #{inventory_collection} with #{inventory_collection_details}. Message: #{e.message}")
  raise
end
|
134
|
+
|
135
|
+
# Batch updates existing records that are in the DB using attributes_index. And delete the ones that were not
# present in inventory_objects_index.
#
# @param records_batch_iterator [ActiveRecord::Relation, InventoryRefresh::ApplicationRecordIterator] iterator or
#        relation, both responding to :find_in_batches method
# @param inventory_objects_index [Hash{String => InventoryRefresh::InventoryObject}] Hash of InventoryObject objects
# @param attributes_index [Hash{String => Hash}] Hash of data hashes with only keys that are column names of the
#        models's table
# @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
def update_or_destroy_records!(records_batch_iterator, inventory_objects_index, attributes_index, all_attribute_keys)
  hashes_for_update = []
  records_for_destroy = []
  indexed_inventory_objects = {}

  records_batch_iterator.find_in_batches(:batch_size => batch_size) do |batch|
    update_time = time_now

    batch.each do |record|
      primary_key_value = record_key(record, primary_key)

      next unless assert_distinct_relation(primary_key_value)

      index = db_columns_index(record)

      # delete: entries left over after iterating the whole DB relation are the
      # ones that need to be created (handled by the caller)
      inventory_object = inventory_objects_index.delete(index)
      hash = attributes_index.delete(index)

      if inventory_object.nil?
        # Record was found in the DB but not sent for saving, that means it doesn't exist anymore and we should
        # delete it from the DB.
        if inventory_collection.delete_allowed?
          records_for_destroy << record
        end
      else
        # Record was found in the DB and sent for saving, we will be updating the DB.
        next unless assert_referential_integrity(hash)
        inventory_object.id = primary_key_value

        if inventory_collection.parallel_safe? &&
           (supports_remote_data_timestamp?(all_attribute_keys) || supports_remote_data_version?(all_attribute_keys))

          version_attr, max_version_attr = if supports_remote_data_timestamp?(all_attribute_keys)
                                             [:resource_timestamp, :resource_timestamps_max]
                                           elsif supports_remote_data_version?(all_attribute_keys)
                                             [:resource_version, :resource_versions_max]
                                           end

          # record may be an AR object (try(attr)) or a raw SQL hash (try(:[], attr))
          next if skeletonize_or_skip_record(record.try(version_attr) || record.try(:[], version_attr),
                                             hash[version_attr],
                                             record.try(max_version_attr) || record.try(:[], max_version_attr),
                                             inventory_object)
        end

        hash_for_update = if inventory_collection.use_ar_object?
                            record.assign_attributes(hash.except(:id))
                            values_for_database!(all_attribute_keys,
                                                 record.attributes.symbolize_keys)
                          elsif serializable_keys?
                            # TODO(lsmola) hash data with current DB data to allow subset of data being sent,
                            # otherwise we would nullify the not sent attributes. Test e.g. on disks in cloud
                            values_for_database!(all_attribute_keys,
                                                 hash)
                          else
                            # TODO(lsmola) hash data with current DB data to allow subset of data being sent,
                            # otherwise we would nullify the not sent attributes. Test e.g. on disks in cloud
                            hash
                          end
        assign_attributes_for_update!(hash_for_update, update_time)

        hash_for_update[:id] = primary_key_value
        indexed_inventory_objects[index] = inventory_object
        hashes_for_update << hash_for_update
      end
    end

    # Update in batches
    if hashes_for_update.size >= batch_size_for_persisting
      update_records!(all_attribute_keys, hashes_for_update, indexed_inventory_objects)

      hashes_for_update = []
      indexed_inventory_objects = {}
    end

    # Destroy in batches
    if records_for_destroy.size >= batch_size_for_persisting
      destroy_records!(records_for_destroy)
      records_for_destroy = []
    end
  end

  # Update the last batch
  update_records!(all_attribute_keys, hashes_for_update, indexed_inventory_objects)
  hashes_for_update = [] # Cleanup so GC can release it sooner

  # Destroy the last batch
  destroy_records!(records_for_destroy)
  records_for_destroy = [] # Cleanup so GC can release it sooner
end
|
233
|
+
|
234
|
+
# Builds the stringified unique-index key ("val1__val2__...") for a record, so it
# can be matched against the indexes built from incoming InventoryObjects.
#
# @param record [ApplicationRecord, Hash, Array] DB row in AR, hash, or raw SQL array form
# @param pure_sql [Boolean] true when record is a raw result-row addressable by column name
# @return [String] values of the unique index columns joined by "__"
def db_columns_index(record, pure_sql: false)
  # Incoming values are in SQL string form.
  # TODO(lsmola) unify this behavior with object_index_with_keys method in InventoryCollection
  # TODO(lsmola) maybe we can drop the whole pure sql fetching, since everything will be targeted refresh
  # with streaming refresh? Maybe just metrics and events will not be, but those should be upsert only
  # TODO(lsmola) taking ^ in account, we can't drop pure sql, since that is returned by batch insert and
  # update queries
  unique_index_keys_to_s.map do |attribute|
    value = if pure_sql
              record[attribute]
            else
              record_key(record, attribute)
            end

    # Normalize to a canonical string so DB-sourced and incoming values compare equal
    format_value(attribute, value)
  end.join("__")
end
|
251
|
+
|
252
|
+
# Normalizes a single index-column value to its canonical string form, so values
# coming from the DB and from the refresh payload compare equal.
def format_value(attribute, value)
  deserializer = deserializable_keys[attribute.to_sym]

  if attribute == "timestamp"
    # TODO: can this be covered by @deserializable_keys?
    caster = model_class.type_for_attribute(attribute)
    caster.cast(value).utc.iso8601.to_s
  elsif deserializer
    deserializer.deserialize(value).to_s
  else
    value.to_s
  end
end
|
263
|
+
|
264
|
+
# Deletes or soft-deletes records. If the model_class supports a custom class delete method, we will use it for
# batch soft-delete.
#
# @param records [Array<ApplicationRecord, Hash>] Records we want to delete. If we have only hashes, we need to
#        to fetch ApplicationRecord objects from the DB
def destroy_records!(records)
  return false unless inventory_collection.delete_allowed?
  return if records.blank?

  # Is the delete_method rails standard deleting method?
  rails_delete = %i(destroy delete).include?(inventory_collection.delete_method)
  if !rails_delete && inventory_collection.model_class.respond_to?(inventory_collection.delete_method)
    # We have custom delete method defined on a class, that means it supports batch destroy
    inventory_collection.store_deleted_records(records.map { |x| {:id => record_key(x, primary_key)} })
    inventory_collection.model_class.public_send(inventory_collection.delete_method, records.map { |x| record_key(x, primary_key) })
  else
    # We have either standard :destroy and :delete rails method, or custom instance level delete method
    # Note: The standard :destroy and :delete rails method can't be batched because of the hooks and cascade destroy
    ActiveRecord::Base.transaction do
      if pure_sql_records_fetching
        # For pure SQL fetching, we need to get the AR objects again, so we can call destroy
        inventory_collection.model_class.where(:id => records.map { |x| record_key(x, primary_key) }).find_each do |record|
          delete_record!(record)
        end
      else
        records.each do |record|
          delete_record!(record)
        end
      end
    end
  end
end
|
296
|
+
|
297
|
+
# Batch updates existing records
#
# @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
# @param hashes [Array<Hash>] data used for building a batch update sql query
# @param indexed_inventory_objects [Hash{String => InventoryRefresh::InventoryObject}] InventoryObjects indexed by
#        their unique-index string, used to skeletonize rows the DB refused to update
# @return [Object] result of executing the batch update query
def update_records!(all_attribute_keys, hashes, indexed_inventory_objects)
  return if hashes.blank?

  unless inventory_collection.parallel_safe?
    # We need to update the stored records before we save it, since hashes are modified
    inventory_collection.store_updated_records(hashes)
  end

  query = build_update_query(all_attribute_keys, hashes)
  result = get_connection.execute(query)

  if inventory_collection.parallel_safe?
    # We will check for timestamp clashes of full row update and we will fallback to skeletal update
    inventory_collection.store_updated_records(result)

    skeletonize_ignored_records!(indexed_inventory_objects, result)
  end

  result
end
|
321
|
+
|
322
|
+
# Taking result from update or upsert of the row. The records that were not saved will be turned into skeletal
# records and we will save them attribute by attribute.
#
# @param hash [Hash{String => InventoryObject}>] Hash with indexed data we want to save
# @param result [Array<Hash>] Result from the DB containing the data that were actually saved
# @param all_unique_columns [Boolean] True if index is consisted from all columns of the unique index. False if
#        index is just made from manager_ref turned in DB column names.
def skeletonize_ignored_records!(hash, result, all_unique_columns: false)
  # Rebuild the index keys of the rows the DB reports as saved...
  updated = if all_unique_columns
              result.map { |x| unique_index_columns_to_s.map { |key| x[key] } }
            else
              result.map { |x| db_columns_index(x, :pure_sql => true) }
            end

  # ...and drop them, leaving only the rows the DB ignored
  updated.each { |x| hash.delete(x) }

  # Now lets skeletonize all inventory_objects that were not saved by update or upsert. Old rows that can't be
  # saved are not being sent here. We have only rows that are new, but become old as we send the query (so other
  # parallel process saved the data in the meantime). Or if some attributes are newer than the whole row
  # being sent.
  hash.each_key do |db_index|
    inventory_collection.skeletal_primary_index.skeletonize_primary_index(hash[db_index].manager_uuid)
  end
end
|
346
|
+
|
347
|
+
# Saves partial records using upsert, taking records from skeletal_primary_index. This is used both for
# skeletal precreate as well as for saving partial rows.
#
# Phase 1 inserts every skeletal row with ON CONFLICT DO NOTHING (precreate);
# phase 2 upserts each non-base data column separately, guarded by per-attribute
# version/timestamp maxima, so parallel writers can each own different columns.
#
# @param all_attribute_keys [Set] Superset of all keys of all records being saved
def create_or_update_partial_records(all_attribute_keys)
  skeletal_attributes_index = {}
  skeletal_inventory_objects_index = {}

  inventory_collection.skeletal_primary_index.each_value do |inventory_object|
    attributes = inventory_object.attributes_with_keys(inventory_collection, all_attribute_keys)
    index = build_stringified_reference(attributes, unique_index_keys)

    skeletal_attributes_index[index] = attributes
    skeletal_inventory_objects_index[index] = inventory_object
  end

  if supports_remote_data_timestamp?(all_attribute_keys)
    all_attribute_keys << :resource_timestamps
    all_attribute_keys << :resource_timestamps_max
  elsif supports_remote_data_version?(all_attribute_keys)
    all_attribute_keys << :resource_versions
    all_attribute_keys << :resource_versions_max
  end

  indexed_inventory_objects = {}
  hashes = []
  create_time = time_now

  skeletal_inventory_objects_index.each do |index, inventory_object|
    hash = skeletal_attributes_index.delete(index)
    # Partial create or update must never set a timestamp for the whole row
    timestamps = if supports_remote_data_timestamp?(all_attribute_keys) && supports_resource_timestamps_max?
                   assign_partial_row_version_attributes!(:resource_timestamp,
                                                          :resource_timestamps,
                                                          :resource_timestamps_max,
                                                          hash,
                                                          all_attribute_keys)
                 elsif supports_remote_data_version?(all_attribute_keys) && supports_resource_versions_max?
                   assign_partial_row_version_attributes!(:resource_version,
                                                          :resource_versions,
                                                          :resource_versions_max,
                                                          hash,
                                                          all_attribute_keys)
                 end
    # Transform hash to DB format
    hash = transform_to_hash!(all_attribute_keys, hash)

    assign_attributes_for_create!(hash, create_time)

    next unless assert_referential_integrity(hash)

    hash[:__non_serialized_versions] = timestamps # store non serialized timestamps for the partial updates
    hashes << hash
    # Index on Unique Columns values, so we can easily fill in the :id later
    indexed_inventory_objects[unique_index_columns.map { |x| hash[x] }] = inventory_object
  end

  return if hashes.blank?

  # First, lets try to create all partial records
  hashes.each_slice(batch_size_for_persisting) do |batch|
    result = create_partial!(all_attribute_keys,
                             batch,
                             :on_conflict => :do_nothing)
    inventory_collection.store_created_records(result)
  end

  # We need only skeletal records with timestamp. We can't save the ones without timestamp, because e.g. skeletal
  # precreate would be updating records with default values, that are not correct.
  pre_filtered = hashes.select { |x| x[:resource_timestamps_max] || x[:resource_versions_max] }

  results = {}
  # TODO(lsmola) we don't need to process rows that were save by the create -> oncoflict do nothing
  (all_attribute_keys - inventory_collection.base_columns).each do |column_name|
    filtered = pre_filtered.select { |x| x.key?(column_name) }

    filtered.each_slice(batch_size_for_persisting) do |batch|
      # We need to set correct timestamps_max for this particular attribute, based on what is in timestamps
      if supports_remote_data_timestamp?(all_attribute_keys)
        batch.each { |x| x[:resource_timestamps_max] = x[:__non_serialized_versions][column_name] if x[:__non_serialized_versions][column_name] }
      elsif supports_remote_data_version?(all_attribute_keys)
        batch.each { |x| x[:resource_versions_max] = x[:__non_serialized_versions][column_name] if x[:__non_serialized_versions][column_name] }
      end

      result = create_partial!(inventory_collection.base_columns + [column_name],
                               batch,
                               :on_conflict => :do_update,
                               :column_name => column_name)
      result.each do |res|
        results[res["id"]] = res
      end
    end
  end

  inventory_collection.store_updated_records(results.values)

  # TODO(lsmola) we need to move here the hash loading ar object etc. otherwise the lazy_find with key will not
  # be correct
  if inventory_collection.dependees.present?
    # We need to get primary keys of the created objects, but only if there are dependees that would use them
    map_ids_to_inventory_objects(indexed_inventory_objects,
                                 all_attribute_keys,
                                 hashes,
                                 nil,
                                 :on_conflict => :do_nothing)
  end
end
|
454
|
+
|
455
|
+
# Batch upserts 1 data column of the row, plus the internal columns
#
# @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
# @param hashes [Array<InventoryRefresh::InventoryObject>] Array of InventoryObject objects we will be inserting
#        into the DB
# @param on_conflict [Symbol, NilClass] defines behavior on conflict with unique index constraint, allowed values
#        are :do_update, :do_nothing, nil
# @param column_name [Symbol] Name of the data column we will be upserting
def create_partial!(all_attribute_keys, hashes, on_conflict: nil, column_name: nil)
  query = build_insert_query(all_attribute_keys,
                             hashes,
                             :on_conflict => on_conflict,
                             :mode        => :partial,
                             :column_name => column_name)
  get_connection.execute(query)
end
|
468
|
+
|
469
|
+
# Batch inserts records using attributes_index data. With on_conflict option using :do_update, this method
# does atomic upsert.
#
# @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
# @param batch [Array<InventoryRefresh::InventoryObject>] Array of InventoryObject object we will be inserting into
#        the DB
# @param attributes_index [Hash{String => Hash}] Hash of data hashes with only keys that are column names of the
#        models's table
# @param on_conflict [Symbol, NilClass] defines behavior on conflict with unique index constraint, allowed values
#        are :do_update, :do_nothing, nil
def create_records!(all_attribute_keys, batch, attributes_index, on_conflict: nil)
  indexed_inventory_objects = {}
  hashes = []
  create_time = time_now

  batch.each do |index, inventory_object|
    hash = data_for_create(all_attribute_keys, attributes_index[index])
    assign_attributes_for_create!(hash, create_time)

    next unless assert_referential_integrity(hash)

    hashes << hash
    # Index on Unique Columns values, so we can easily fill in the :id later
    indexed_inventory_objects[unique_index_columns.map { |x| hash[x] }] = inventory_object
  end

  return if hashes.blank?

  result = get_connection.execute(
    build_insert_query(all_attribute_keys, hashes, :on_conflict => on_conflict, :mode => :full)
  )

  store_created_and_updated_records(result)

  if inventory_collection.dependees.present?
    # We need to get primary keys of the created objects, but only if there are dependees that would use them
    map_ids_to_inventory_objects(indexed_inventory_objects,
                                 all_attribute_keys,
                                 hashes,
                                 result,
                                 :on_conflict => on_conflict)
  end

  if inventory_collection.parallel_safe?
    skeletonize_ignored_records!(indexed_inventory_objects, result, :all_unique_columns => true)
  end
end

# Builds the database-ready attribute hash for a single batch entry.
#
# @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
# @param attributes [Hash] raw attributes of one record, keyed by column name
# @return [Hash] attributes coerced for direct use in the insert query
def data_for_create(all_attribute_keys, attributes)
  if inventory_collection.use_ar_object?
    # Round-trip through an ActiveRecord object so every value gets AR type-casting.
    record = inventory_collection.model_class.new(attributes)
    values_for_database!(all_attribute_keys,
                         record.attributes.symbolize_keys)
  elsif serializable_keys?
    values_for_database!(all_attribute_keys,
                         attributes)
  else
    attributes
  end
end

# Files the rows returned by the batch insert query into the inventory collection's
# created/updated record stores.
#
# @param result [Array<Hash>] rows returned by the batch insert query
def store_created_and_updated_records(result)
  unless inventory_collection.parallel_safe?
    # We've done just insert, so all records were created
    inventory_collection.store_created_records(result)
    return
  end

  # We've done upsert, so records were either created or updated. We can recognize that by checking if
  # created and updated timestamps are the same
  created_attr = "created_on" if inventory_collection.supports_created_on?
  created_attr ||= "created_at" if inventory_collection.supports_created_at?
  updated_attr = "updated_on" if inventory_collection.supports_updated_on?
  updated_attr ||= "updated_at" if inventory_collection.supports_updated_at?

  if created_attr && updated_attr
    created, updated = result.to_a.partition { |x| x[created_attr] == x[updated_attr] }
    inventory_collection.store_created_records(created)
    inventory_collection.store_updated_records(updated)
  else
    # The record doesn't have both created and updated attrs, so we'll take all as created
    inventory_collection.store_created_records(result)
  end
end
|
544
|
+
|
545
|
+
# Stores primary_key values of created records into associated InventoryObject objects.
#
# @param indexed_inventory_objects [Hash{String => InventoryRefresh::InventoryObject}] inventory objects indexed
#        by stringified value made from db_columns
# @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
# @param hashes [Array<Hashes>] Array of hashes that were used for inserting of the data
# @param result [Array<Hashes>] Array of hashes that are a result of the batch insert query, each result
#        contains a primary key_value plus all columns that are a part of the unique index
# @param on_conflict [Symbol, NilClass] defines behavior on conflict with unique index constraint, allowed values
#        are :do_update, :do_nothing, nil
def map_ids_to_inventory_objects(indexed_inventory_objects, all_attribute_keys, hashes, result, on_conflict:)
  if on_conflict == :do_nothing
    # TODO(lsmola) is the comment below still accurate? We will update some partial rows, the actual skeletal
    # precreate will still do nothing.
    # For ON CONFLICT DO NOTHING, we need to always fetch the records plus the attribute_references. This path
    # applies only for skeletal precreate.
    inventory_collection.model_class.where(
      build_multi_selection_query(hashes)
    ).select(unique_index_columns + [:id] + attribute_references.to_a).each do |record|
      key = unique_index_columns.map { |x| record.public_send(x) }
      inventory_object = indexed_inventory_objects[key]
      # BUGFIX: the original guarded only the final `.id=` assignment against a missing
      # inventory_object, but the attribute_references loop below would raise NoMethodError
      # on nil first. Skip records we didn't index at all.
      next if inventory_object.nil?

      # Load also attribute_references, so lazy_find with :key pointing to skeletal reference works
      attributes = record.attributes.symbolize_keys
      attribute_references.each do |ref|
        inventory_object[ref] = attributes[ref]

        next unless (foreign_key = association_to_foreign_key_mapping[ref])

        base_class_name = attributes[association_to_foreign_type_mapping[ref].try(:to_sym)] || association_to_base_class_mapping[ref]
        id = attributes[foreign_key.to_sym]
        inventory_object[ref] = InventoryRefresh::ApplicationRecordReference.new(base_class_name, id)
      end

      inventory_object.id = record.id
    end
  elsif !supports_remote_data_timestamp?(all_attribute_keys) || result.count == batch_size_for_persisting
    # We can use the insert query result to fetch all primary_key values, which makes this the most effective
    # path.
    result.each do |inserted_record|
      key = unique_index_columns.map do |x|
        value = inserted_record[x.to_s]
        type = deserializable_keys[x]
        type ? type.deserialize(value) : value
      end
      inventory_object = indexed_inventory_objects[key]
      inventory_object.id = inserted_record[primary_key] if inventory_object
    end
  else
    # The remote_data_timestamp is adding a WHERE condition to ON CONFLICT UPDATE. As a result, the RETURNING
    # clause is not guaranteed to return all ids of the inserted/updated records in the result. In that case
    # we test if the number of results matches the expected batch size. Then if the counts do not match, the only
    # safe option is to query all the data from the DB, using the unique_indexes. The batch size will also not match
    # for every remainders(a last batch in a stream of batches)
    inventory_collection.model_class.where(
      build_multi_selection_query(hashes)
    ).select(unique_index_columns + [:id]).each do |inserted_record|
      key = unique_index_columns.map { |x| inserted_record.public_send(x) }
      inventory_object = indexed_inventory_objects[key]
      inventory_object.id = inserted_record.id if inventory_object
    end
  end
end
|
607
|
+
|
608
|
+
# Decides whether a record save should be skipped or downgraded to a partial (skeletal) save,
# based on remote version columns.
#
# @param record_version [Object, NilClass] full-row version currently stored in the DB
# @param hash_version [Object, NilClass] full-row version of the incoming data hash
# @param record_versions_max [Object, NilClass] the maximum of the per-column versions stored in the DB
# @param inventory_object [InventoryRefresh::InventoryObject] the object being saved
# @return [Boolean] true when the caller should NOT do a full save of this record
def skeletonize_or_skip_record(record_version, hash_version, record_versions_max, inventory_object)
  # The stored row is at least as new as the incoming data: nothing to do, skip the update.
  db_row_not_older = record_version && hash_version && record_version >= hash_version
  return true if db_row_not_older

  # Some individual column carries a newer version than the incoming full row: route the row
  # through the skeletal primary index so it gets stored partially instead.
  partial_column_newer = record_versions_max && hash_version && record_versions_max > hash_version
  return false unless partial_column_newer

  inventory_collection.skeletal_primary_index.skeletonize_primary_index(inventory_object.manager_uuid)
  true
end
|
620
|
+
|
621
|
+
# Moves the full-row version value under the partial "max" column and trims the per-column
# version hash down to the columns actually being saved. Mutates +hash+ in place.
#
# @param full_row_version_attr [Symbol] column holding the full-row version
# @param partial_row_version_attr [Symbol] column holding the per-column versions hash
# @param partial_row_version_attr_max [Symbol] column holding the maximum of per-column versions
# @param hash [Hash] the attribute hash being prepared for a partial save
# @param all_attribute_keys [Array<Symbol>] columns that will actually be saved
def assign_partial_row_version_attributes!(full_row_version_attr, partial_row_version_attr,
                                           partial_row_version_attr_max, hash, all_attribute_keys)
  # Relocate the full-row version into the partial max column.
  hash[partial_row_version_attr_max] = hash.delete(full_row_version_attr)

  per_column_versions = hash[partial_row_version_attr]
  return unless per_column_versions.present?

  # Lets clean to only what we save, since when we build the skeletal object, we can set more
  hash[partial_row_version_attr] = per_column_versions.slice(*all_attribute_keys)
end
|
630
|
+
end
|
631
|
+
end
|
632
|
+
end
|