inventory_refresh 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.codeclimate.yml +47 -0
- data/.gitignore +13 -0
- data/.rspec +4 -0
- data/.rspec_ci +4 -0
- data/.rubocop.yml +4 -0
- data/.rubocop_cc.yml +5 -0
- data/.rubocop_local.yml +2 -0
- data/.travis.yml +12 -0
- data/.yamllint +12 -0
- data/CHANGELOG.md +0 -0
- data/Gemfile +6 -0
- data/LICENSE +202 -0
- data/README.md +35 -0
- data/Rakefile +47 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/inventory_refresh.gemspec +34 -0
- data/lib/inventory_refresh.rb +11 -0
- data/lib/inventory_refresh/application_record_iterator.rb +56 -0
- data/lib/inventory_refresh/application_record_reference.rb +15 -0
- data/lib/inventory_refresh/graph.rb +157 -0
- data/lib/inventory_refresh/graph/topological_sort.rb +66 -0
- data/lib/inventory_refresh/inventory_collection.rb +1175 -0
- data/lib/inventory_refresh/inventory_collection/data_storage.rb +178 -0
- data/lib/inventory_refresh/inventory_collection/graph.rb +170 -0
- data/lib/inventory_refresh/inventory_collection/index/proxy.rb +230 -0
- data/lib/inventory_refresh/inventory_collection/index/type/base.rb +80 -0
- data/lib/inventory_refresh/inventory_collection/index/type/data.rb +26 -0
- data/lib/inventory_refresh/inventory_collection/index/type/local_db.rb +286 -0
- data/lib/inventory_refresh/inventory_collection/index/type/skeletal.rb +116 -0
- data/lib/inventory_refresh/inventory_collection/reference.rb +96 -0
- data/lib/inventory_refresh/inventory_collection/references_storage.rb +106 -0
- data/lib/inventory_refresh/inventory_collection/scanner.rb +117 -0
- data/lib/inventory_refresh/inventory_collection/serialization.rb +140 -0
- data/lib/inventory_refresh/inventory_object.rb +303 -0
- data/lib/inventory_refresh/inventory_object_lazy.rb +151 -0
- data/lib/inventory_refresh/save_collection/base.rb +38 -0
- data/lib/inventory_refresh/save_collection/recursive.rb +52 -0
- data/lib/inventory_refresh/save_collection/saver/base.rb +390 -0
- data/lib/inventory_refresh/save_collection/saver/batch.rb +17 -0
- data/lib/inventory_refresh/save_collection/saver/concurrent_safe.rb +71 -0
- data/lib/inventory_refresh/save_collection/saver/concurrent_safe_batch.rb +632 -0
- data/lib/inventory_refresh/save_collection/saver/default.rb +57 -0
- data/lib/inventory_refresh/save_collection/saver/sql_helper.rb +85 -0
- data/lib/inventory_refresh/save_collection/saver/sql_helper_update.rb +120 -0
- data/lib/inventory_refresh/save_collection/saver/sql_helper_upsert.rb +196 -0
- data/lib/inventory_refresh/save_collection/topological_sort.rb +38 -0
- data/lib/inventory_refresh/save_inventory.rb +38 -0
- data/lib/inventory_refresh/target.rb +73 -0
- data/lib/inventory_refresh/target_collection.rb +80 -0
- data/lib/inventory_refresh/version.rb +3 -0
- data/tools/ci/create_db_user.sh +3 -0
- metadata +207 -0
data/lib/inventory_refresh/save_collection/saver/batch.rb (new file, +17 lines):

```ruby
require "inventory_refresh/save_collection/saver/concurrent_safe_batch"

module InventoryRefresh::SaveCollection
  module Saver
    class Batch < InventoryRefresh::SaveCollection::Saver::ConcurrentSafeBatch
      private

      # Just returning manager ref transformed to column names, for strategies that do not expect to have unique DB
      # indexes.
      #
      # @return [Array<Symbol>] manager ref transformed to column names
      def unique_index_columns
        inventory_collection.manager_ref_to_cols.map(&:to_sym)
      end
    end
  end
end
```
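Batch only overrides how the unique index columns are derived: it trusts the manager_ref columns instead of relying on a DB unique index. A minimal, hypothetical sketch of the kind of flattening manager_ref_to_cols is expected to do (manager_ref and foreign_keys below are illustrative stand-ins, not the gem's API):

```ruby
# Illustrative only: flatten a manager_ref such as [:vm, :ems_ref] into DB
# column names, turning an association reference into its foreign-key column.
manager_ref  = [:vm, :ems_ref]
foreign_keys = {:vm => "vm_id"} # assumed association => foreign-key mapping

cols = manager_ref.map { |ref| foreign_keys[ref] || ref.to_s }
puts cols.map(&:to_sym).inspect
# => [:vm_id, :ems_ref]
```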
data/lib/inventory_refresh/save_collection/saver/concurrent_safe.rb (new file, +71 lines):

```ruby
require "inventory_refresh/save_collection/saver/base"

module InventoryRefresh::SaveCollection
  module Saver
    class ConcurrentSafe < InventoryRefresh::SaveCollection::Saver::Base
      # TODO(lsmola) this strategy does not make much sense, it's better to use concurrent_safe_batch and make
      # batch size configurable
      private

      # Updates the passed record with hash data and stores primary key value into inventory_object.
      #
      # @param record [ApplicationRecord] record we want to update in DB
      # @param hash [Hash] data we want to update the record with
      # @param inventory_object [InventoryRefresh::InventoryObject] InventoryObject instance where we will store primary
      #   key value
      def update_record!(record, hash, inventory_object)
        assign_attributes_for_update!(hash, time_now)
        record.assign_attributes(hash.except(:id))

        if !inventory_object.inventory_collection.check_changed? || record.changed?
          update_query = inventory_object.inventory_collection.model_class.where(:id => record.id)
          if hash[:remote_data_timestamp]
            timestamp_field = inventory_collection.model_class.arel_table[:remote_data_timestamp]
            update_query = update_query.where(timestamp_field.lt(hash[:remote_data_timestamp]))
          end

          update_query.update_all(hash)
          inventory_collection.store_updated_records(record)
        end

        inventory_object.id = record.id
      end

      # Creates a new record in the DB using the passed hash data
      #
      # @param hash [Hash] hash with data we want to persist to DB
      # @param inventory_object [InventoryRefresh::InventoryObject] InventoryObject instance where we will store primary
      #   key value
      def create_record!(hash, inventory_object)
        all_attribute_keys = hash.keys
        data = inventory_collection.model_class.new(hash).attributes.symbolize_keys

        # TODO(lsmola) abstract common behavior into base class
        all_attribute_keys << :type if supports_sti?
        all_attribute_keys << :created_at if supports_created_at?
        all_attribute_keys << :updated_at if supports_updated_at?
        all_attribute_keys << :created_on if supports_created_on?
        all_attribute_keys << :updated_on if supports_updated_on?
        hash_for_creation = if inventory_collection.use_ar_object?
                              record = inventory_collection.model_class.new(data)
                              values_for_database!(all_attribute_keys,
                                                   record.attributes.symbolize_keys)
                            elsif serializable_keys?
                              values_for_database!(all_attribute_keys,
                                                   data)
                            else
                              data
                            end

        assign_attributes_for_create!(hash_for_creation, time_now)

        result_id = ActiveRecord::Base.connection.execute(
          build_insert_query(all_attribute_keys, [hash_for_creation])
        )

        inventory_object.id = result_id.to_a.try(:first).try(:[], "id")
        inventory_collection.store_created_records(inventory_object)
      end
    end
  end
end
```
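The notable part of update_record! is the extra WHERE clause: when the data carries a remote_data_timestamp, the UPDATE only lands if the stored row is older, so concurrent refresh workers cannot overwrite fresh data with stale data. The gem builds that guard in SQL via Arel's lt; here is a minimal pure-Ruby sketch of the same idea, with illustrative names:

```ruby
# A stand-in for a DB row; the guard below mimics
# `WHERE remote_data_timestamp < :incoming` on an UPDATE.
Record = Struct.new(:id, :name, :remote_data_timestamp)

def guarded_update!(record, hash)
  incoming = hash[:remote_data_timestamp]
  # Refuse to regress the row if it already holds newer data.
  return record if incoming && record.remote_data_timestamp &&
                   record.remote_data_timestamp >= incoming

  hash.each { |key, value| record[key] = value }
  record
end

row = Record.new(1, "old", Time.utc(2018, 1, 2))
guarded_update!(row, :name => "stale", :remote_data_timestamp => Time.utc(2018, 1, 1))
puts row.name # => "old" (stale write ignored)
guarded_update!(row, :name => "fresh", :remote_data_timestamp => Time.utc(2018, 1, 3))
puts row.name # => "fresh"
```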
data/lib/inventory_refresh/save_collection/saver/concurrent_safe_batch.rb (new file, +632 lines):

```ruby
require "inventory_refresh/save_collection/saver/base"
require "active_support/core_ext/module/delegation"

module InventoryRefresh::SaveCollection
  module Saver
    class ConcurrentSafeBatch < InventoryRefresh::SaveCollection::Saver::Base
      private

      delegate :association_to_base_class_mapping,
               :association_to_foreign_key_mapping,
               :association_to_foreign_type_mapping,
               :attribute_references,
               :to => :inventory_collection

      # Attribute accessor to ApplicationRecord object or Hash
      #
      # @param record [Hash, ApplicationRecord] record or hash
      # @param key [Symbol] key pointing to attribute of the record
      # @return [Object] value of the record on the key
      def record_key(record, key)
        send(record_key_method, record, key)
      end

      # Attribute accessor to ApplicationRecord object
      #
      # @param record [ApplicationRecord] record
      # @param key [Symbol] key pointing to attribute of the record
      # @return [Object] value of the record on the key
      def ar_record_key(record, key)
        record.public_send(key)
      end

      # Attribute accessor to Hash object
      #
      # @param record [Hash] hash
      # @param key [Symbol] key pointing to attribute of the record
      # @return [Object] value of the record on the key
      def pure_sql_record_key(record, key)
        record[select_keys_indexes[key]]
      end

      # Returns iterator or relation based on settings
      #
      # @param association [Symbol] An existing association on manager
      # @return [ActiveRecord::Relation, InventoryRefresh::ApplicationRecordIterator] iterator or relation based on settings
      def batch_iterator(association)
        if pure_sql_records_fetching
          # Building fast iterator doing pure SQL query and therefore avoiding redundant creation of AR objects. The
          # iterator responds to find_in_batches, so it acts like the AR relation. For targeted refresh, the association
          # can already be ApplicationRecordIterator, so we will skip that.
          pure_sql_iterator = lambda do |&block|
            primary_key_offset = nil
            loop do
              relation = association.select(*select_keys)
                                    .reorder("#{primary_key} ASC")
                                    .limit(batch_size)
              # Using rails way of comparing primary key instead of offset
              relation = relation.where(arel_primary_key.gt(primary_key_offset)) if primary_key_offset
              records = get_connection.query(relation.to_sql)
              last_record = records.last
              block.call(records)

              break if records.size < batch_size
              primary_key_offset = record_key(last_record, primary_key)
            end
          end

          InventoryRefresh::ApplicationRecordIterator.new(:iterator => pure_sql_iterator)
        else
          # Normal Rails ActiveRecord::Relation where we can call find_in_batches or
          # InventoryRefresh::ApplicationRecordIterator passed from targeted refresh
          association
        end
      end
```
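The pure-SQL branch of batch_iterator is keyset pagination: instead of OFFSET, it orders by primary key, remembers the last id it saw, and filters the next page with id > offset, which stays cheap no matter how deep into the table it gets. A self-contained sketch of the same loop over in-memory rows (assumed data, no DB):

```ruby
# Keyset pagination over a plain array, mirroring pure_sql_iterator above.
rows = (1..10).map { |i| {:id => i, :name => "vm_#{i}"} }
batch_size = 4

iterator = lambda do |&block|
  primary_key_offset = nil
  loop do
    batch = rows.select { |r| primary_key_offset.nil? || r[:id] > primary_key_offset }
                .sort_by { |r| r[:id] }
                .first(batch_size)
    block.call(batch)
    break if batch.size < batch_size # a short page means we are done

    primary_key_offset = batch.last[:id]
  end
end

iterator.call { |batch| puts batch.map { |r| r[:id] }.inspect }
# => [1, 2, 3, 4], then [5, 6, 7, 8], then [9, 10]
```

The file continues with save!, the entry point that drives the whole batch flow: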
```ruby
      # Saves the InventoryCollection
      #
      # @param association [Symbol] An existing association on manager
      def save!(association)
        attributes_index = {}
        inventory_objects_index = {}
        all_attribute_keys = Set.new + inventory_collection.batch_extra_attributes

        inventory_collection.each do |inventory_object|
          attributes = inventory_object.attributes_with_keys(inventory_collection, all_attribute_keys)
          index = build_stringified_reference(attributes, unique_index_keys)

          # Interesting fact: not building attributes_index and using only inventory_objects_index doesn't make much
          # of a difference, since most objects inside are shared.
          attributes_index[index] = attributes
          inventory_objects_index[index] = inventory_object
        end

        all_attribute_keys << :created_at if supports_created_at?
        all_attribute_keys << :updated_at if supports_updated_at?
        all_attribute_keys << :created_on if supports_created_on?
        all_attribute_keys << :updated_on if supports_updated_on?
        all_attribute_keys << :type if supports_sti?

        #_log.debug("Processing #{inventory_collection} of size #{inventory_collection.size}...")

        unless inventory_collection.create_only?
          update_or_destroy_records!(batch_iterator(association), inventory_objects_index, attributes_index, all_attribute_keys)
        end

        unless inventory_collection.create_only?
          inventory_collection.custom_reconnect_block&.call(inventory_collection, inventory_objects_index, attributes_index)
        end

        # Records that were not found in the DB but sent for saving, we will be creating these in the DB.
        if inventory_collection.create_allowed?
          on_conflict = inventory_collection.parallel_safe? ? :do_update : nil

          inventory_objects_index.each_slice(batch_size_for_persisting) do |batch|
            create_records!(all_attribute_keys, batch, attributes_index, :on_conflict => on_conflict)
          end

          # Let the GC clean this up
          inventory_objects_index = nil
          attributes_index = nil

          if inventory_collection.parallel_safe?
            create_or_update_partial_records(all_attribute_keys)
          end
        end
        #_log.debug("Processing #{inventory_collection}, "\
        #           "created=#{inventory_collection.created_records.count}, "\
        #           "updated=#{inventory_collection.updated_records.count}, "\
        #           "deleted=#{inventory_collection.deleted_records.count}...Complete")
      rescue => e
        #_log.error("Error when saving #{inventory_collection} with #{inventory_collection_details}. Message: #{e.message}")
        raise e
      end
```
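save! starts by indexing every inventory object under a string key built from its manager_ref values; the same key can later be rebuilt from rows read back from the DB, which is how rows get matched to in-memory objects. A sketch of that keying under assumed attribute names (build_stringified_reference is the gem's real helper; this is not its implementation):

```ruby
# Key each record by its unique-reference values joined with "__".
unique_index_keys = [:ems_ref, :name]
inventory_objects = [
  {:ems_ref => "vm-1", :name => "a", :power_state => "on"},
  {:ems_ref => "vm-2", :name => "b", :power_state => "off"},
]

attributes_index = {}
inventory_objects.each do |attributes|
  index = unique_index_keys.map { |key| attributes[key].to_s }.join("__")
  attributes_index[index] = attributes
end

puts attributes_index.keys.inspect
# => ["vm-1__a", "vm-2__b"]
```

Next comes the reconciliation pass over existing rows: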
```ruby
      # Batch updates existing records that are in the DB using attributes_index. And deletes the ones that were not
      # present in inventory_objects_index.
      #
      # @param records_batch_iterator [ActiveRecord::Relation, InventoryRefresh::ApplicationRecordIterator] iterator or
      #   relation, both responding to :find_in_batches method
      # @param inventory_objects_index [Hash{String => InventoryRefresh::InventoryObject}] Hash of InventoryObject objects
      # @param attributes_index [Hash{String => Hash}] Hash of data hashes with only keys that are column names of the
      #   model's table
      # @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
      def update_or_destroy_records!(records_batch_iterator, inventory_objects_index, attributes_index, all_attribute_keys)
        hashes_for_update = []
        records_for_destroy = []
        indexed_inventory_objects = {}

        records_batch_iterator.find_in_batches(:batch_size => batch_size) do |batch|
          update_time = time_now

          batch.each do |record|
            primary_key_value = record_key(record, primary_key)

            next unless assert_distinct_relation(primary_key_value)

            index = db_columns_index(record)

            inventory_object = inventory_objects_index.delete(index)
            hash = attributes_index.delete(index)

            if inventory_object.nil?
              # Record was found in the DB but not sent for saving, that means it doesn't exist anymore and we should
              # delete it from the DB.
              if inventory_collection.delete_allowed?
                records_for_destroy << record
              end
            else
              # Record was found in the DB and sent for saving, we will be updating the DB.
              next unless assert_referential_integrity(hash)
              inventory_object.id = primary_key_value

              if inventory_collection.parallel_safe? &&
                 (supports_remote_data_timestamp?(all_attribute_keys) || supports_remote_data_version?(all_attribute_keys))

                version_attr, max_version_attr = if supports_remote_data_timestamp?(all_attribute_keys)
                                                   [:resource_timestamp, :resource_timestamps_max]
                                                 elsif supports_remote_data_version?(all_attribute_keys)
                                                   [:resource_version, :resource_versions_max]
                                                 end

                next if skeletonize_or_skip_record(record.try(version_attr) || record.try(:[], version_attr),
                                                   hash[version_attr],
                                                   record.try(max_version_attr) || record.try(:[], max_version_attr),
                                                   inventory_object)
              end

              hash_for_update = if inventory_collection.use_ar_object?
                                  record.assign_attributes(hash.except(:id))
                                  values_for_database!(all_attribute_keys,
                                                       record.attributes.symbolize_keys)
                                elsif serializable_keys?
                                  # TODO(lsmola) hash data with current DB data to allow subset of data being sent,
                                  # otherwise we would nullify the not sent attributes. Test e.g. on disks in cloud
                                  values_for_database!(all_attribute_keys,
                                                       hash)
                                else
                                  # TODO(lsmola) hash data with current DB data to allow subset of data being sent,
                                  # otherwise we would nullify the not sent attributes. Test e.g. on disks in cloud
                                  hash
                                end
              assign_attributes_for_update!(hash_for_update, update_time)

              hash_for_update[:id] = primary_key_value
              indexed_inventory_objects[index] = inventory_object
              hashes_for_update << hash_for_update
            end
          end

          # Update in batches
          if hashes_for_update.size >= batch_size_for_persisting
            update_records!(all_attribute_keys, hashes_for_update, indexed_inventory_objects)

            hashes_for_update = []
            indexed_inventory_objects = {}
          end

          # Destroy in batches
          if records_for_destroy.size >= batch_size_for_persisting
            destroy_records!(records_for_destroy)
            records_for_destroy = []
          end
        end

        # Update the last batch
        update_records!(all_attribute_keys, hashes_for_update, indexed_inventory_objects)
        hashes_for_update = [] # Cleanup so GC can release it sooner

        # Destroy the last batch
        destroy_records!(records_for_destroy)
        records_for_destroy = [] # Cleanup so GC can release it sooner
      end
```
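Structurally, update_or_destroy_records! computes a set difference in a single pass over the DB: a row found in both the DB and the payload becomes an update, a DB-only row becomes a delete, and whatever is left in the payload index afterwards is created later by save!. A minimal sketch with made-up index keys:

```ruby
# Reconcile DB rows against the incoming payload in one pass.
db_rows = {"vm-1" => {:id => 10}, "vm-3" => {:id => 30}}
payload = {"vm-1" => {:name => "a"}, "vm-2" => {:name => "b"}}

updates = []
deletes = []
db_rows.each do |index, row|
  if (attrs = payload.delete(index))
    updates << attrs.merge(:id => row[:id]) # present on both sides
  else
    deletes << row[:id]                     # in the DB, not in the payload
  end
end

puts updates.inspect      # => [{:name=>"a", :id=>10}]
puts deletes.inspect      # => [30]
puts payload.keys.inspect # => ["vm-2"] (left over, will be created)
```

The helpers that build those index keys come next: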
```ruby
      def db_columns_index(record, pure_sql: false)
        # Incoming values are in SQL string form.
        # TODO(lsmola) unify this behavior with object_index_with_keys method in InventoryCollection
        # TODO(lsmola) maybe we can drop the whole pure sql fetching, since everything will be targeted refresh
        # with streaming refresh? Maybe just metrics and events will not be, but those should be upsert only
        # TODO(lsmola) taking ^ in account, we can't drop pure sql, since that is returned by batch insert and
        # update queries
        unique_index_keys_to_s.map do |attribute|
          value = if pure_sql
                    record[attribute]
                  else
                    record_key(record, attribute)
                  end

          format_value(attribute, value)
        end.join("__")
      end

      def format_value(attribute, value)
        if attribute == "timestamp"
          # TODO: can this be covered by @deserializable_keys?
          type = model_class.type_for_attribute(attribute)
          type.cast(value).utc.iso8601.to_s
        elsif (type = deserializable_keys[attribute.to_sym])
          type.deserialize(value).to_s
        else
          value.to_s
        end
      end
```
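format_value exists because the two sides of that match arrive as different types: values fetched by raw SQL are strings, while in-memory attributes may be Time objects, and both must stringify identically before being joined into an index key. A small sketch of the normalization idea (an illustrative helper, not the gem's signature):

```ruby
require "time"

# Normalize both representations of a timestamp to one canonical string,
# so joined index keys compare equal regardless of where the value came from.
def normalize(value)
  time = value.is_a?(Time) ? value : Time.parse(value.to_s)
  time.utc.iso8601
end

puts normalize(Time.utc(2018, 5, 1, 12)) == normalize("2018-05-01 12:00:00 UTC")
# => true
```

The delete path follows: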
```ruby
      # Deletes or soft-deletes records. If the model_class supports a custom class delete method, we will use it for
      # batch soft-delete.
      #
      # @param records [Array<ApplicationRecord, Hash>] Records we want to delete. If we have only hashes, we need to
      #   fetch ApplicationRecord objects from the DB
      def destroy_records!(records)
        return false unless inventory_collection.delete_allowed?
        return if records.blank?

        # Is the delete_method rails standard deleting method?
        rails_delete = %i(destroy delete).include?(inventory_collection.delete_method)
        if !rails_delete && inventory_collection.model_class.respond_to?(inventory_collection.delete_method)
          # We have custom delete method defined on a class, that means it supports batch destroy
          inventory_collection.store_deleted_records(records.map { |x| {:id => record_key(x, primary_key)} })
          inventory_collection.model_class.public_send(inventory_collection.delete_method, records.map { |x| record_key(x, primary_key) })
        else
          # We have either standard :destroy and :delete rails method, or custom instance level delete method
          # Note: The standard :destroy and :delete rails method can't be batched because of the hooks and cascade destroy
          ActiveRecord::Base.transaction do
            if pure_sql_records_fetching
              # For pure SQL fetching, we need to get the AR objects again, so we can call destroy
              inventory_collection.model_class.where(:id => records.map { |x| record_key(x, primary_key) }).find_each do |record|
                delete_record!(record)
              end
            else
              records.each do |record|
                delete_record!(record)
              end
            end
          end
        end
      end
```
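destroy_records! dispatches on the configured delete_method: a custom class-level method can soft-delete every id in one call, while the Rails :destroy path has to go record by record so callbacks and cascades still run. A sketch of that dispatch with an illustrative model (Vm and archive_all are made up for the example):

```ruby
# Hypothetical model with a class-level batch soft-delete.
class Vm
  def self.archive_all(ids)
    puts "archiving #{ids.inspect} in one batch query"
  end
end

delete_method = :archive_all
ids = [1, 2, 3]

if delete_method != :destroy && Vm.respond_to?(delete_method)
  Vm.public_send(delete_method, ids)     # one query, no per-record hooks
else
  ids.each { |id| puts "destroy #{id}" } # per record, hooks and cascades run
end
```

The batched update path and its parallel-safe fallback: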
```ruby
      # Batch updates existing records
      #
      # @param hashes [Array<Hash>] data used for building a batch update sql query
      # @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
      def update_records!(all_attribute_keys, hashes, indexed_inventory_objects)
        return if hashes.blank?

        unless inventory_collection.parallel_safe?
          # We need to update the stored records before we save them, since hashes are modified
          inventory_collection.store_updated_records(hashes)
        end

        query = build_update_query(all_attribute_keys, hashes)
        result = get_connection.execute(query)

        if inventory_collection.parallel_safe?
          # We will check for timestamp clashes of the full row update and fall back to skeletal update
          inventory_collection.store_updated_records(result)

          skeletonize_ignored_records!(indexed_inventory_objects, result)
        end

        result
      end

      # Taking result from update or upsert of the row. The records that were not saved will be turned into skeletal
      # records and we will save them attribute by attribute.
      #
      # @param hash [Hash{String => InventoryObject}>] Hash with indexed data we want to save
      # @param result [Array<Hash>] Result from the DB containing the data that were actually saved
      # @param all_unique_columns [Boolean] True if the index consists of all columns of the unique index. False if
      #   the index is just made from manager_ref turned into DB column names.
      def skeletonize_ignored_records!(hash, result, all_unique_columns: false)
        updated = if all_unique_columns
                    result.map { |x| unique_index_columns_to_s.map { |key| x[key] } }
                  else
                    result.map { |x| db_columns_index(x, :pure_sql => true) }
                  end

        updated.each { |x| hash.delete(x) }

        # Now let's skeletonize all inventory_objects that were not saved by update or upsert. Old rows that can't be
        # saved are not being sent here. We have only rows that are new, but become old as we send the query (so another
        # parallel process saved the data in the meantime). Or if some attributes are newer than the whole row
        # being sent.
        hash.each_key do |db_index|
          inventory_collection.skeletal_primary_index.skeletonize_primary_index(hash[db_index].manager_uuid)
        end
      end
```
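skeletonize_ignored_records! is a diff between what was sent and what the guarded query reports back: index keys present in the DB result were saved and get dropped; whatever remains was skipped because a newer row already sat in the DB, so it falls back to a partial, attribute-by-attribute save. A minimal sketch with assumed index strings:

```ruby
# Whatever the guarded UPDATE did not report back needs a partial save.
sent   = {"vm-1" => :object1, "vm-2" => :object2, "vm-3" => :object3}
result = ["vm-1", "vm-3"] # index keys the DB reports as actually updated

result.each { |index| sent.delete(index) }
sent.each_key { |index| puts "skeletonize #{index} for a partial save" }
# => skeletonize vm-2 for a partial save
```

That partial-save machinery follows: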
```ruby
      # Saves partial records using upsert, taking records from skeletal_primary_index. This is used both for
      # skeletal precreate as well as for saving partial rows.
      #
      # @param all_attribute_keys [Set] Superset of all keys of all records being saved
      def create_or_update_partial_records(all_attribute_keys)
        skeletal_attributes_index = {}
        skeletal_inventory_objects_index = {}

        inventory_collection.skeletal_primary_index.each_value do |inventory_object|
          attributes = inventory_object.attributes_with_keys(inventory_collection, all_attribute_keys)
          index = build_stringified_reference(attributes, unique_index_keys)

          skeletal_attributes_index[index] = attributes
          skeletal_inventory_objects_index[index] = inventory_object
        end

        if supports_remote_data_timestamp?(all_attribute_keys)
          all_attribute_keys << :resource_timestamps
          all_attribute_keys << :resource_timestamps_max
        elsif supports_remote_data_version?(all_attribute_keys)
          all_attribute_keys << :resource_versions
          all_attribute_keys << :resource_versions_max
        end

        indexed_inventory_objects = {}
        hashes = []
        create_time = time_now

        skeletal_inventory_objects_index.each do |index, inventory_object|
          hash = skeletal_attributes_index.delete(index)
          # Partial create or update must never set a timestamp for the whole row
          timestamps = if supports_remote_data_timestamp?(all_attribute_keys) && supports_resource_timestamps_max?
                         assign_partial_row_version_attributes!(:resource_timestamp,
                                                                :resource_timestamps,
                                                                :resource_timestamps_max,
                                                                hash,
                                                                all_attribute_keys)
                       elsif supports_remote_data_version?(all_attribute_keys) && supports_resource_versions_max?
                         assign_partial_row_version_attributes!(:resource_version,
                                                                :resource_versions,
                                                                :resource_versions_max,
                                                                hash,
                                                                all_attribute_keys)
                       end
          # Transform hash to DB format
          hash = transform_to_hash!(all_attribute_keys, hash)

          assign_attributes_for_create!(hash, create_time)

          next unless assert_referential_integrity(hash)

          hash[:__non_serialized_versions] = timestamps # store non serialized timestamps for the partial updates
          hashes << hash
          # Index on Unique Columns values, so we can easily fill in the :id later
          indexed_inventory_objects[unique_index_columns.map { |x| hash[x] }] = inventory_object
        end

        return if hashes.blank?

        # First, let's try to create all partial records
        hashes.each_slice(batch_size_for_persisting) do |batch|
          result = create_partial!(all_attribute_keys,
                                   batch,
                                   :on_conflict => :do_nothing)
          inventory_collection.store_created_records(result)
        end

        # We need only skeletal records with timestamp. We can't save the ones without timestamp, because e.g. skeletal
        # precreate would be updating records with default values, that are not correct.
        pre_filtered = hashes.select { |x| x[:resource_timestamps_max] || x[:resource_versions_max] }

        results = {}
        # TODO(lsmola) we don't need to process rows that were saved by the create -> on conflict do nothing
        (all_attribute_keys - inventory_collection.base_columns).each do |column_name|
          filtered = pre_filtered.select { |x| x.key?(column_name) }

          filtered.each_slice(batch_size_for_persisting) do |batch|
            # We need to set correct timestamps_max for this particular attribute, based on what is in timestamps
            if supports_remote_data_timestamp?(all_attribute_keys)
              batch.each { |x| x[:resource_timestamps_max] = x[:__non_serialized_versions][column_name] if x[:__non_serialized_versions][column_name] }
            elsif supports_remote_data_version?(all_attribute_keys)
              batch.each { |x| x[:resource_versions_max] = x[:__non_serialized_versions][column_name] if x[:__non_serialized_versions][column_name] }
            end

            result = create_partial!(inventory_collection.base_columns + [column_name],
                                     batch,
                                     :on_conflict => :do_update,
                                     :column_name => column_name)
            result.each do |res|
              results[res["id"]] = res
            end
          end
        end

        inventory_collection.store_updated_records(results.values)

        # TODO(lsmola) we need to move here the hash loading ar object etc. otherwise the lazy_find with key will not
        # be correct
        if inventory_collection.dependees.present?
          # We need to get primary keys of the created objects, but only if there are dependees that would use them
          map_ids_to_inventory_objects(indexed_inventory_objects,
                                       all_attribute_keys,
                                       hashes,
                                       nil,
                                       :on_conflict => :do_nothing)
        end
      end

      # Batch upserts 1 data column of the row, plus the internal columns
      #
      # @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
      # @param hashes [Array<InventoryRefresh::InventoryObject>] Array of InventoryObject objects we will be inserting
      #   into the DB
      # @param on_conflict [Symbol, NilClass] defines behavior on conflict with unique index constraint, allowed values
      #   are :do_update, :do_nothing, nil
      # @param column_name [Symbol] Name of the data column we will be upserting
      def create_partial!(all_attribute_keys, hashes, on_conflict: nil, column_name: nil)
        get_connection.execute(
          build_insert_query(all_attribute_keys, hashes, :on_conflict => on_conflict, :mode => :partial, :column_name => column_name)
        )
      end
```
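The per-column pass above is the heart of the partial save: every data column is upserted on its own, with the row's resource_timestamps_max swapped to that column's own version, so a newer value for one attribute can never clobber a newer value of another. A sketch of that version bookkeeping under assumed data:

```ruby
# One upsert per data column, each carrying its own version as the row max.
rows = [
  {:ems_ref => "vm-1", :power_state => "on",
   :__non_serialized_versions => {:power_state => Time.utc(2018, 1, 2)}},
]

[:power_state].each do |column_name|
  rows.select { |x| x.key?(column_name) }.each do |x|
    version = x[:__non_serialized_versions][column_name]
    x[:resource_timestamps_max] = version if version
    puts "upsert #{x[:ems_ref]} #{column_name}=#{x[column_name]} " \
         "(max=#{x[:resource_timestamps_max]})"
  end
end
# => upsert vm-1 power_state=on (max=2018-01-02 00:00:00 UTC)
```

The full-row insert/upsert path: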
```ruby
      # Batch inserts records using attributes_index data. With on_conflict option using :do_update, this method
      # does atomic upsert.
      #
      # @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
      # @param batch [Array<InventoryRefresh::InventoryObject>] Array of InventoryObject objects we will be inserting into
      #   the DB
      # @param attributes_index [Hash{String => Hash}] Hash of data hashes with only keys that are column names of the
      #   model's table
      # @param on_conflict [Symbol, NilClass] defines behavior on conflict with unique index constraint, allowed values
      #   are :do_update, :do_nothing, nil
      def create_records!(all_attribute_keys, batch, attributes_index, on_conflict: nil)
        indexed_inventory_objects = {}
        hashes = []
        create_time = time_now
        batch.each do |index, inventory_object|
          hash = if inventory_collection.use_ar_object?
                   record = inventory_collection.model_class.new(attributes_index[index])
                   values_for_database!(all_attribute_keys,
                                        record.attributes.symbolize_keys)
                 elsif serializable_keys?
                   values_for_database!(all_attribute_keys,
                                        attributes_index[index])
                 else
                   attributes_index[index]
                 end

          assign_attributes_for_create!(hash, create_time)

          next unless assert_referential_integrity(hash)

          hashes << hash
          # Index on Unique Columns values, so we can easily fill in the :id later
          indexed_inventory_objects[unique_index_columns.map { |x| hash[x] }] = inventory_object
        end

        return if hashes.blank?

        result = get_connection.execute(
          build_insert_query(all_attribute_keys, hashes, :on_conflict => on_conflict, :mode => :full)
        )

        if inventory_collection.parallel_safe?
          # We've done upsert, so records were either created or updated. We can recognize that by checking if
          # created and updated timestamps are the same
          created_attr = "created_on" if inventory_collection.supports_created_on?
          created_attr ||= "created_at" if inventory_collection.supports_created_at?
          updated_attr = "updated_on" if inventory_collection.supports_updated_on?
          updated_attr ||= "updated_at" if inventory_collection.supports_updated_at?

          if created_attr && updated_attr
            created, updated = result.to_a.partition { |x| x[created_attr] == x[updated_attr] }
            inventory_collection.store_created_records(created)
            inventory_collection.store_updated_records(updated)
          else
            # The record doesn't have both created and updated attrs, so we'll take all as created
            inventory_collection.store_created_records(result)
          end
        else
          # We've done just insert, so all records were created
          inventory_collection.store_created_records(result)
        end

        if inventory_collection.dependees.present?
          # We need to get primary keys of the created objects, but only if there are dependees that would use them
          map_ids_to_inventory_objects(indexed_inventory_objects,
                                       all_attribute_keys,
                                       hashes,
                                       result,
                                       :on_conflict => on_conflict)
        end

        if inventory_collection.parallel_safe?
          skeletonize_ignored_records!(indexed_inventory_objects, result, :all_unique_columns => true)
        end
      end
```
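Because the batch is an upsert, create_records! cannot know per row whether it inserted or updated, so it infers it from the returned timestamps: a freshly inserted row has created_at equal to updated_at (both assigned in the same statement), while an updated row does not. A sketch of that partition over an assumed RETURNING result:

```ruby
# Split an upsert result into created vs updated rows by timestamp equality.
t1 = "2018-05-01 12:00:00"
t2 = "2018-05-01 12:00:05"
result = [
  {"id" => 1, "created_at" => t1, "updated_at" => t1}, # inserted
  {"id" => 2, "created_at" => t1, "updated_at" => t2}, # updated
]

created, updated = result.partition { |x| x["created_at"] == x["updated_at"] }
puts created.map { |x| x["id"] }.inspect # => [1]
puts updated.map { |x| x["id"] }.inspect # => [2]
```

The file closes with the id-mapping helper and the version rules: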
```ruby
      # Stores primary_key values of created records into associated InventoryObject objects.
      #
      # @param indexed_inventory_objects [Hash{String => InventoryRefresh::InventoryObject}] inventory objects indexed
      #   by stringified value made from db_columns
      # @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
      # @param hashes [Array<Hashes>] Array of hashes that were used for inserting of the data
      # @param result [Array<Hashes>] Array of hashes that are a result of the batch insert query, each result
      #   contains a primary key_value plus all columns that are a part of the unique index
      # @param on_conflict [Symbol, NilClass] defines behavior on conflict with unique index constraint, allowed values
      #   are :do_update, :do_nothing, nil
      def map_ids_to_inventory_objects(indexed_inventory_objects, all_attribute_keys, hashes, result, on_conflict:)
        if on_conflict == :do_nothing
          # TODO(lsmola) is the comment below still accurate? We will update some partial rows, the actual skeletal
          # precreate will still do nothing.
          # For ON CONFLICT DO NOTHING, we need to always fetch the records plus the attribute_references. This path
          # applies only for skeletal precreate.
          inventory_collection.model_class.where(
            build_multi_selection_query(hashes)
          ).select(unique_index_columns + [:id] + attribute_references.to_a).each do |record|
            key = unique_index_columns.map { |x| record.public_send(x) }
            inventory_object = indexed_inventory_objects[key]

            # Load also attribute_references, so lazy_find with :key pointing to skeletal reference works
            attributes = record.attributes.symbolize_keys
            attribute_references.each do |ref|
              inventory_object[ref] = attributes[ref]

              next unless (foreign_key = association_to_foreign_key_mapping[ref])
              base_class_name = attributes[association_to_foreign_type_mapping[ref].try(:to_sym)] || association_to_base_class_mapping[ref]
              id = attributes[foreign_key.to_sym]
              inventory_object[ref] = InventoryRefresh::ApplicationRecordReference.new(base_class_name, id)
            end

            inventory_object.id = record.id if inventory_object
          end
        elsif !supports_remote_data_timestamp?(all_attribute_keys) || result.count == batch_size_for_persisting
          # We can use the insert query result to fetch all primary_key values, which makes this the most effective
          # path.
          result.each do |inserted_record|
            key = unique_index_columns.map do |x|
              value = inserted_record[x.to_s]
              type = deserializable_keys[x]
              type ? type.deserialize(value) : value
            end
            inventory_object = indexed_inventory_objects[key]
            inventory_object.id = inserted_record[primary_key] if inventory_object
          end
        else
          # The remote_data_timestamp is adding a WHERE condition to ON CONFLICT UPDATE. As a result, the RETURNING
          # clause is not guaranteed to return all ids of the inserted/updated records in the result. In that case
          # we test if the number of results matches the expected batch size. Then if the counts do not match, the only
          # safe option is to query all the data from the DB, using the unique_indexes. The batch size will also not
          # match for remainders (the last batch in a stream of batches).
          inventory_collection.model_class.where(
            build_multi_selection_query(hashes)
          ).select(unique_index_columns + [:id]).each do |inserted_record|
            key = unique_index_columns.map { |x| inserted_record.public_send(x) }
            inventory_object = indexed_inventory_objects[key]
            inventory_object.id = inserted_record.id if inventory_object
          end
        end
      end

      def skeletonize_or_skip_record(record_version, hash_version, record_versions_max, inventory_object)
        # Skip updating this record, because it is old
        return true if record_version && hash_version && record_version >= hash_version

        # Some column has a bigger version than the whole row, we need to store the row partially
        if record_versions_max && hash_version && record_versions_max > hash_version
          inventory_collection.skeletal_primary_index.skeletonize_primary_index(inventory_object.manager_uuid)
          return true
        end

        false
      end

      def assign_partial_row_version_attributes!(full_row_version_attr, partial_row_version_attr,
                                                 partial_row_version_attr_max, hash, all_attribute_keys)
        hash[partial_row_version_attr_max] = hash.delete(full_row_version_attr)

        if hash[partial_row_version_attr].present?
          # Let's clean to only what we save, since when we build the skeletal object, we can set more
          hash[partial_row_version_attr] = hash[partial_row_version_attr].slice(*all_attribute_keys)
        end
      end
    end
  end
end
```
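skeletonize_or_skip_record condenses the versioning rules into two checks: an incoming row older than the stored row-level version is skipped outright, and a stored per-column maximum newer than the incoming row forces a partial (skeletal) save instead of a full update. A standalone sketch of that decision table (an illustrative function with integer versions; the gem compares timestamps or versions the same way):

```ruby
# Decide what to do with an incoming row given the stored versions.
def decide(record_version, hash_version, record_versions_max)
  # The whole stored row is already as new or newer: skip the update.
  return :skip if record_version && hash_version && record_version >= hash_version
  # Some stored column is newer than the incoming row: save it partially.
  return :partial_save if record_versions_max && hash_version && record_versions_max > hash_version

  :full_update
end

puts decide(5, 4, nil) # => skip
puts decide(3, 4, 6)   # => partial_save
puts decide(3, 4, 2)   # => full_update
```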