inventory_refresh 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54):
  1. checksums.yaml +7 -0
  2. data/.codeclimate.yml +47 -0
  3. data/.gitignore +13 -0
  4. data/.rspec +4 -0
  5. data/.rspec_ci +4 -0
  6. data/.rubocop.yml +4 -0
  7. data/.rubocop_cc.yml +5 -0
  8. data/.rubocop_local.yml +2 -0
  9. data/.travis.yml +12 -0
  10. data/.yamllint +12 -0
  11. data/CHANGELOG.md +0 -0
  12. data/Gemfile +6 -0
  13. data/LICENSE +202 -0
  14. data/README.md +35 -0
  15. data/Rakefile +47 -0
  16. data/bin/console +14 -0
  17. data/bin/setup +8 -0
  18. data/inventory_refresh.gemspec +34 -0
  19. data/lib/inventory_refresh.rb +11 -0
  20. data/lib/inventory_refresh/application_record_iterator.rb +56 -0
  21. data/lib/inventory_refresh/application_record_reference.rb +15 -0
  22. data/lib/inventory_refresh/graph.rb +157 -0
  23. data/lib/inventory_refresh/graph/topological_sort.rb +66 -0
  24. data/lib/inventory_refresh/inventory_collection.rb +1175 -0
  25. data/lib/inventory_refresh/inventory_collection/data_storage.rb +178 -0
  26. data/lib/inventory_refresh/inventory_collection/graph.rb +170 -0
  27. data/lib/inventory_refresh/inventory_collection/index/proxy.rb +230 -0
  28. data/lib/inventory_refresh/inventory_collection/index/type/base.rb +80 -0
  29. data/lib/inventory_refresh/inventory_collection/index/type/data.rb +26 -0
  30. data/lib/inventory_refresh/inventory_collection/index/type/local_db.rb +286 -0
  31. data/lib/inventory_refresh/inventory_collection/index/type/skeletal.rb +116 -0
  32. data/lib/inventory_refresh/inventory_collection/reference.rb +96 -0
  33. data/lib/inventory_refresh/inventory_collection/references_storage.rb +106 -0
  34. data/lib/inventory_refresh/inventory_collection/scanner.rb +117 -0
  35. data/lib/inventory_refresh/inventory_collection/serialization.rb +140 -0
  36. data/lib/inventory_refresh/inventory_object.rb +303 -0
  37. data/lib/inventory_refresh/inventory_object_lazy.rb +151 -0
  38. data/lib/inventory_refresh/save_collection/base.rb +38 -0
  39. data/lib/inventory_refresh/save_collection/recursive.rb +52 -0
  40. data/lib/inventory_refresh/save_collection/saver/base.rb +390 -0
  41. data/lib/inventory_refresh/save_collection/saver/batch.rb +17 -0
  42. data/lib/inventory_refresh/save_collection/saver/concurrent_safe.rb +71 -0
  43. data/lib/inventory_refresh/save_collection/saver/concurrent_safe_batch.rb +632 -0
  44. data/lib/inventory_refresh/save_collection/saver/default.rb +57 -0
  45. data/lib/inventory_refresh/save_collection/saver/sql_helper.rb +85 -0
  46. data/lib/inventory_refresh/save_collection/saver/sql_helper_update.rb +120 -0
  47. data/lib/inventory_refresh/save_collection/saver/sql_helper_upsert.rb +196 -0
  48. data/lib/inventory_refresh/save_collection/topological_sort.rb +38 -0
  49. data/lib/inventory_refresh/save_inventory.rb +38 -0
  50. data/lib/inventory_refresh/target.rb +73 -0
  51. data/lib/inventory_refresh/target_collection.rb +80 -0
  52. data/lib/inventory_refresh/version.rb +3 -0
  53. data/tools/ci/create_db_user.sh +3 -0
  54. metadata +207 -0
require "inventory_refresh/save_collection/saver/base"

module InventoryRefresh::SaveCollection
  module Saver
    # Row-by-row saver strategy: persists each InventoryObject through plain
    # ActiveRecord create/update calls (no batching, no raw SQL).
    class Default < InventoryRefresh::SaveCollection::Saver::Base
      private

      # Updates the passed record with hash data and stores primary key value into inventory_object.
      #
      # @param record [ApplicationRecord] record we want to update in DB
      # @param hash [Hash] data we want to update the record with
      # @param inventory_object [InventoryRefresh::InventoryObject] InventoryObject instance where we will store primary
      #        key value
      def update_record!(record, hash, inventory_object)
        record.assign_attributes(hash.except(:id))

        # When change-checking is disabled we always save; otherwise only dirty records are written.
        should_persist = record.changed? || !inventory_collection.check_changed?
        if should_persist
          record.save
          inventory_collection.store_updated_records(record)
        end

        inventory_object.id = record.id
      end

      # Creates a new record in the DB using the passed hash data
      #
      # @param hash [Hash] hash with data we want to persist to DB
      # @param inventory_object [InventoryRefresh::InventoryObject] InventoryObject instance where we will store primary
      #        key value
      def create_record!(hash, inventory_object)
        created = inventory_collection.model_class.create!(hash.except(:id))
        inventory_collection.store_created_records(created)

        inventory_object.id = created.id
      end

      # Asserts we do not have duplicate records in the DB. If the record is duplicate we will destroy it.
      #
      # @param record [ApplicationRecord] record we want to update in DB
      # @param index [String] manager_uuid of the record
      # @return [Boolean] false if the record is duplicate
      def assert_unique_record(record, index)
        # TODO(lsmola) can go away once we indexed our DB with unique indexes
        unless unique_db_indexes.include?(index) # Include on Set is O(1)
          unique_db_indexes << index
          return true
        end

        # We have a duplicate in the DB, destroy it. A find_each method does automatically .order(:id => :asc)
        # so we always keep the oldest record in the case of duplicates.
        #_log.warn("A duplicate record was detected and destroyed, inventory_collection: "\
        #          "'#{inventory_collection}', record: '#{record}', duplicate_index: '#{index}'")
        record.destroy
        false
      end
    end
  end
end
require "inventory_refresh/save_collection/saver/sql_helper_update"
require "inventory_refresh/save_collection/saver/sql_helper_upsert"
require "active_support/concern"

module InventoryRefresh::SaveCollection
  module Saver
    # Shared SQL-building utilities mixed into the saver classes; pulls in the
    # UPDATE and UPSERT query builders when included.
    module SqlHelper
      # TODO(lsmola) all below methods should be rewritten to arel, but we need to first extend arel to be able to do
      # this

      extend ActiveSupport::Concern

      included do
        include SqlHelperUpsert
        include SqlHelperUpdate
      end

      # Returns quoted column name
      # @param key [Symbol] key that is column name
      # @returns [String] quoted column name
      def quote_column_name(key)
        get_connection.quote_column_name(key)
      end

      # @return [ActiveRecord::ConnectionAdapters::AbstractAdapter] ActiveRecord connection
      def get_connection
        ActiveRecord::Base.connection
      end

      # Builds a multiselection conditions like (table1.a = a1 AND table2.b = b1) OR (table1.a = a2 AND table2.b = b2)
      #
      # @param hashes [Array<Hash>] data we want to use for the query
      # @return [String] condition usable in .where of an ActiveRecord relation
      def build_multi_selection_query(hashes)
        inventory_collection.build_multi_selection_condition(hashes, unique_index_columns)
      end

      # Quotes a value. For update query, the value also needs to be explicitly casted, which we can do by
      # type_cast_for_pg param set to true.
      #
      # @param connection [ActiveRecord::ConnectionAdapters::AbstractAdapter] ActiveRecord connection
      # @param value [Object] value we want to quote
      # @param name [Symbol] name of the column
      # @param type_cast_for_pg [Boolean] true if we want to also cast the quoted value
      # @return [String] quoted and based on type_cast_for_pg param also casted value
      def quote(connection, value, name = nil, type_cast_for_pg = nil)
        # TODO(lsmola) needed only because UPDATE FROM VALUES needs a specific PG typecasting, remove when fixed in PG
        return connection.quote(value) unless type_cast_for_pg

        quote_and_pg_type_cast(connection, value, name)
      rescue TypeError => e
        #_log.error("Can't quote value: #{value}, of :#{name} and #{inventory_collection}")
        raise e
      end

      # Quotes and type casts the value.
      #
      # @param connection [ActiveRecord::ConnectionAdapters::AbstractAdapter] ActiveRecord connection
      # @param value [Object] value we want to quote
      # @param name [Symbol] name of the column
      # @return [String] quoted and casted value
      def quote_and_pg_type_cast(connection, value, name)
        quoted = connection.quote(value)
        pg_type_cast(quoted, pg_types[name])
      end

      # Returns a type casted value in format needed by PostgreSQL
      #
      # @param value [Object] value we want to quote
      # @param sql_type [String] PostgreSQL column type
      # @return [String] type casted value in format needed by PostgreSQL
      def pg_type_cast(value, sql_type)
        sql_type.nil? ? value : "#{value}::#{sql_type}"
      end
    end
  end
end
module InventoryRefresh::SaveCollection
  module Saver
    # Builds the batched UPDATE ... FROM (VALUES ...) query used by the savers.
    module SqlHelperUpdate
      # Builds update clause for one column identified by the passed key
      #
      # @param key [Symbol] key that is column name
      # @return [String] SQL clause for updating one column
      def build_update_set_cols(key)
        "#{quote_column_name(key)} = updated_values.#{quote_column_name(key)}"
      end

      # Build batch update query
      #
      # @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
      # @param hashes [Array<Hash>] data used for building a batch update sql query
      # @return [String] the complete UPDATE SQL statement
      def build_update_query(all_attribute_keys, hashes)
        #_log.debug("Building update query for #{inventory_collection} of size #{inventory_collection.size}...")
        # Cache the connection for the batch
        connection = get_connection

        # We want to ignore create timestamps when updating
        all_attribute_keys_array = all_attribute_keys.to_a.delete_if { |x| %i(created_at created_on).include?(x) }
        all_attribute_keys_array << :id

        # If there is not version attribute, the version conditions will be ignored
        version_attribute = if inventory_collection.parallel_safe? && supports_remote_data_timestamp?(all_attribute_keys)
                              :resource_timestamp
                            elsif inventory_collection.parallel_safe? && supports_remote_data_version?(all_attribute_keys)
                              :resource_version
                            end

        update_query = update_query_beginning(all_attribute_keys_array)
        update_query += update_query_reset_version_columns(version_attribute)
        update_query += update_query_from_values(hashes, all_attribute_keys_array, connection)
        update_query += update_query_version_conditions(version_attribute)
        update_query += update_query_returning

        #_log.debug("Building update query for #{inventory_collection} of size #{inventory_collection.size}...Complete")

        update_query
      end

      private

      # Builds the "UPDATE <table> SET col = updated_values.col, ..." head of the query.
      def update_query_beginning(all_attribute_keys_array)
        <<-SQL
          UPDATE #{table_name}
            SET
              #{all_attribute_keys_array.map { |key| build_update_set_cols(key) }.join(",")}
        SQL
      end

      # Returns the SET fragment that clears the partial-update version columns
      # (a full row update invalidates any previously stored partial versions),
      # or "" when no version attribute is in play.
      def update_query_reset_version_columns(version_attribute)
        if version_attribute
          attr_partial     = version_attribute.to_s.pluralize # Changes resource_version/timestamp to resource_versions/timestamps
          attr_partial_max = "#{attr_partial}_max"

          # Quote the column names
          attr_partial     = quote_column_name(attr_partial)
          attr_partial_max = quote_column_name(attr_partial_max)

          # Full row update will reset the partial update timestamps
          <<-SQL
            , #{attr_partial} = '{}', #{attr_partial_max} = NULL
          SQL
        else
          ""
        end
      end

      # Builds the "FROM (VALUES ...) AS updated_values (...) WHERE ..." part of the query.
      #
      # BUGFIX: was `hashes.map!`, which destructively replaced the caller's hash
      # elements with SQL fragment strings (the upsert counterpart
      # insert_query_insert_values uses non-mutating `map`). Use `map` so the
      # caller's data survives query building.
      def update_query_from_values(hashes, all_attribute_keys_array, connection)
        values = hashes.map do |hash|
          "(#{all_attribute_keys_array.map { |x| quote(connection, hash[x], x, true) }.join(",")})"
        end.join(",")

        <<-SQL
          FROM (
            VALUES
              #{values}
          ) AS updated_values (#{all_attribute_keys_array.map { |x| quote_column_name(x) }.join(",")})
          WHERE updated_values.id = #{q_table_name}.id
        SQL
      end

      # Returns the extra WHERE conditions that prevent newer rows in the DB from
      # being overwritten by older incoming data, or "" when no version attribute.
      def update_query_version_conditions(version_attribute)
        if version_attribute
          # This conditional will avoid rewriting new data by old data. But we want it only when version_attribute is
          # a part of the data, since for the fake records, we just want to update ems_ref.
          attr_partial     = version_attribute.to_s.pluralize # Changes resource_version/timestamp to resource_versions/timestamps
          attr_partial_max = "#{attr_partial}_max"

          # Quote the column names
          attr_full        = quote_column_name(version_attribute)
          attr_partial_max = quote_column_name(attr_partial_max)

          <<-SQL
            AND (
              updated_values.#{attr_full} IS NULL OR (
                (#{q_table_name}.#{attr_full} IS NULL OR updated_values.#{attr_full} > #{q_table_name}.#{attr_full}) AND
                (#{q_table_name}.#{attr_partial_max} IS NULL OR updated_values.#{attr_full} >= #{q_table_name}.#{attr_partial_max})
              )
            )
          SQL
        else
          ""
        end
      end

      # Returns the RETURNING clause (only needed for parallel-safe collections,
      # which must map updated rows back to inventory objects), otherwise "".
      def update_query_returning
        if inventory_collection.parallel_safe?
          <<-SQL
            RETURNING updated_values.#{quote_column_name("id")}, #{unique_index_columns.map { |x| "updated_values.#{quote_column_name(x)}" }.join(",")}
          SQL
        else
          ""
        end
      end
    end
  end
end
module InventoryRefresh::SaveCollection
  module Saver
    # Builds the batched INSERT ... ON CONFLICT (upsert) query used by the savers.
    module SqlHelperUpsert
      # Builds ON CONFLICT UPDATE updating branch for one column identified by the passed key
      #
      # @param key [Symbol] key that is column name
      # @return [String] SQL clause for upserting one column
      def build_insert_set_cols(key)
        "#{quote_column_name(key)} = EXCLUDED.#{quote_column_name(key)}"
      end

      # Builds the full INSERT (optionally with ON CONFLICT handling) query.
      #
      # @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
      # @param hashes [Array<Hash>] data used for building a batch insert sql query
      # @param mode [Symbol] Mode for saving, allowed values are [:full, :partial], :full is when we save all
      #        columns of a row, :partial is when we save only few columns, so a partial row.
      # @param on_conflict [Symbol, NilClass] defines behavior on conflict with unique index constraint, allowed values
      #        are :do_update, :do_nothing, nil
      # @param column_name [Symbol, NilClass] required for mode == :partial, names the single column being upserted
      # @return [String] the complete INSERT SQL statement
      def build_insert_query(all_attribute_keys, hashes, on_conflict: nil, mode:, column_name: nil)
        #_log.debug("Building insert query for #{inventory_collection} of size #{inventory_collection.size}...")

        # Cache the connection for the batch
        connection = get_connection
        # Ignore versioning columns that are set separately
        ignore_cols = mode == :partial ? [:resource_timestamp, :resource_version] : []
        # Make sure we don't send a primary_key for INSERT in any form, it could break PG sequencer
        all_attribute_keys_array = all_attribute_keys.to_a - [primary_key.to_s, primary_key.to_sym] - ignore_cols

        insert_query = insert_query_insert_values(hashes, all_attribute_keys_array, connection)
        insert_query += insert_query_on_conflict_behavior(all_attribute_keys, on_conflict, mode, ignore_cols, column_name)
        insert_query += insert_query_returning

        #_log.debug("Building insert query for #{inventory_collection} of size #{inventory_collection.size}...Complete")

        insert_query
      end

      private

      # Builds the "INSERT INTO <table> (cols) VALUES (...),(...)" head of the query.
      def insert_query_insert_values(hashes, all_attribute_keys_array, connection)
        values = hashes.map do |hash|
          "(#{all_attribute_keys_array.map { |x| quote(connection, hash[x], x) }.join(",")})"
        end.join(",")

        col_names = all_attribute_keys_array.map { |x| quote_column_name(x) }.join(",")

        <<-SQL
          INSERT INTO #{q_table_name} (#{col_names})
            VALUES
              #{values}
        SQL
      end

      # Returns the ON CONFLICT clause; "" unless the collection is parallel_safe
      # (non-parallel-safe collections rely on plain INSERT failing instead).
      def insert_query_on_conflict_behavior(all_attribute_keys, on_conflict, mode, ignore_cols, column_name)
        return "" unless inventory_collection.parallel_safe?

        insert_query_on_conflict = insert_query_on_conflict_do(on_conflict)
        if on_conflict == :do_update
          insert_query_on_conflict += insert_query_on_conflict_update(all_attribute_keys, mode, ignore_cols, column_name)
        end
        insert_query_on_conflict
      end

      # Returns "ON CONFLICT DO NOTHING" or the "ON CONFLICT (...) DO UPDATE" head
      # (including the unique index's partial-index WHERE condition, if any).
      # NOTE(review): returns nil when on_conflict is neither symbol — callers pass
      # only :do_nothing/:do_update; confirm before relying on other values.
      def insert_query_on_conflict_do(on_conflict)
        if on_conflict == :do_nothing
          <<-SQL
            ON CONFLICT DO NOTHING
          SQL
        elsif on_conflict == :do_update
          index_where_condition = unique_index_for(unique_index_keys).where
          where_to_sql          = index_where_condition ? "WHERE #{index_where_condition}" : ""

          <<-SQL
            ON CONFLICT (#{unique_index_columns.map { |x| quote_column_name(x) }.join(",")}) #{where_to_sql}
              DO
                UPDATE
          SQL
        end
      end

      # Builds the SET ... portion of DO UPDATE, excluding ignored/created columns,
      # plus (when a version attribute is present) the anti-clobber conditions.
      def insert_query_on_conflict_update(all_attribute_keys, mode, ignore_cols, column_name)
        if mode == :partial
          # Partial saves manage the versioning bookkeeping columns themselves below
          ignore_cols += [:resource_timestamps, :resource_timestamps_max, :resource_versions, :resource_versions_max]
        end
        ignore_cols += [:created_on, :created_at] # Lets not change created for the update clause

        # If there is not version attribute, the update part will be ignored below
        version_attribute = if supports_remote_data_timestamp?(all_attribute_keys)
                              :resource_timestamp
                            elsif supports_remote_data_version?(all_attribute_keys)
                              :resource_version
                            end

        # TODO(lsmola) should we add :deleted => false to the update clause? That should handle a reconnect, without a
        # a need to list :deleted anywhere in the parser. We just need to check that a model has the :deleted attribute
        query = <<-SQL
          SET #{(all_attribute_keys - ignore_cols).map { |key| build_insert_set_cols(key) }.join(", ")}
        SQL

        # This conditional will make sure we are avoiding rewriting new data by old data. But we want it only when
        # remote_data_timestamp is a part of the data.
        query += insert_query_on_conflict_update_mode(mode, version_attribute, column_name) if version_attribute
        query
      end

      # Dispatches to the full- or partial-row version-guard condition builder.
      #
      # @raise [RuntimeError] when mode is :partial and no column_name was given
      def insert_query_on_conflict_update_mode(mode, version_attribute, column_name)
        if mode == :full
          full_update_condition(version_attribute)
        elsif mode == :partial
          raise "Column name must be provided" unless column_name
          partial_update_condition(version_attribute, column_name)
        end
      end

      # For a full-row upsert: resets the partial-version jsonb columns and adds a
      # WHERE guard so EXCLUDED (incoming) data only wins when it is newer than both
      # the stored full version and the stored partial-version maximum.
      def full_update_condition(attr_full)
        attr_partial     = attr_full.to_s.pluralize # Changes resource_version/timestamp to resource_versions/timestamps
        attr_partial_max = "#{attr_partial}_max"

        # Quote the column names
        attr_full        = quote_column_name(attr_full)
        attr_partial     = quote_column_name(attr_partial)
        attr_partial_max = quote_column_name(attr_partial_max)

        <<-SQL
          , #{attr_partial} = '{}', #{attr_partial_max} = NULL

          WHERE EXCLUDED.#{attr_full} IS NULL OR (
            (#{q_table_name}.#{attr_full} IS NULL OR EXCLUDED.#{attr_full} > #{q_table_name}.#{attr_full}) AND
            (#{q_table_name}.#{attr_partial_max} IS NULL OR EXCLUDED.#{attr_full} >= #{q_table_name}.#{attr_partial_max})
          )
        SQL
      end

      # For a partial-row upsert of one column: merges the incoming version into the
      # per-column jsonb map, keeps the running maximum, and guards against older
      # data overwriting newer (per stored full version and per this column's entry).
      def partial_update_condition(attr_full, column_name)
        attr_partial     = attr_full.to_s.pluralize # Changes resource_version/timestamp to resource_versions/timestamps
        attr_partial_max = "#{attr_partial}_max"
        # PG cast used when comparing/merging the version values in SQL
        cast             = if attr_full == :resource_timestamp
                             "timestamp"
                           elsif attr_full == :resource_version
                             "integer"
                           end

        # Quote the column names
        attr_full        = quote_column_name(attr_full)
        attr_partial     = quote_column_name(attr_partial)
        attr_partial_max = quote_column_name(attr_partial_max)
        column_name      = get_connection.quote_string(column_name.to_s)
        # Local shadows the q_table_name helper with the same quoted value
        q_table_name     = get_connection.quote_table_name(table_name)

        <<-SQL
          #{insert_query_set_jsonb_version(cast, attr_partial, attr_partial_max, column_name)}
          , #{attr_partial_max} = greatest(#{q_table_name}.#{attr_partial_max}::#{cast}, EXCLUDED.#{attr_partial_max}::#{cast})
          WHERE EXCLUDED.#{attr_partial_max} IS NULL OR (
            (#{q_table_name}.#{attr_full} IS NULL OR EXCLUDED.#{attr_partial_max} > #{q_table_name}.#{attr_full}) AND (
              (#{q_table_name}.#{attr_partial}->>'#{column_name}')::#{cast} IS NULL OR
              EXCLUDED.#{attr_partial_max}::#{cast} > (#{q_table_name}.#{attr_partial}->>'#{column_name}')::#{cast}
            )
          )
        SQL
      end

      # Builds the SET fragment that merges {column_name: version} into the jsonb
      # partial-versions column; integer values are not wrapped in quotes.
      def insert_query_set_jsonb_version(cast, attr_partial, attr_partial_max, column_name)
        if cast == "integer"
          # If we have integer value, we don't want to encapsulate the value in ""
          <<-SQL
            , #{attr_partial} = #{q_table_name}.#{attr_partial} || ('{"#{column_name}": ' || EXCLUDED.#{attr_partial_max}::#{cast} || '}')::jsonb
          SQL
        else
          <<-SQL
            , #{attr_partial} = #{q_table_name}.#{attr_partial} || ('{"#{column_name}": "' || EXCLUDED.#{attr_partial_max}::#{cast} || '"}')::jsonb
          SQL
        end
      end

      # RETURNING clause so callers can map inserted/updated rows back to
      # inventory objects by id + unique index columns.
      def insert_query_returning
        <<-SQL
          RETURNING "id",#{unique_index_columns.map { |x| quote_column_name(x) }.join(",")}
                    #{insert_query_returning_timestamps}
        SQL
      end

      # Adds internal timestamp columns to RETURNING (parallel-safe only), so the
      # caller can distinguish created rows from updated rows.
      def insert_query_returning_timestamps
        if inventory_collection.parallel_safe?
          # For upsert, we'll return also created and updated timestamps, so we can recognize what was created and what
          # updated
          if inventory_collection.internal_timestamp_columns.present?
            <<-SQL
              , #{inventory_collection.internal_timestamp_columns.map { |x| quote_column_name(x) }.join(",")}
            SQL
          end
        else
          ""
        end
      end
    end
  end
end