inventory_refresh 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. checksums.yaml +7 -0
  2. data/.codeclimate.yml +47 -0
  3. data/.gitignore +13 -0
  4. data/.rspec +4 -0
  5. data/.rspec_ci +4 -0
  6. data/.rubocop.yml +4 -0
  7. data/.rubocop_cc.yml +5 -0
  8. data/.rubocop_local.yml +2 -0
  9. data/.travis.yml +12 -0
  10. data/.yamllint +12 -0
  11. data/CHANGELOG.md +0 -0
  12. data/Gemfile +6 -0
  13. data/LICENSE +202 -0
  14. data/README.md +35 -0
  15. data/Rakefile +47 -0
  16. data/bin/console +14 -0
  17. data/bin/setup +8 -0
  18. data/inventory_refresh.gemspec +34 -0
  19. data/lib/inventory_refresh.rb +11 -0
  20. data/lib/inventory_refresh/application_record_iterator.rb +56 -0
  21. data/lib/inventory_refresh/application_record_reference.rb +15 -0
  22. data/lib/inventory_refresh/graph.rb +157 -0
  23. data/lib/inventory_refresh/graph/topological_sort.rb +66 -0
  24. data/lib/inventory_refresh/inventory_collection.rb +1175 -0
  25. data/lib/inventory_refresh/inventory_collection/data_storage.rb +178 -0
  26. data/lib/inventory_refresh/inventory_collection/graph.rb +170 -0
  27. data/lib/inventory_refresh/inventory_collection/index/proxy.rb +230 -0
  28. data/lib/inventory_refresh/inventory_collection/index/type/base.rb +80 -0
  29. data/lib/inventory_refresh/inventory_collection/index/type/data.rb +26 -0
  30. data/lib/inventory_refresh/inventory_collection/index/type/local_db.rb +286 -0
  31. data/lib/inventory_refresh/inventory_collection/index/type/skeletal.rb +116 -0
  32. data/lib/inventory_refresh/inventory_collection/reference.rb +96 -0
  33. data/lib/inventory_refresh/inventory_collection/references_storage.rb +106 -0
  34. data/lib/inventory_refresh/inventory_collection/scanner.rb +117 -0
  35. data/lib/inventory_refresh/inventory_collection/serialization.rb +140 -0
  36. data/lib/inventory_refresh/inventory_object.rb +303 -0
  37. data/lib/inventory_refresh/inventory_object_lazy.rb +151 -0
  38. data/lib/inventory_refresh/save_collection/base.rb +38 -0
  39. data/lib/inventory_refresh/save_collection/recursive.rb +52 -0
  40. data/lib/inventory_refresh/save_collection/saver/base.rb +390 -0
  41. data/lib/inventory_refresh/save_collection/saver/batch.rb +17 -0
  42. data/lib/inventory_refresh/save_collection/saver/concurrent_safe.rb +71 -0
  43. data/lib/inventory_refresh/save_collection/saver/concurrent_safe_batch.rb +632 -0
  44. data/lib/inventory_refresh/save_collection/saver/default.rb +57 -0
  45. data/lib/inventory_refresh/save_collection/saver/sql_helper.rb +85 -0
  46. data/lib/inventory_refresh/save_collection/saver/sql_helper_update.rb +120 -0
  47. data/lib/inventory_refresh/save_collection/saver/sql_helper_upsert.rb +196 -0
  48. data/lib/inventory_refresh/save_collection/topological_sort.rb +38 -0
  49. data/lib/inventory_refresh/save_inventory.rb +38 -0
  50. data/lib/inventory_refresh/target.rb +73 -0
  51. data/lib/inventory_refresh/target_collection.rb +80 -0
  52. data/lib/inventory_refresh/version.rb +3 -0
  53. data/tools/ci/create_db_user.sh +3 -0
  54. metadata +207 -0
@@ -0,0 +1,57 @@
1
+ require "inventory_refresh/save_collection/saver/base"
2
+
3
+ module InventoryRefresh::SaveCollection
4
+ module Saver
5
+ class Default < InventoryRefresh::SaveCollection::Saver::Base
6
+ private
7
+
8
# Assigns the hash data to the passed record, persists it when needed and copies the record's primary key value
# into inventory_object.
#
# @param record [ApplicationRecord] record we want to update in DB
# @param hash [Hash] data we want to update the record with, the :id key is never assigned
# @param inventory_object [InventoryRefresh::InventoryObject] InventoryObject instance where we will store primary
#        key value
def update_record!(record, hash, inventory_object)
  record.assign_attributes(hash.except(:id))

  # The save is skipped only when the collection asks for change detection and the record has no changes
  unchanged = inventory_collection.check_changed? && !record.changed?
  unless unchanged
    # NOTE(review): the result of #save is not checked, so a record that failed to save is still stored as
    # updated -- confirm whether #save! was intended.
    record.save
    inventory_collection.store_updated_records(record)
  end

  inventory_object.id = record.id
end
23
+
24
# Creates a new record of the collection's model class from the passed hash data, registers it as created and
# copies its primary key value into inventory_object.
#
# @param hash [Hash] hash with data we want to persist to DB, the :id key is never sent
# @param inventory_object [InventoryRefresh::InventoryObject] InventoryObject instance where we will store primary
#        key value
def create_record!(hash, inventory_object)
  model = inventory_collection.model_class
  created = model.create!(hash.except(:id))
  inventory_collection.store_created_records(created)

  inventory_object.id = created.id
end
35
+
36
# Asserts we do not have duplicate records in the DB. The first record seen for an index is remembered, any later
# record with the same index is treated as a duplicate and destroyed.
#
# @param record [ApplicationRecord] record we want to update in DB
# @param index [String] manager_uuid of the record
# @return [Boolean] false if the record is duplicate, true otherwise
def assert_unique_record(record, index)
  # TODO(lsmola) can go away once we indexed our DB with unique indexes
  unless unique_db_indexes.include?(index) # Include on Set is O(1)
    unique_db_indexes << index
    return true
  end

  # We have a duplicate in the DB, destroy it. A find_each method does automatically .order(:id => :asc)
  # so we always keep the oldest record in the case of duplicates.
  #_log.warn("A duplicate record was detected and destroyed, inventory_collection: "\
  #          "'#{inventory_collection}', record: '#{record}', duplicate_index: '#{index}'")
  record.destroy
  false
end
55
+ end
56
+ end
57
+ end
@@ -0,0 +1,85 @@
1
+ require "inventory_refresh/save_collection/saver/sql_helper_update"
2
+ require "inventory_refresh/save_collection/saver/sql_helper_upsert"
3
+ require "active_support/concern"
4
+
5
+ module InventoryRefresh::SaveCollection
6
+ module Saver
7
+ module SqlHelper
8
+ # TODO(lsmola) all below methods should be rewritten to arel, but we need to first extend arel to be able to do
9
+ # this
10
+
11
+ extend ActiveSupport::Concern
12
+
13
+ included do
14
+ include SqlHelperUpsert
15
+ include SqlHelperUpdate
16
+ end
17
+
18
# Quotes a column name with the current connection.
#
# @param key [Symbol] key that is column name
# @return [String] quoted column name
def quote_column_name(key)
  connection = get_connection
  connection.quote_column_name(key)
end
24
+
25
# Returns the connection handed out by ActiveRecord::Base.
#
# @return [ActiveRecord::ConnectionAdapters::AbstractAdapter] ActiveRecord connection
def get_connection
  base_class = ActiveRecord::Base
  base_class.connection
end
29
+
30
# Builds a multi selection condition like (table1.a = a1 AND table2.b = b1) OR (table1.a = a2 AND table2.b = b2)
# by delegating to the inventory collection with this saver's unique index columns.
#
# @param hashes [Array<Hash>] data we want to use for the query
# @return [String] condition usable in .where of an ActiveRecord relation
def build_multi_selection_query(hashes)
  collection = inventory_collection
  collection.build_multi_selection_condition(hashes, unique_index_columns)
end
37
+
38
# Quotes a value. For an update query the value also needs an explicit cast, which is requested by setting the
# type_cast_for_pg param to true.
#
# @param connection [ActiveRecord::ConnectionAdapters::AbstractAdapter] ActiveRecord connection
# @param value [Object] value we want to quote
# @param name [Symbol] name of the column
# @param type_cast_for_pg [Boolean] true if we want to also cast the quoted value
# @return [String] quoted value, additionally cast when type_cast_for_pg is set
# @raise [TypeError] re-raised unchanged when the value can't be quoted
def quote(connection, value, name = nil, type_cast_for_pg = nil)
  # TODO(lsmola) needed only because UPDATE FROM VALUES needs a specific PG typecasting, remove when fixed in PG
  return quote_and_pg_type_cast(connection, value, name) if type_cast_for_pg

  connection.quote(value)
rescue TypeError
  #_log.error("Can't quote value: #{value}, of :#{name} and #{inventory_collection}")
  raise
end
57
+
58
# Quotes the value and appends the PostgreSQL cast looked up in pg_types for the column.
#
# @param connection [ActiveRecord::ConnectionAdapters::AbstractAdapter] ActiveRecord connection
# @param value [Object] value we want to quote
# @param name [Symbol] name of the column
# @return [String] quoted and cast value
def quote_and_pg_type_cast(connection, value, name)
  quoted_value = connection.quote(value)
  column_type = pg_types[name]
  pg_type_cast(quoted_value, column_type)
end
70
+
71
# Appends a PostgreSQL cast ("value::sql_type") to the value, the value is returned untouched when no type is given.
#
# @param value [Object] already quoted value
# @param sql_type [String, nil] PostgreSQL column type
# @return [Object, String] value itself when sql_type is nil, otherwise the value with the cast appended
def pg_type_cast(value, sql_type)
  return value if sql_type.nil?

  "#{value}::#{sql_type}"
end
83
+ end
84
+ end
85
+ end
@@ -0,0 +1,120 @@
1
+ module InventoryRefresh::SaveCollection
2
+ module Saver
3
+ module SqlHelperUpdate
4
# Builds the SET clause of one column, assigning it from the same column of the updated_values alias.
#
# @param key [Symbol] key that is column name
# @return [String] SQL clause for updating one column
def build_update_set_cols(key)
  target = quote_column_name(key)
  source = "updated_values.#{quote_column_name(key)}"
  "#{target} = #{source}"
end
11
+
12
# Builds the batch update query out of its parts: UPDATE/SET beginning, reset of the partial version columns,
# FROM VALUES data, version conditions and the RETURNING clause.
#
# @param all_attribute_keys [Array<Symbol>, Set<Symbol>] all columns we will be saving into each table row, the
#        passed collection is never modified
# @param hashes [Array<Hash>] data used for building a batch update sql query
# @return [String] batch update SQL query
def build_update_query(all_attribute_keys, hashes)
  #_log.debug("Building update query for #{inventory_collection} of size #{inventory_collection.size}...")
  # Cache the connection for the batch
  connection = get_connection

  # We want to ignore create timestamps when updating. Array#to_a returns the receiver itself, so a destructive
  # delete_if/<< would change the caller's Array (and add :id again on every call); reject builds a fresh Array.
  update_keys = all_attribute_keys.to_a.reject { |key| %i(created_at created_on).include?(key) }
  update_keys << :id unless update_keys.include?(:id)

  # If there is no version attribute, the version conditions will be ignored
  version_attribute = if inventory_collection.parallel_safe?
                        if supports_remote_data_timestamp?(all_attribute_keys)
                          :resource_timestamp
                        elsif supports_remote_data_version?(all_attribute_keys)
                          :resource_version
                        end
                      end

  update_query = [
    update_query_beginning(update_keys),
    update_query_reset_version_columns(version_attribute),
    update_query_from_values(hashes, update_keys, connection),
    update_query_version_conditions(version_attribute),
    update_query_returning
  ].join

  #_log.debug("Building update query for #{inventory_collection} of size #{inventory_collection.size}...Complete")

  update_query
end
42
+
43
+ private
44
+
45
# Builds the leading "UPDATE <table> SET <columns>" part of the batch update query.
#
# @param all_attribute_keys_array [Array<Symbol>] columns that are assigned from the updated_values alias
# @return [String] beginning of the batch update SQL query
def update_query_beginning(all_attribute_keys_array)
  set_clauses = all_attribute_keys_array.map { |key| build_update_set_cols(key) }.join(",")

  # The table is referenced through q_table_name, the same way the WHERE and version conditions of this
  # statement reference it, so a table name that needs quoting works in every part of the query.
  <<-SQL
    UPDATE #{q_table_name}
      SET
        #{set_clauses}
  SQL
end
52
+
53
# Builds the SET continuation that resets the partial version columns, used when a versioned full row is updated.
#
# @param version_attribute [Symbol, nil] :resource_timestamp, :resource_version or nil when rows are not versioned
# @return [String] SQL fragment, empty when there is no version attribute
def update_query_reset_version_columns(version_attribute)
  return "" unless version_attribute

  plural = version_attribute.to_s.pluralize # Changes resource_version/timestamp to resource_versions/timestamps

  # Quote the column names
  q_partial = quote_column_name(plural)
  q_partial_max = quote_column_name("#{plural}_max")

  # Full row update will reset the partial update timestamps
  <<-SQL
    , #{q_partial} = '{}', #{q_partial_max} = NULL
  SQL
end
70
+
71
# Builds the "FROM (VALUES ...) AS updated_values (...) WHERE ..." part of the batch update query, which joins the
# new values to the table rows by id.
#
# @param hashes [Array<Hash>] data used for building the VALUES rows, the Array is left untouched
# @param all_attribute_keys_array [Array<Symbol>] columns, in the order they appear in each VALUES row
# @param connection [ActiveRecord::ConnectionAdapters::AbstractAdapter] ActiveRecord connection used for quoting
# @return [String] SQL fragment with the values and the join condition
def update_query_from_values(hashes, all_attribute_keys_array, connection)
  # Non-destructive map: a destructive map! would replace the caller's hashes with the generated SQL strings
  rows = hashes.map do |hash|
    # Values are quoted with the explicit PG type cast requested (last argument)
    "(#{all_attribute_keys_array.map { |x| quote(connection, hash[x], x, true) }.join(",")})"
  end
  values = rows.join(",")
  col_names = all_attribute_keys_array.map { |x| quote_column_name(x) }.join(",")

  <<-SQL
    FROM (
      VALUES
        #{values}
    ) AS updated_values (#{col_names})
    WHERE updated_values.id = #{q_table_name}.id
  SQL
end
84
+
85
# Builds the additional WHERE conditions that compare the incoming version with the stored full and partial
# versions of the row.
#
# @param version_attribute [Symbol, nil] :resource_timestamp, :resource_version or nil when rows are not versioned
# @return [String] SQL fragment, empty when there is no version attribute
def update_query_version_conditions(version_attribute)
  return "" unless version_attribute

  # This conditional will avoid rewriting new data by old data. But we want it only when version_attribute is
  # a part of the data, since for the fake records, we just want to update ems_ref.
  plural = version_attribute.to_s.pluralize # Changes resource_version/timestamp to resource_versions/timestamps

  # Quote the column names
  q_full = quote_column_name(version_attribute)
  q_partial_max = quote_column_name("#{plural}_max")

  <<-SQL
    AND (
      updated_values.#{q_full} IS NULL OR (
        (#{q_table_name}.#{q_full} IS NULL OR updated_values.#{q_full} > #{q_table_name}.#{q_full}) AND
        (#{q_table_name}.#{q_partial_max} IS NULL OR updated_values.#{q_full} >= #{q_table_name}.#{q_partial_max})
      )
    )
  SQL
end
108
+
109
# Builds the RETURNING clause of the batch update query, listing the id and the unique index columns of the
# updated_values alias. Only parallel safe collections get the clause.
#
# @return [String] SQL fragment, empty when the collection is not parallel safe
def update_query_returning
  return "" unless inventory_collection.parallel_safe?

  id_column = "updated_values.#{quote_column_name("id")}"
  index_columns = unique_index_columns.map { |x| "updated_values.#{quote_column_name(x)}" }.join(",")

  <<-SQL
    RETURNING #{id_column}, #{index_columns}
  SQL
end
118
+ end
119
+ end
120
+ end
@@ -0,0 +1,196 @@
1
+ module InventoryRefresh::SaveCollection
2
+ module Saver
3
+ module SqlHelperUpsert
4
# Builds the ON CONFLICT UPDATE assignment of one column, taking the value from the EXCLUDED (proposed) row.
#
# @param key [Symbol] key that is column name
# @return [String] SQL clause for upserting one column
def build_insert_set_cols(key)
  target = quote_column_name(key)
  source = "EXCLUDED.#{quote_column_name(key)}"
  "#{target} = #{source}"
end
11
+
12
# Builds the batch insert query out of its parts: INSERT VALUES, ON CONFLICT behavior and the RETURNING clause.
#
# @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
# @param hashes [Array<Hash>] data used for building a batch insert sql query
# @param mode [Symbol] Mode for saving, allowed values are [:full, :partial], :full is when we save all
#        columns of a row, :partial is when we save only few columns, so a partial row.
# @param on_conflict [Symbol, NilClass] defines behavior on conflict with unique index constraint, allowed values
#        are :do_update, :do_nothing, nil
# @param column_name [Symbol, String, NilClass] passed on to the ON CONFLICT builder
# @return [String] batch insert SQL query
def build_insert_query(all_attribute_keys, hashes, on_conflict: nil, mode:, column_name: nil)
  #_log.debug("Building insert query for #{inventory_collection} of size #{inventory_collection.size}...")

  # Cache the connection for the batch
  connection = get_connection

  # Ignore versioning columns that are set separately
  ignore_cols = []
  ignore_cols = [:resource_timestamp, :resource_version] if mode == :partial

  # Make sure we don't send a primary_key for INSERT in any form, it could break PG sequencer
  never_inserted = [primary_key.to_s, primary_key.to_sym] + ignore_cols
  insert_keys = all_attribute_keys.to_a - never_inserted

  insert_query = insert_query_insert_values(hashes, insert_keys, connection) +
                 insert_query_on_conflict_behavior(all_attribute_keys, on_conflict, mode, ignore_cols, column_name) +
                 insert_query_returning

  #_log.debug("Building insert query for #{inventory_collection} of size #{inventory_collection.size}...Complete")

  insert_query
end
36
+
37
+ private
38
+
39
# Builds the "INSERT INTO <table> (<columns>) VALUES ..." part of the batch insert query.
#
# @param hashes [Array<Hash>] data used for building the VALUES rows
# @param all_attribute_keys_array [Array<Symbol>] columns, in the order they appear in each VALUES row
# @param connection [ActiveRecord::ConnectionAdapters::AbstractAdapter] ActiveRecord connection used for quoting
# @return [String] SQL fragment with the insert header and values
def insert_query_insert_values(hashes, all_attribute_keys_array, connection)
  rows = hashes.map do |hash|
    cells = all_attribute_keys_array.map { |key| quote(connection, hash[key], key) }
    "(#{cells.join(",")})"
  end
  values = rows.join(",")

  col_names = all_attribute_keys_array.map { |key| quote_column_name(key) }.join(",")

  <<-SQL
    INSERT INTO #{q_table_name} (#{col_names})
      VALUES
        #{values}
  SQL
end
52
+
53
# Builds the whole ON CONFLICT part of the batch insert query. Collections that are not parallel safe get no
# ON CONFLICT clause at all.
#
# @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
# @param on_conflict [Symbol, NilClass] :do_update, :do_nothing or nil
# @param mode [Symbol] :full or :partial
# @param ignore_cols [Array<Symbol>] columns that must not be part of the update branch
# @param column_name [Symbol, String, NilClass] passed on to the update branch builder
# @return [String] SQL fragment, empty when the collection is not parallel safe
def insert_query_on_conflict_behavior(all_attribute_keys, on_conflict, mode, ignore_cols, column_name)
  return "" unless inventory_collection.parallel_safe?

  conflict_clause = insert_query_on_conflict_do(on_conflict)
  return conflict_clause unless on_conflict == :do_update

  conflict_clause + insert_query_on_conflict_update(all_attribute_keys, mode, ignore_cols, column_name)
end
62
+
63
# Builds the "ON CONFLICT ..." header of the batch insert query for the requested conflict behavior.
#
# @param on_conflict [Symbol, NilClass] :do_nothing, :do_update or nil for no conflict handling
# @return [String] SQL fragment; empty for nil, so the caller can always concatenate the result
# @raise [ArgumentError] when on_conflict is not one of the allowed values
def insert_query_on_conflict_do(on_conflict)
  case on_conflict
  when nil
    # No conflict handling requested. Returning a String (instead of nil) keeps the String concatenation in the
    # query builder from failing with a TypeError.
    ""
  when :do_nothing
    <<-SQL
      ON CONFLICT DO NOTHING
    SQL
  when :do_update
    # The WHERE of the unique index, if it has one, is repeated in the conflict target
    index_where_condition = unique_index_for(unique_index_keys).where
    where_to_sql = index_where_condition ? "WHERE #{index_where_condition}" : ""

    <<-SQL
      ON CONFLICT (#{unique_index_columns.map { |x| quote_column_name(x) }.join(",")}) #{where_to_sql}
        DO
          UPDATE
    SQL
  else
    raise ArgumentError, "Unsupported on_conflict value: #{on_conflict.inspect}"
  end
end
79
+
80
# Builds the SET part of the ON CONFLICT DO UPDATE branch and, for versioned data, the version conditions.
#
# @param all_attribute_keys [Array<Symbol>] Array of all columns we will be saving into each table row
# @param mode [Symbol] :full or :partial
# @param ignore_cols [Array<Symbol>] columns that must not be updated, the passed Array is not modified
# @param column_name [Symbol, String, NilClass] passed on to the version condition builder
# @return [String] SQL fragment of the update branch
def insert_query_on_conflict_update(all_attribute_keys, mode, ignore_cols, column_name)
  skipped_cols = ignore_cols
  if mode == :partial
    skipped_cols += [:resource_timestamps, :resource_timestamps_max, :resource_versions, :resource_versions_max]
  end
  skipped_cols += [:created_on, :created_at] # Lets not change created for the update clause

  # If there is no version attribute, the version conditions are not added below
  version_attribute = if supports_remote_data_timestamp?(all_attribute_keys)
                        :resource_timestamp
                      elsif supports_remote_data_version?(all_attribute_keys)
                        :resource_version
                      end

  # TODO(lsmola) should we add :deleted => false to the update clause? That should handle a reconnect, without
  # a need to list :deleted anywhere in the parser. We just need to check that a model has the :deleted attribute
  set_clause = (all_attribute_keys - skipped_cols).map { |key| build_insert_set_cols(key) }.join(", ")
  query = <<-SQL
    SET #{set_clause}
  SQL

  # This conditional will make sure we are avoiding rewriting new data by old data. But we want it only when
  # remote_data_timestamp is a part of the data.
  return query unless version_attribute

  query + insert_query_on_conflict_update_mode(mode, version_attribute, column_name)
end
104
+
105
# Picks the version condition builder matching the saving mode.
#
# @param mode [Symbol] :full or :partial
# @param version_attribute [Symbol] :resource_timestamp or :resource_version
# @param column_name [Symbol, String, NilClass] column being partially updated, required for the :partial mode
# @return [String, nil] SQL fragment with the version conditions, nil for any other mode
# @raise [RuntimeError] when the mode is :partial and no column_name was passed
def insert_query_on_conflict_update_mode(mode, version_attribute, column_name)
  case mode
  when :full
    full_update_condition(version_attribute)
  when :partial
    raise "Column name must be provided" unless column_name

    partial_update_condition(version_attribute, column_name)
  end
end
113
+
114
# Builds the tail of a full row ON CONFLICT update: the reset of the partial version columns plus the WHERE
# comparing the incoming full version with the stored full and partial versions.
#
# @param attr_full [Symbol] :resource_timestamp or :resource_version
# @return [String] SQL fragment with the reset and the version conditions
def full_update_condition(attr_full)
  plural = attr_full.to_s.pluralize # Changes resource_version/timestamp to resource_versions/timestamps

  # Quote the column names
  q_full = quote_column_name(attr_full)
  q_partial = quote_column_name(plural)
  q_partial_max = quote_column_name("#{plural}_max")

  <<-SQL
    , #{q_partial} = '{}', #{q_partial_max} = NULL

    WHERE EXCLUDED.#{q_full} IS NULL OR (
      (#{q_table_name}.#{q_full} IS NULL OR EXCLUDED.#{q_full} > #{q_table_name}.#{q_full}) AND
      (#{q_table_name}.#{q_partial_max} IS NULL OR EXCLUDED.#{q_full} >= #{q_table_name}.#{q_partial_max})
    )
  SQL
end
132
+
133
# Builds the tail of a partial row ON CONFLICT update: the per column version stored in the partial versions
# column, the new partial max version and the WHERE comparing the incoming version with the stored versions.
#
# @param attr_full [Symbol] :resource_timestamp or :resource_version
# @param column_name [Symbol, String] column being partially updated, used as a key of the partial versions
# @return [String] SQL fragment with the version bookkeeping and the version conditions
def partial_update_condition(attr_full, column_name)
  plural = attr_full.to_s.pluralize # Changes resource_version/timestamp to resource_versions/timestamps
  # PG type the stored versions are cast to when compared
  casts = {:resource_timestamp => "timestamp", :resource_version => "integer"}
  cast = casts[attr_full]

  # Quote the column names
  q_full = quote_column_name(attr_full)
  q_partial = quote_column_name(plural)
  q_partial_max = quote_column_name("#{plural}_max")
  version_key = get_connection.quote_string(column_name.to_s)
  quoted_table = get_connection.quote_table_name(table_name)

  <<-SQL
    #{insert_query_set_jsonb_version(cast, q_partial, q_partial_max, version_key)}
    , #{q_partial_max} = greatest(#{quoted_table}.#{q_partial_max}::#{cast}, EXCLUDED.#{q_partial_max}::#{cast})
    WHERE EXCLUDED.#{q_partial_max} IS NULL OR (
      (#{quoted_table}.#{q_full} IS NULL OR EXCLUDED.#{q_partial_max} > #{quoted_table}.#{q_full}) AND (
        (#{quoted_table}.#{q_partial}->>'#{version_key}')::#{cast} IS NULL OR
        EXCLUDED.#{q_partial_max}::#{cast} > (#{quoted_table}.#{q_partial}->>'#{version_key}')::#{cast}
      )
    )
  SQL
end
160
+
161
# Builds the SET continuation that merges the incoming version of one column into the jsonb partial versions
# column.
#
# @param cast [String] PG type of the version, "integer" versions are written without JSON quotes
# @param attr_partial [String] quoted name of the partial versions column
# @param attr_partial_max [String] quoted name of the partial max version column
# @param column_name [String] escaped name of the column, used as the JSON key
# @return [String] SQL fragment updating the partial versions column
def insert_query_set_jsonb_version(cast, attr_partial, attr_partial_max, column_name)
  # If we have integer value, we don't want to encapsulate the value in ""
  json_quote = cast == "integer" ? "" : '"'

  <<-SQL
    , #{attr_partial} = #{q_table_name}.#{attr_partial} || ('{"#{column_name}": #{json_quote}' || EXCLUDED.#{attr_partial_max}::#{cast} || '#{json_quote}}')::jsonb
  SQL
end
173
+
174
# Builds the RETURNING clause of the batch insert query, listing the id, the unique index columns and whatever
# insert_query_returning_timestamps adds.
#
# @return [String] SQL fragment with the RETURNING clause
def insert_query_returning
  index_columns = unique_index_columns.map { |x| quote_column_name(x) }.join(",")
  timestamp_columns = insert_query_returning_timestamps

  <<-SQL
    RETURNING "id",#{index_columns}
              #{timestamp_columns}
  SQL
end
180
+
181
# Builds the continuation of the RETURNING clause listing the collection's internal timestamp columns.
#
# @return [String] SQL fragment; always a String, empty when the collection is not parallel safe or has no
#         internal timestamp columns
def insert_query_returning_timestamps
  return "" unless inventory_collection.parallel_safe?

  # For upsert, we'll return also created and updated timestamps, so we can recognize what was created and what
  # updated
  timestamp_columns = inventory_collection.internal_timestamp_columns
  return "" unless timestamp_columns.present?

  <<-SQL
    , #{timestamp_columns.map { |x| quote_column_name(x) }.join(",")}
  SQL
end
194
+ end
195
+ end
196
+ end