inventory_refresh 0.3.5 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/.codeclimate.yml +25 -30
  3. data/.github/workflows/ci.yaml +58 -0
  4. data/.rubocop.yml +3 -3
  5. data/.rubocop_cc.yml +3 -4
  6. data/.rubocop_local.yml +5 -2
  7. data/.whitesource +3 -0
  8. data/CHANGELOG.md +19 -0
  9. data/Gemfile +10 -4
  10. data/README.md +1 -2
  11. data/Rakefile +2 -2
  12. data/inventory_refresh.gemspec +9 -10
  13. data/lib/inventory_refresh/application_record_iterator.rb +25 -12
  14. data/lib/inventory_refresh/graph/topological_sort.rb +24 -26
  15. data/lib/inventory_refresh/graph.rb +2 -2
  16. data/lib/inventory_refresh/inventory_collection/builder.rb +37 -15
  17. data/lib/inventory_refresh/inventory_collection/data_storage.rb +9 -0
  18. data/lib/inventory_refresh/inventory_collection/helpers/initialize_helper.rb +147 -38
  19. data/lib/inventory_refresh/inventory_collection/helpers/questions_helper.rb +48 -4
  20. data/lib/inventory_refresh/inventory_collection/index/proxy.rb +35 -3
  21. data/lib/inventory_refresh/inventory_collection/index/type/base.rb +8 -0
  22. data/lib/inventory_refresh/inventory_collection/index/type/local_db.rb +2 -0
  23. data/lib/inventory_refresh/inventory_collection/index/type/skeletal.rb +1 -0
  24. data/lib/inventory_refresh/inventory_collection/reference.rb +1 -0
  25. data/lib/inventory_refresh/inventory_collection/references_storage.rb +17 -0
  26. data/lib/inventory_refresh/inventory_collection/scanner.rb +91 -3
  27. data/lib/inventory_refresh/inventory_collection/serialization.rb +16 -10
  28. data/lib/inventory_refresh/inventory_collection.rb +122 -64
  29. data/lib/inventory_refresh/inventory_object.rb +74 -40
  30. data/lib/inventory_refresh/inventory_object_lazy.rb +17 -10
  31. data/lib/inventory_refresh/null_logger.rb +2 -2
  32. data/lib/inventory_refresh/persister.rb +43 -93
  33. data/lib/inventory_refresh/save_collection/base.rb +4 -2
  34. data/lib/inventory_refresh/save_collection/saver/base.rb +114 -15
  35. data/lib/inventory_refresh/save_collection/saver/batch.rb +17 -0
  36. data/lib/inventory_refresh/save_collection/saver/concurrent_safe_batch.rb +129 -51
  37. data/lib/inventory_refresh/save_collection/saver/default.rb +57 -0
  38. data/lib/inventory_refresh/save_collection/saver/partial_upsert_helper.rb +2 -19
  39. data/lib/inventory_refresh/save_collection/saver/retention_helper.rb +68 -3
  40. data/lib/inventory_refresh/save_collection/saver/sql_helper.rb +125 -0
  41. data/lib/inventory_refresh/save_collection/saver/sql_helper_update.rb +10 -6
  42. data/lib/inventory_refresh/save_collection/saver/sql_helper_upsert.rb +28 -16
  43. data/lib/inventory_refresh/save_collection/sweeper.rb +17 -93
  44. data/lib/inventory_refresh/save_collection/topological_sort.rb +5 -5
  45. data/lib/inventory_refresh/save_inventory.rb +5 -12
  46. data/lib/inventory_refresh/target.rb +73 -0
  47. data/lib/inventory_refresh/target_collection.rb +92 -0
  48. data/lib/inventory_refresh/version.rb +1 -1
  49. data/lib/inventory_refresh.rb +2 -0
  50. metadata +42 -39
  51. data/.travis.yml +0 -23
  52. data/lib/inventory_refresh/exception.rb +0 -8
@@ -3,6 +3,28 @@ module InventoryRefresh::SaveCollection
3
3
  module RetentionHelper
4
4
  private
5
5
 
6
+ # Deletes a complement of referenced data
7
+ def delete_complement
8
+ return unless inventory_collection.delete_allowed?
9
+
10
+ all_manager_uuids_size = inventory_collection.all_manager_uuids.size
11
+
12
+ logger.debug("Processing :delete_complement of #{inventory_collection} of size "\
13
+ "#{all_manager_uuids_size}...")
14
+
15
+ query = complement_of!(inventory_collection.all_manager_uuids,
16
+ inventory_collection.all_manager_uuids_scope,
17
+ inventory_collection.all_manager_uuids_timestamp)
18
+
19
+ ids_of_non_active_entities = ActiveRecord::Base.connection.execute(query.to_sql).to_a
20
+ ids_of_non_active_entities.each_slice(10_000) do |batch|
21
+ destroy_records!(batch)
22
+ end
23
+
24
+ logger.debug("Processing :delete_complement of #{inventory_collection} of size "\
25
+ "#{all_manager_uuids_size}, deleted=#{inventory_collection.deleted_records.size}...Complete")
26
+ end
27
+
6
28
  # Applies strategy based on :retention_strategy parameter, or fallbacks to legacy_destroy_records.
7
29
  #
8
30
  # @param records [Array<ApplicationRecord, Hash, Array>] Records we want to delete or archive
@@ -13,9 +35,13 @@ module InventoryRefresh::SaveCollection
13
35
  return false unless inventory_collection.delete_allowed?
14
36
  return if records.blank?
15
37
 
16
- ids = ids_array(records)
17
- inventory_collection.store_deleted_records(ids)
18
- send("#{inventory_collection.retention_strategy}_all_records!", ids)
38
+ if inventory_collection.retention_strategy
39
+ ids = ids_array(records)
40
+ inventory_collection.store_deleted_records(ids)
41
+ send("#{inventory_collection.retention_strategy}_all_records!", ids)
42
+ else
43
+ legacy_destroy_records!(records)
44
+ end
19
45
  end
20
46
 
21
47
  # Convert records to list of ids in format [{:id => X}, {:id => Y}...]
@@ -45,6 +71,45 @@ module InventoryRefresh::SaveCollection
45
71
  def destroy_all_records!(records)
46
72
  inventory_collection.model_class.where(:id => records.map { |x| x[:id] }).delete_all
47
73
  end
74
+
75
+ # Deletes or soft-deletes records. If the model_class supports a custom class delete method, we will use it for
76
+ # batch soft-delete. This is the legacy method doing either ineffective deletion/archiving or requiring a method
77
+ # on a class.
78
+ #
79
+ # @param records [Array<ApplicationRecord, Hash>] Records we want to delete. If we have only hashes, we need to
80
+ # fetch ApplicationRecord objects from the DB
81
+ def legacy_destroy_records!(records)
82
+ # Is the delete_method rails standard deleting method?
83
+ rails_delete = %i[destroy delete].include?(inventory_collection.delete_method)
84
+ if !rails_delete && inventory_collection.model_class.respond_to?(inventory_collection.delete_method)
85
+ # We have custom delete method defined on a class, that means it supports batch destroy
86
+ inventory_collection.store_deleted_records(records.map { |x| {:id => record_key(x, primary_key)} })
87
+ inventory_collection.model_class.public_send(inventory_collection.delete_method, records.map { |x| record_key(x, primary_key) })
88
+ else
89
+ legacy_ineffective_destroy_records(records)
90
+ end
91
+ end
92
+
93
+ # Very ineffective way of deleting records, but is needed if we want to invoke hooks.
94
+ #
95
+ # @param records [Array<ApplicationRecord, Hash>] Records we want to delete. If we have only hashes, we need to
96
+ # fetch ApplicationRecord objects from the DB
97
+ def legacy_ineffective_destroy_records(records)
98
+ # We have either standard :destroy and :delete rails method, or custom instance level delete method
99
+ # Note: The standard :destroy and :delete rails method can't be batched because of the hooks and cascade destroy
100
+ ActiveRecord::Base.transaction do
101
+ if pure_sql_records_fetching
102
+ # For pure SQL fetching, we need to get the AR objects again, so we can call destroy
103
+ inventory_collection.model_class.where(:id => records.map { |x| record_key(x, primary_key) }).find_each do |record|
104
+ delete_record!(record)
105
+ end
106
+ else
107
+ records.each do |record|
108
+ delete_record!(record)
109
+ end
110
+ end
111
+ end
112
+ end
48
113
  end
49
114
  end
50
115
  end
@@ -8,6 +8,9 @@ module InventoryRefresh::SaveCollection
8
8
  module SqlHelper
9
9
  include InventoryRefresh::Logging
10
10
 
11
+ # TODO(lsmola) all below methods should be rewritten to arel, but we need to first extend arel to be able to do
12
+ # this
13
+
11
14
  extend ActiveSupport::Concern
12
15
 
13
16
  included do
@@ -80,6 +83,128 @@ module InventoryRefresh::SaveCollection
80
83
  "#{value}::#{sql_type}"
81
84
  end
82
85
  end
86
+
87
+ # Effective way of doing multiselect
88
+ #
89
+ # If we use "(col1, col2) IN [(a,e), (b,f), (b,e)]" it's not great, just with 10k batch, we see
90
+ # *** ActiveRecord::StatementInvalid Exception: PG::StatementTooComplex: ERROR: stack depth limit exceeded
91
+ # HINT: Increase the configuration parameter "max_stack_depth" (currently 2048kB), after ensuring the
92
+ # platform's stack depth limit is adequate.
93
+ #
94
+ # If we use "(col1 = a AND col2 = e) OR (col1 = b AND col2 = f) OR (col1 = b AND col2 = e)" with 10k batch, it
95
+ # takes about 6s and consumes 300MB, with 100k it takes ~1h and consumes 3GB in the PostgreSQL process
96
+ #
97
+ # The best way seems to be using CTE, where the list of values we want to map is turned to 'table' and we just
98
+ # do RIGHT OUTER JOIN to get the complement of given identifiers. Tested on getting complement of 100k items,
99
+ # using 2 cols (:ems_ref and :uid_ems) from a total of 150k rows. It takes ~1s and 350MB in the PostgreSQL process
100
+ #
101
+ # @param manager_uuids [Array<String>, Array<Hash>] Array with manager_uuids of entities. The keys have to match
102
+ # inventory_collection.manager_ref. We allow passing just array of strings, if manager_ref.size ==1, to
103
+ # spare some memory
104
+ # @return [Arel::SelectManager] Arel for getting complement of uuids. This method modifies the passed
105
+ # manager_uuids to spare some memory
106
+ def complement_of!(manager_uuids, all_manager_uuids_scope, all_manager_uuids_timestamp)
107
+ all_attribute_keys = inventory_collection.manager_ref
108
+ all_attribute_keys_array = inventory_collection.manager_ref.map(&:to_s)
109
+
110
+ active_entities = Arel::Table.new(:active_entities)
111
+ active_entities_cte = Arel::Nodes::As.new(
112
+ active_entities,
113
+ Arel.sql("(#{active_entities_query(all_attribute_keys_array, manager_uuids)})")
114
+ )
115
+
116
+ all_entities = Arel::Table.new(:all_entities)
117
+ all_entities_cte = Arel::Nodes::As.new(
118
+ all_entities,
119
+ Arel.sql("(#{all_entities_query(all_manager_uuids_scope, all_manager_uuids_timestamp).select(:id, *all_attribute_keys_array).to_sql})")
120
+ )
121
+ join_condition = all_attribute_keys.map { |key| active_entities[key].eq(all_entities[key]) }.inject(:and)
122
+ where_condition = all_attribute_keys.map { |key| active_entities[key].eq(nil) }.inject(:and)
123
+
124
+ active_entities
125
+ .project(all_entities[:id])
126
+ .join(all_entities, Arel::Nodes::RightOuterJoin)
127
+ .on(join_condition)
128
+ .with(active_entities_cte, all_entities_cte)
129
+ .where(where_condition)
130
+ end
131
+
132
+ private
133
+
134
+ def all_entities_query(all_manager_uuids_scope, all_manager_uuids_timestamp)
135
+ all_entities_query = inventory_collection.full_collection_for_comparison
136
+ all_entities_query = all_entities_query.active if inventory_collection.retention_strategy == :archive
137
+
138
+ if all_manager_uuids_scope
139
+ scope_keys = all_manager_uuids_scope.first.keys.map { |x| association_to_foreign_key_mapping[x.to_sym] }.map(&:to_s)
140
+ scope = load_scope(all_manager_uuids_scope)
141
+ condition = inventory_collection.build_multi_selection_condition(scope, scope_keys)
142
+ all_entities_query = all_entities_query.where(condition)
143
+ end
144
+
145
+ if all_manager_uuids_timestamp && supports_column?(:resource_timestamp)
146
+ all_manager_uuids_timestamp = Time.parse(all_manager_uuids_timestamp).utc
147
+
148
+ date_field = model_class.arel_table[:resource_timestamp]
149
+ all_entities_query = all_entities_query.where(date_field.lt(all_manager_uuids_timestamp))
150
+ end
151
+ all_entities_query
152
+ end
153
+
154
+ def load_scope(all_manager_uuids_scope)
155
+ scope_keys = all_manager_uuids_scope.first.keys.to_set
156
+
157
+ all_manager_uuids_scope.map do |cond|
158
+ assert_scope!(scope_keys, cond)
159
+
160
+ cond.map do |key, value|
161
+ foreign_key = association_to_foreign_key_mapping[key.to_sym]
162
+ foreign_key_value = value.load&.id
163
+
164
+ assert_foreign_keys!(key, value, foreign_key, foreign_key_value)
165
+
166
+ [foreign_key, foreign_key_value]
167
+ end.to_h
168
+ end
169
+ end
170
+
171
+ def assert_scope!(scope_keys, cond)
172
+ if cond.keys.to_set != scope_keys
173
+ raise "'#{inventory_collection}' expected keys for :all_manager_uuids_scope are #{scope_keys.to_a}, got"\
174
+ " #{cond.keys}. Keys must be the same for all scopes provided."
175
+ end
176
+ end
177
+
178
+ def assert_foreign_keys!(key, value, foreign_key, foreign_key_value)
179
+ unless foreign_key
180
+ raise "'#{inventory_collection}' doesn't have relation :#{key} provided in :all_manager_uuids_scope."
181
+ end
182
+
183
+ unless foreign_key_value
184
+ raise "'#{inventory_collection}' couldn't load scope value :#{key} => #{value.inspect} provided in :all_manager_uuids_scope"
185
+ end
186
+ end
187
+
188
+ def active_entities_query(all_attribute_keys_array, manager_uuids)
189
+ connection = ActiveRecord::Base.connection
190
+
191
+ all_attribute_keys_array_q = all_attribute_keys_array.map { |x| quote_column_name(x) }
192
+ # For PostgreSQL, only the first set of values should contain the type casts
193
+ first_value = manager_uuids.shift.to_h
194
+ first_value = "(#{all_attribute_keys_array.map { |x| quote(connection, first_value[x], x, true) }.join(",")})"
195
+
196
+ # Rest of the values, without the type cast
197
+ values = manager_uuids.map! do |hash|
198
+ "(#{all_attribute_keys_array.map { |x| quote(connection, hash[x], x, false) }.join(",")})"
199
+ end.join(",")
200
+
201
+ values = values.blank? ? first_value : [first_value, values].join(",")
202
+
203
+ <<-SQL
204
+ SELECT *
205
+ FROM (VALUES #{values}) AS active_entities_table(#{all_attribute_keys_array_q.join(",")})
206
+ SQL
207
+ end
83
208
  end
84
209
  end
85
210
  end
@@ -23,13 +23,13 @@ module InventoryRefresh::SaveCollection
23
23
  connection = get_connection
24
24
 
25
25
  # We want to ignore create timestamps when updating
26
- all_attribute_keys_array = all_attribute_keys.to_a.delete_if { |x| %i(created_at created_on).include?(x) }
26
+ all_attribute_keys_array = all_attribute_keys.to_a.delete_if { |x| %i[created_at created_on].include?(x) }
27
27
  all_attribute_keys_array << :id
28
28
 
29
29
  # If there is not version attribute, the version conditions will be ignored
30
- version_attribute = if supports_remote_data_timestamp?(all_attribute_keys)
30
+ version_attribute = if inventory_collection.parallel_safe? && supports_remote_data_timestamp?(all_attribute_keys)
31
31
  :resource_timestamp
32
- elsif supports_remote_data_version?(all_attribute_keys)
32
+ elsif inventory_collection.parallel_safe? && supports_remote_data_version?(all_attribute_keys)
33
33
  :resource_counter
34
34
  end
35
35
 
@@ -130,9 +130,13 @@ module InventoryRefresh::SaveCollection
130
130
  end
131
131
 
132
132
  def update_query_returning
133
- <<-SQL
134
- RETURNING updated_values.#{quote_column_name("id")}, #{unique_index_columns.map { |x| "updated_values.#{quote_column_name(x)}" }.join(",")}
135
- SQL
133
+ if inventory_collection.parallel_safe?
134
+ <<-SQL
135
+ RETURNING updated_values.#{quote_column_name("id")}, #{unique_index_columns.map { |x| "updated_values.#{quote_column_name(x)}" }.join(",")}
136
+ SQL
137
+ else
138
+ ""
139
+ end
136
140
  end
137
141
  end
138
142
  end
@@ -19,7 +19,7 @@ module InventoryRefresh::SaveCollection
19
19
  # columns of a row, :partial is when we save only few columns, so a partial row.
20
20
  # @param on_conflict [Symbol, NilClass] defines behavior on conflict with unique index constraint, allowed values
21
21
  # are :do_update, :do_nothing, nil
22
- def build_insert_query(all_attribute_keys, hashes, on_conflict: nil, mode:, column_name: nil)
22
+ def build_insert_query(all_attribute_keys, hashes, mode:, on_conflict: nil, column_name: nil)
23
23
  logger.debug("Building insert query for #{inventory_collection} of size #{inventory_collection.size}...")
24
24
 
25
25
  # Cache the connection for the batch
@@ -55,6 +55,8 @@ module InventoryRefresh::SaveCollection
55
55
  end
56
56
 
57
57
  def insert_query_on_conflict_behavior(all_attribute_keys, on_conflict, mode, ignore_cols, column_name)
58
+ return "" unless inventory_collection.parallel_safe?
59
+
58
60
  insert_query_on_conflict = insert_query_on_conflict_do(on_conflict)
59
61
  if on_conflict == :do_update
60
62
  insert_query_on_conflict += insert_query_on_conflict_update(all_attribute_keys, mode, ignore_cols, column_name)
@@ -63,11 +65,12 @@ module InventoryRefresh::SaveCollection
63
65
  end
64
66
 
65
67
  def insert_query_on_conflict_do(on_conflict)
66
- if on_conflict == :do_nothing
68
+ case on_conflict
69
+ when :do_nothing
67
70
  <<-SQL
68
71
  ON CONFLICT DO NOTHING
69
72
  SQL
70
- elsif on_conflict == :do_update
73
+ when :do_update
71
74
  index_where_condition = unique_index_for(unique_index_keys).where
72
75
  where_to_sql = index_where_condition ? "WHERE #{index_where_condition}" : ""
73
76
 
@@ -92,6 +95,8 @@ module InventoryRefresh::SaveCollection
92
95
  :resource_counter
93
96
  end
94
97
 
98
+ # TODO(lsmola) should we add :deleted => false to the update clause? That should handle a reconnect, without a
99
+ # need to list :deleted anywhere in the parser. We just need to check that a model has the :deleted attribute
95
100
  query = <<-SQL
96
101
  SET #{(all_attribute_keys - ignore_cols).map { |key| build_insert_set_cols(key) }.join(", ")}
97
102
  SQL
@@ -103,10 +108,12 @@ module InventoryRefresh::SaveCollection
103
108
  end
104
109
 
105
110
  def insert_query_on_conflict_update_mode(mode, version_attribute, column_name)
106
- if mode == :full
111
+ case mode
112
+ when :full
107
113
  full_update_condition(version_attribute)
108
- elsif mode == :partial
114
+ when :partial
109
115
  raise "Column name must be provided" unless column_name
116
+
110
117
  partial_update_condition(version_attribute, column_name)
111
118
  end
112
119
  end
@@ -124,7 +131,7 @@ module InventoryRefresh::SaveCollection
124
131
  , #{attr_partial} = '{}', #{attr_partial_max} = NULL
125
132
 
126
133
  WHERE EXCLUDED.#{attr_full} IS NULL OR (
127
- (#{q_table_name}.#{attr_full} IS NULL OR EXCLUDED.#{attr_full} >= #{q_table_name}.#{attr_full}) AND
134
+ (#{q_table_name}.#{attr_full} IS NULL OR EXCLUDED.#{attr_full} > #{q_table_name}.#{attr_full}) AND
128
135
  (#{q_table_name}.#{attr_partial_max} IS NULL OR EXCLUDED.#{attr_full} >= #{q_table_name}.#{attr_partial_max})
129
136
  )
130
137
  SQL
@@ -133,9 +140,10 @@ module InventoryRefresh::SaveCollection
133
140
  def partial_update_condition(attr_full, column_name)
134
141
  attr_partial = attr_full.to_s.pluralize # Changes resource_counter/timestamp to resource_counters/timestamps
135
142
  attr_partial_max = "#{attr_partial}_max"
136
- cast = if attr_full == :resource_timestamp
143
+ cast = case attr_full
144
+ when :resource_timestamp
137
145
  "timestamp"
138
- elsif attr_full == :resource_counter
146
+ when :resource_counter
139
147
  "integer"
140
148
  end
141
149
 
@@ -150,9 +158,9 @@ module InventoryRefresh::SaveCollection
150
158
  #{insert_query_set_jsonb_version(cast, attr_partial, attr_partial_max, column_name)}
151
159
  , #{attr_partial_max} = greatest(#{q_table_name}.#{attr_partial_max}::#{cast}, EXCLUDED.#{attr_partial_max}::#{cast})
152
160
  WHERE EXCLUDED.#{attr_partial_max} IS NULL OR (
153
- (#{q_table_name}.#{attr_full} IS NULL OR EXCLUDED.#{attr_partial_max} >= #{q_table_name}.#{attr_full}) AND (
161
+ (#{q_table_name}.#{attr_full} IS NULL OR EXCLUDED.#{attr_partial_max} > #{q_table_name}.#{attr_full}) AND (
154
162
  (#{q_table_name}.#{attr_partial}->>'#{column_name}')::#{cast} IS NULL OR
155
- EXCLUDED.#{attr_partial_max}::#{cast} >= (#{q_table_name}.#{attr_partial}->>'#{column_name}')::#{cast}
163
+ EXCLUDED.#{attr_partial_max}::#{cast} > (#{q_table_name}.#{attr_partial}->>'#{column_name}')::#{cast}
156
164
  )
157
165
  )
158
166
  SQL
@@ -179,12 +187,16 @@ module InventoryRefresh::SaveCollection
179
187
  end
180
188
 
181
189
  def insert_query_returning_timestamps
182
- # For upsert, we'll return also created and updated timestamps, so we can recognize what was created and what
183
- # updated
184
- if inventory_collection.internal_timestamp_columns.present?
185
- <<-SQL
186
- , #{inventory_collection.internal_timestamp_columns.map { |x| quote_column_name(x) }.join(",")}
187
- SQL
190
+ if inventory_collection.parallel_safe?
191
+ # For upsert, we'll return also created and updated timestamps, so we can recognize what was created and what
192
+ # updated
193
+ if inventory_collection.internal_timestamp_columns.present?
194
+ <<-SQL
195
+ , #{inventory_collection.internal_timestamp_columns.map { |x| quote_column_name(x) }.join(",")}
196
+ SQL
197
+ end
198
+ else
199
+ ""
188
200
  end
189
201
  end
190
202
  end
@@ -1,7 +1,5 @@
1
- require "inventory_refresh/exception"
2
1
  require "inventory_refresh/logging"
3
2
  require "inventory_refresh/save_collection/saver/retention_helper"
4
- require "inventory_refresh/inventory_collection/index/type/local_db"
5
3
 
6
4
  module InventoryRefresh::SaveCollection
7
5
  class Sweeper < InventoryRefresh::SaveCollection::Base
@@ -12,111 +10,39 @@ module InventoryRefresh::SaveCollection
12
10
  # @param _ems [ActiveRecord] Manager owning the inventory_collections
13
11
  # @param inventory_collections [Array<InventoryRefresh::InventoryCollection>] Array of InventoryCollection objects
14
12
  # for sweeping
15
- # @param sweep_scope [Array<String, Symbol, Hash>] Array of inventory collection names marking sweep. Or for
16
- # targeted sweeping it's array of hashes, where key is inventory collection name pointing to an array of
17
- # identifiers of inventory objects we want to target for sweeping.
18
13
  # @param refresh_state [ActiveRecord] Record of :refresh_states
19
- def sweep(_ems, inventory_collections, sweep_scope, refresh_state)
20
- scope_set = build_scope_set(sweep_scope)
21
-
14
+ def sweep(_ems, inventory_collections, refresh_state)
22
15
  inventory_collections.each do |inventory_collection|
23
- next unless sweep_possible?(inventory_collection, scope_set)
16
+ next unless sweep_possible?(inventory_collection, refresh_state)
24
17
 
25
- new(inventory_collection, refresh_state, sweep_scope).sweep
18
+ new(inventory_collection, refresh_state).sweep
26
19
  end
27
20
  end
28
21
 
29
- def sweep_possible?(inventory_collection, scope_set)
30
- inventory_collection.supports_column?(:last_seen_at) && in_scope?(inventory_collection, scope_set)
31
- end
32
-
33
- def in_scope?(inventory_collection, scope_set)
34
- scope_set.include?(inventory_collection&.name)
22
+ def sweep_possible?(inventory_collection, refresh_state)
23
+ inventory_collection.supports_column?(:last_seen_at) && inventory_collection.parallel_safe? &&
24
+ inventory_collection.strategy == :local_db_find_missing_references &&
25
+ in_scope?(inventory_collection, refresh_state.sweep_scope)
35
26
  end
36
27
 
37
- def build_scope_set(sweep_scope)
38
- return [] unless sweep_scope
28
+ def in_scope?(inventory_collection, sweep_scope)
29
+ return true unless sweep_scope
30
+ return true if sweep_scope.kind_of?(Array) && sweep_scope.include?(inventory_collection&.name&.to_s)
39
31
 
40
- if sweep_scope.kind_of?(Array)
41
- sweep_scope.map(&:to_sym).to_set
42
- elsif sweep_scope.kind_of?(Hash)
43
- sweep_scope.keys.map(&:to_sym).to_set
44
- end
32
+ false
45
33
  end
46
34
  end
47
35
 
48
36
  include InventoryRefresh::SaveCollection::Saver::RetentionHelper
49
37
 
50
- attr_reader :inventory_collection, :refresh_state, :sweep_scope, :model_class, :primary_key
38
+ attr_reader :inventory_collection, :refresh_state, :model_class, :primary_key
51
39
 
52
- delegate :inventory_object_lazy?,
53
- :inventory_object?,
54
- :to => :inventory_collection
55
-
56
- def initialize(inventory_collection, refresh_state, sweep_scope)
40
+ def initialize(inventory_collection, refresh_state)
57
41
  @inventory_collection = inventory_collection
42
+ @refresh_state = refresh_state
58
43
 
59
- @refresh_state = refresh_state
60
- @sweep_scope = sweep_scope
61
-
62
- @model_class = inventory_collection.model_class
63
- @primary_key = @model_class.primary_key
64
- end
65
-
66
- def apply_targeted_sweep_scope(all_entities_query)
67
- if sweep_scope.kind_of?(Hash)
68
- scope = sweep_scope[inventory_collection.name]
69
- return all_entities_query if scope.nil? || scope.empty?
70
-
71
- # Scan the scope to find all references, so we can load them from DB in batches
72
- scan_sweep_scope!(scope)
73
-
74
- scope_keys = Set.new
75
- conditions = scope.map { |x| InventoryRefresh::InventoryObject.attributes_with_keys(x, inventory_collection, scope_keys) }
76
- assert_conditions!(conditions, scope_keys)
77
-
78
- all_entities_query.where(inventory_collection.build_multi_selection_condition(conditions, scope_keys))
79
- else
80
- all_entities_query
81
- end
82
- end
83
-
84
- def loadable?(value)
85
- inventory_object_lazy?(value) || inventory_object?(value)
86
- end
87
-
88
- def scan_sweep_scope!(scope)
89
- scope.each do |sc|
90
- sc.each_value do |value|
91
- next unless loadable?(value)
92
-
93
- value_inventory_collection = value.inventory_collection
94
- value_inventory_collection.add_reference(value.reference, :key => value.key)
95
- end
96
- end
97
- end
98
-
99
- def assert_conditions!(conditions, scope_keys)
100
- conditions.each do |cond|
101
- assert_uniform_keys!(cond, scope_keys)
102
- assert_non_existent_keys!(cond)
103
- end
104
- end
105
-
106
- def assert_uniform_keys!(cond, scope_keys)
107
- return if (diff = (scope_keys - cond.keys.to_set)).empty?
108
-
109
- raise(InventoryRefresh::Exception::SweeperNonUniformScopeKeyFoundError,
110
- "Sweeping scope for #{inventory_collection} contained non uniform keys. All keys for the"\
111
- "scope must be the same, it's possible to send multiple sweeps with different key set. Missing keys"\
112
- " for a scope were: #{diff.to_a}")
113
- end
114
-
115
- def assert_non_existent_keys!(cond)
116
- return if (diff = (cond.keys.to_set - inventory_collection.all_column_names)).empty?
117
-
118
- raise(InventoryRefresh::Exception::SweeperNonExistentScopeKeyFoundError,
119
- "Sweeping scope for #{inventory_collection} contained keys that are not columns: #{diff.to_a}")
44
+ @model_class = inventory_collection.model_class
45
+ @primary_key = @model_class.primary_key
120
46
  end
121
47
 
122
48
  def sweep
@@ -126,9 +52,7 @@ module InventoryRefresh::SaveCollection
126
52
  table = model_class.arel_table
127
53
  date_field = table[:last_seen_at]
128
54
  all_entities_query = inventory_collection.full_collection_for_comparison
129
- all_entities_query = all_entities_query.active if inventory_collection.retention_strategy == :archive && inventory_collection.supports_column?(:archived_at)
130
-
131
- all_entities_query = apply_targeted_sweep_scope(all_entities_query)
55
+ all_entities_query.active if inventory_collection.retention_strategy == :archive
132
56
 
133
57
  query = all_entities_query
134
58
  .where(date_field.lt(refresh_start)).or(all_entities_query.where(:last_seen_at => nil))
@@ -17,21 +17,21 @@ module InventoryRefresh::SaveCollection
17
17
 
18
18
  layers = InventoryRefresh::Graph::TopologicalSort.new(graph).topological_sort
19
19
 
20
- logger.debug("Saving manager #{ems.id}...")
20
+ logger.debug("Saving manager #{ems.name}...")
21
21
 
22
- sorted_graph_log = "Topological sorting of manager #{ems.id} resulted in these layers processable in parallel:\n"
22
+ sorted_graph_log = "Topological sorting of manager #{ems.name} resulted in these layers processable in parallel:\n"
23
23
  sorted_graph_log += graph.to_graphviz(:layers => layers)
24
24
  logger.debug(sorted_graph_log)
25
25
 
26
26
  layers.each_with_index do |layer, index|
27
- logger.debug("Saving manager #{ems.id} | Layer #{index}")
27
+ logger.debug("Saving manager #{ems.name} | Layer #{index}")
28
28
  layer.each do |inventory_collection|
29
29
  save_inventory_object_inventory(ems, inventory_collection) unless inventory_collection.saved?
30
30
  end
31
- logger.debug("Saved manager #{ems.id} | Layer #{index}")
31
+ logger.debug("Saved manager #{ems.name} | Layer #{index}")
32
32
  end
33
33
 
34
- logger.debug("Saving manager #{ems.id}...Complete")
34
+ logger.debug("Saving manager #{ems.name}...Complete")
35
35
  end
36
36
  end
37
37
  end
@@ -29,18 +29,11 @@ module InventoryRefresh
29
29
  # @param ems [ExtManagementSystem] manager owning the inventory_collections
30
30
  # @param inventory_collections [Array<InventoryRefresh::InventoryCollection>] array of InventoryCollection objects
31
31
  # for sweeping
32
- # @param sweep_scope [Array<String, Symbol, Hash>] Array of inventory collection names marking sweep. Or for
33
- # targeted sweeping it's array of hashes, where key is inventory collection name pointing to an array of
34
- # identifiers of inventory objects we want to target for sweeping.
35
32
  # @param refresh_state [ActiveRecord] Record of :refresh_states
36
- def sweep_inactive_records(ems, inventory_collections, sweep_scope, refresh_state)
37
- inventory_collections.each do |inventory_collection|
38
- inventory_collection.strategy = :local_db_find_references
39
- end
40
-
41
- logger.info("#{log_header(ems)} Sweeping EMS Inventory with scope #{sweep_scope} and date #{refresh_state.created_at} ...")
42
- InventoryRefresh::SaveCollection::Sweeper.sweep(ems, inventory_collections, sweep_scope, refresh_state)
43
- logger.info("#{log_header(ems)} Sweeping EMS Inventory with scope #{sweep_scope} and date #{refresh_state.created_at}...Complete")
33
+ def sweep_inactive_records(ems, inventory_collections, refresh_state)
34
+ logger.info("#{log_header(ems)} Sweeping EMS Inventory...")
35
+ InventoryRefresh::SaveCollection::Sweeper.sweep(ems, inventory_collections, refresh_state)
36
+ logger.info("#{log_header(ems)} Sweeping EMS Inventory...Complete")
44
37
 
45
38
  ems
46
39
  end
@@ -50,7 +43,7 @@ module InventoryRefresh
50
43
  # @param ems [ExtManagementSystem] manager owning the inventory_collections
51
44
  # @return [String] helper string for logging
52
45
  def log_header(ems)
53
- "EMS: [#{ems.id}]"
46
+ "EMS: [#{ems.name}], id: [#{ems.id}]"
54
47
  end
55
48
  end
56
49
  end
@@ -0,0 +1,73 @@
1
+ module InventoryRefresh
2
+ class Target
3
+ attr_reader :association, :manager_ref, :event_id, :options
4
+
5
+ # @param association [Symbol] An existing association on Manager, that lists objects represented by a Target, naming
6
+ # should be the same as the association of the counterpart InventoryCollection object
7
+ # @param manager_ref [Hash] A Hash that can be used to find_by on a given association and returning a unique object.
8
+ # The keys should be the same as the keys of the counterpart InventoryObject
9
+ # @param manager [ManageIQ::Providers::BaseManager] The Manager owning the Target
10
+ # @param manager_id [Integer] A primary key of the Manager owning the Target
11
+ # @param event_id [Integer] A primary key of the EmsEvent associated with the Target
12
+ # @param options [Hash] A free form options hash
13
+ def initialize(association:, manager_ref:, manager: nil, manager_id: nil, event_id: nil, options: {})
14
+ raise "Provide either :manager or :manager_id argument" if manager.nil? && manager_id.nil?
15
+
16
+ @manager = manager
17
+ @manager_id = manager_id
18
+ @association = association
19
+ @manager_ref = manager_ref
20
+ @event_id = event_id
21
+ @options = options
22
+ end
23
+
24
+ # A Rails recommended interface for deserializing an object
25
+ # @return [InventoryRefresh::Target] InventoryRefresh::Target instance
26
+ def self.load(**args)
27
+ new(**args)
28
+ end
29
+
30
+ # A Rails recommended interface for serializing an object
31
+ #
32
+ # @param obj [InventoryRefresh::Target] InventoryRefresh::Target instance we want to serialize
33
+ # @return [Hash] serialized object
34
+ def self.dump(obj)
35
+ obj.dump
36
+ end
37
+
38
+ # Returns a serialized InventoryRefresh::Target object. This can be used to initialize a new object, then the object
39
+ # target acts the same as the object InventoryRefresh::Target.new(**target.dump)
40
+ #
41
+ # @return [Hash] serialized object
42
+ def dump
43
+ {
44
+ :manager_id => manager_id,
45
+ :association => association,
46
+ :manager_ref => manager_ref,
47
+ :event_id => event_id,
48
+ :options => options
49
+ }
50
+ end
51
+
52
+ alias id dump
53
+ alias name manager_ref
54
+
55
+ # @return [ManageIQ::Providers::BaseManager] The Manager owning the Target
56
+ def manager
57
+ @manager || ManageIQ::Providers::BaseManager.find(@manager_id)
58
+ end
59
+
60
+ # @return [Integer] A primary key of the Manager owning the Target
61
+ def manager_id
62
+ @manager_id || manager.try(:id)
63
+ end
64
+
65
+ # Loads InventoryRefresh::Target ApplicationRecord representation from our DB, this requires that InventoryRefresh::Target
66
+ # has been refreshed, otherwise the AR object can be missing.
67
+ #
68
+ # @return [ApplicationRecord] A InventoryRefresh::Target loaded from the database as AR object
69
+ def load_from_db
70
+ manager.public_send(association).find_by(manager_ref)
71
+ end
72
+ end
73
+ end