inventory_refresh 0.2.2 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. checksums.yaml +4 -4
  2. data/.codeclimate.yml +0 -1
  3. data/.travis.yml +6 -8
  4. data/inventory_refresh.gemspec +2 -4
  5. data/lib/inventory_refresh.rb +0 -2
  6. data/lib/inventory_refresh/application_record_iterator.rb +9 -26
  7. data/lib/inventory_refresh/exception.rb +8 -0
  8. data/lib/inventory_refresh/inventory_collection.rb +36 -110
  9. data/lib/inventory_refresh/inventory_collection/builder.rb +6 -6
  10. data/lib/inventory_refresh/inventory_collection/data_storage.rb +0 -9
  11. data/lib/inventory_refresh/inventory_collection/helpers/initialize_helper.rb +34 -143
  12. data/lib/inventory_refresh/inventory_collection/helpers/questions_helper.rb +1 -44
  13. data/lib/inventory_refresh/inventory_collection/index/proxy.rb +6 -34
  14. data/lib/inventory_refresh/inventory_collection/index/type/base.rb +0 -8
  15. data/lib/inventory_refresh/inventory_collection/references_storage.rb +0 -17
  16. data/lib/inventory_refresh/inventory_collection/scanner.rb +1 -87
  17. data/lib/inventory_refresh/inventory_collection/serialization.rb +10 -16
  18. data/lib/inventory_refresh/inventory_object.rb +34 -68
  19. data/lib/inventory_refresh/inventory_object_lazy.rb +10 -17
  20. data/lib/inventory_refresh/persister.rb +63 -29
  21. data/lib/inventory_refresh/save_collection/base.rb +2 -4
  22. data/lib/inventory_refresh/save_collection/saver/base.rb +8 -108
  23. data/lib/inventory_refresh/save_collection/saver/concurrent_safe_batch.rb +48 -126
  24. data/lib/inventory_refresh/save_collection/saver/partial_upsert_helper.rb +19 -1
  25. data/lib/inventory_refresh/save_collection/saver/retention_helper.rb +3 -68
  26. data/lib/inventory_refresh/save_collection/saver/sql_helper.rb +0 -125
  27. data/lib/inventory_refresh/save_collection/saver/sql_helper_update.rb +5 -9
  28. data/lib/inventory_refresh/save_collection/saver/sql_helper_upsert.rb +9 -17
  29. data/lib/inventory_refresh/save_collection/sweeper.rb +91 -18
  30. data/lib/inventory_refresh/save_collection/topological_sort.rb +5 -5
  31. data/lib/inventory_refresh/save_inventory.rb +12 -5
  32. data/lib/inventory_refresh/version.rb +1 -1
  33. metadata +9 -45
  34. data/lib/inventory_refresh/save_collection/saver/batch.rb +0 -17
  35. data/lib/inventory_refresh/save_collection/saver/default.rb +0 -57
  36. data/lib/inventory_refresh/target.rb +0 -73
  37. data/lib/inventory_refresh/target_collection.rb +0 -92
@@ -59,7 +59,7 @@ module InventoryRefresh::SaveCollection
59
59
  skeletal_inventory_objects_index = {}
60
60
 
61
61
  inventory_collection.skeletal_primary_index.each_value do |inventory_object|
62
- attributes = inventory_object.attributes_with_keys(inventory_collection, all_attribute_keys, inventory_object)
62
+ attributes = inventory_object.class.attributes_with_keys(inventory_object.data, inventory_collection, all_attribute_keys, inventory_object)
63
63
  index = build_stringified_reference(attributes, unique_index_keys)
64
64
 
65
65
  skeletal_attributes_index[index] = attributes
@@ -200,7 +200,25 @@ module InventoryRefresh::SaveCollection
200
200
  )
201
201
  end
202
202
 
203
+ def comparable_timestamp(timestamp)
204
+ # Let's cast all timestamps to floats (to_f), rounding the time comparison precision to milliseconds; that should be
205
+ # enough, since we are the ones setting the record version in the collector. Otherwise we will have a hard time with
206
+ # doing equality, since the value changes when going through the DB (the DB cuts it at 5 decimal places)
207
+
208
+ if timestamp.kind_of?(String)
209
+ Time.use_zone('UTC') { Time.zone.parse(timestamp) }.to_f.round(3)
210
+ elsif timestamp.kind_of?(Time)
211
+ timestamp.in_time_zone('UTC').to_f.round(3)
212
+ else
213
+ timestamp
214
+ end
215
+ end
216
+
203
217
  def skeletonize_or_skip_record(record_version, hash_version, record_versions_max, inventory_object)
218
+ record_version = comparable_timestamp(record_version)
219
+ record_versions_max = comparable_timestamp(record_versions_max) if record_versions_max
220
+ hash_version = comparable_timestamp(hash_version)
221
+
204
222
  # Skip updating this record, because it is old
205
223
  return true if record_version && hash_version && record_version >= hash_version
206
224
 
@@ -3,28 +3,6 @@ module InventoryRefresh::SaveCollection
3
3
  module RetentionHelper
4
4
  private
5
5
 
6
- # Deletes a complement of referenced data
7
- def delete_complement
8
- return unless inventory_collection.delete_allowed?
9
-
10
- all_manager_uuids_size = inventory_collection.all_manager_uuids.size
11
-
12
- logger.debug("Processing :delete_complement of #{inventory_collection} of size "\
13
- "#{all_manager_uuids_size}...")
14
-
15
- query = complement_of!(inventory_collection.all_manager_uuids,
16
- inventory_collection.all_manager_uuids_scope,
17
- inventory_collection.all_manager_uuids_timestamp)
18
-
19
- ids_of_non_active_entities = ActiveRecord::Base.connection.execute(query.to_sql).to_a
20
- ids_of_non_active_entities.each_slice(10_000) do |batch|
21
- destroy_records!(batch)
22
- end
23
-
24
- logger.debug("Processing :delete_complement of #{inventory_collection} of size "\
25
- "#{all_manager_uuids_size}, deleted=#{inventory_collection.deleted_records.size}...Complete")
26
- end
27
-
28
6
  # Applies the strategy given by the :retention_strategy parameter.
29
7
  #
30
8
  # @param records [Array<ApplicationRecord, Hash, Array>] Records we want to delete or archive
@@ -35,13 +13,9 @@ module InventoryRefresh::SaveCollection
35
13
  return false unless inventory_collection.delete_allowed?
36
14
  return if records.blank?
37
15
 
38
- if inventory_collection.retention_strategy
39
- ids = ids_array(records)
40
- inventory_collection.store_deleted_records(ids)
41
- send("#{inventory_collection.retention_strategy}_all_records!", ids)
42
- else
43
- legacy_destroy_records!(records)
44
- end
16
+ ids = ids_array(records)
17
+ inventory_collection.store_deleted_records(ids)
18
+ send("#{inventory_collection.retention_strategy}_all_records!", ids)
45
19
  end
46
20
 
47
21
  # Convert records to list of ids in format [{:id => X}, {:id => Y}...]
@@ -71,45 +45,6 @@ module InventoryRefresh::SaveCollection
71
45
  def destroy_all_records!(records)
72
46
  inventory_collection.model_class.where(:id => records.map { |x| x[:id] }).delete_all
73
47
  end
74
-
75
- # Deletes or sof-deletes records. If the model_class supports a custom class delete method, we will use it for
76
- # batch soft-delete. This is the legacy method doing either ineffective deletion/archiving or requiring a method
77
- # on a class.
78
- #
79
- # @param records [Array<ApplicationRecord, Hash>] Records we want to delete. If we have only hashes, we need to
80
- # to fetch ApplicationRecord objects from the DB
81
- def legacy_destroy_records!(records)
82
- # Is the delete_method rails standard deleting method?
83
- rails_delete = %i(destroy delete).include?(inventory_collection.delete_method)
84
- if !rails_delete && inventory_collection.model_class.respond_to?(inventory_collection.delete_method)
85
- # We have custom delete method defined on a class, that means it supports batch destroy
86
- inventory_collection.store_deleted_records(records.map { |x| {:id => record_key(x, primary_key)} })
87
- inventory_collection.model_class.public_send(inventory_collection.delete_method, records.map { |x| record_key(x, primary_key) })
88
- else
89
- legacy_ineffective_destroy_records(records)
90
- end
91
- end
92
-
93
- # Very ineffective way of deleting records, but is needed if we want to invoke hooks.
94
- #
95
- # @param records [Array<ApplicationRecord, Hash>] Records we want to delete. If we have only hashes, we need to
96
- # to fetch ApplicationRecord objects from the DB
97
- def legacy_ineffective_destroy_records(records)
98
- # We have either standard :destroy and :delete rails method, or custom instance level delete method
99
- # Note: The standard :destroy and :delete rails method can't be batched because of the hooks and cascade destroy
100
- ActiveRecord::Base.transaction do
101
- if pure_sql_records_fetching
102
- # For pure SQL fetching, we need to get the AR objects again, so we can call destroy
103
- inventory_collection.model_class.where(:id => records.map { |x| record_key(x, primary_key) }).find_each do |record|
104
- delete_record!(record)
105
- end
106
- else
107
- records.each do |record|
108
- delete_record!(record)
109
- end
110
- end
111
- end
112
- end
113
48
  end
114
49
  end
115
50
  end
@@ -8,9 +8,6 @@ module InventoryRefresh::SaveCollection
8
8
  module SqlHelper
9
9
  include InventoryRefresh::Logging
10
10
 
11
- # TODO(lsmola) all below methods should be rewritten to arel, but we need to first extend arel to be able to do
12
- # this
13
-
14
11
  extend ActiveSupport::Concern
15
12
 
16
13
  included do
@@ -83,128 +80,6 @@ module InventoryRefresh::SaveCollection
83
80
  "#{value}::#{sql_type}"
84
81
  end
85
82
  end
86
-
87
- # Effective way of doing multiselect
88
- #
89
- # If we use "(col1, col2) IN [(a,e), (b,f), (b,e)]" it's not great, just with 10k batch, we see
90
- # *** ActiveRecord::StatementInvalid Exception: PG::StatementTooComplex: ERROR: stack depth limit exceeded
91
- # HINT: Increase the configuration parameter "max_stack_depth" (currently 2048kB), after ensuring the
92
- # platform's stack depth limit is adequate.
93
- #
94
- # If we use "(col1 = a AND col2 = e) OR (col1 = b AND col2 = f) OR (col1 = b AND col2 = e)" with 10k batch, it
95
- # takes about 6s and consumes 300MB, with 100k it takes ~1h and consume 3GB in Postgre process
96
- #
97
- # The best way seems to be using CTE, where the list of values we want to map is turned to 'table' and we just
98
- # do RIGHT OUTER JOIN to get the complement of given identifiers. Tested on getting complement of 100k items,
99
- # using 2 cols (:ems_ref and :uid_ems) from total 150k rows. It takes ~1s and 350MB in Postgre process
100
- #
101
- # @param manager_uuids [Array<String>, Array[Hash]] Array with manager_uuids of entities. The keys have to match
102
- # inventory_collection.manager_ref. We allow passing just array of strings, if manager_ref.size ==1, to
103
- # spare some memory
104
- # @return [Arel::SelectManager] Arel for getting complement of uuids. This method modifies the passed
105
- # manager_uuids to spare some memory
106
- def complement_of!(manager_uuids, all_manager_uuids_scope, all_manager_uuids_timestamp)
107
- all_attribute_keys = inventory_collection.manager_ref
108
- all_attribute_keys_array = inventory_collection.manager_ref.map(&:to_s)
109
-
110
- active_entities = Arel::Table.new(:active_entities)
111
- active_entities_cte = Arel::Nodes::As.new(
112
- active_entities,
113
- Arel.sql("(#{active_entities_query(all_attribute_keys_array, manager_uuids)})")
114
- )
115
-
116
- all_entities = Arel::Table.new(:all_entities)
117
- all_entities_cte = Arel::Nodes::As.new(
118
- all_entities,
119
- Arel.sql("(#{all_entities_query(all_manager_uuids_scope, all_manager_uuids_timestamp).select(:id, *all_attribute_keys_array).to_sql})")
120
- )
121
- join_condition = all_attribute_keys.map { |key| active_entities[key].eq(all_entities[key]) }.inject(:and)
122
- where_condition = all_attribute_keys.map { |key| active_entities[key].eq(nil) }.inject(:and)
123
-
124
- active_entities
125
- .project(all_entities[:id])
126
- .join(all_entities, Arel::Nodes::RightOuterJoin)
127
- .on(join_condition)
128
- .with(active_entities_cte, all_entities_cte)
129
- .where(where_condition)
130
- end
131
-
132
- private
133
-
134
- def all_entities_query(all_manager_uuids_scope, all_manager_uuids_timestamp)
135
- all_entities_query = inventory_collection.full_collection_for_comparison
136
- all_entities_query = all_entities_query.active if inventory_collection.retention_strategy == :archive
137
-
138
- if all_manager_uuids_scope
139
- scope_keys = all_manager_uuids_scope.first.keys.map { |x| association_to_foreign_key_mapping[x.to_sym] }.map(&:to_s)
140
- scope = load_scope(all_manager_uuids_scope)
141
- condition = inventory_collection.build_multi_selection_condition(scope, scope_keys)
142
- all_entities_query = all_entities_query.where(condition)
143
- end
144
-
145
- if all_manager_uuids_timestamp && supports_column?(:resource_timestamp)
146
- all_manager_uuids_timestamp = Time.parse(all_manager_uuids_timestamp).utc
147
-
148
- date_field = model_class.arel_table[:resource_timestamp]
149
- all_entities_query = all_entities_query.where(date_field.lt(all_manager_uuids_timestamp))
150
- end
151
- all_entities_query
152
- end
153
-
154
- def load_scope(all_manager_uuids_scope)
155
- scope_keys = all_manager_uuids_scope.first.keys.to_set
156
-
157
- all_manager_uuids_scope.map do |cond|
158
- assert_scope!(scope_keys, cond)
159
-
160
- cond.map do |key, value|
161
- foreign_key = association_to_foreign_key_mapping[key.to_sym]
162
- foreign_key_value = value.load&.id
163
-
164
- assert_foreign_keys!(key, value, foreign_key, foreign_key_value)
165
-
166
- [foreign_key, foreign_key_value]
167
- end.to_h
168
- end
169
- end
170
-
171
- def assert_scope!(scope_keys, cond)
172
- if cond.keys.to_set != scope_keys
173
- raise "'#{inventory_collection}' expected keys for :all_manager_uuids_scope are #{scope_keys.to_a}, got"\
174
- " #{cond.keys}. Keys must be the same for all scopes provided."
175
- end
176
- end
177
-
178
- def assert_foreign_keys!(key, value, foreign_key, foreign_key_value)
179
- unless foreign_key
180
- raise "'#{inventory_collection}' doesn't have relation :#{key} provided in :all_manager_uuids_scope."
181
- end
182
-
183
- unless foreign_key_value
184
- raise "'#{inventory_collection}' couldn't load scope value :#{key} => #{value.inspect} provided in :all_manager_uuids_scope"
185
- end
186
- end
187
-
188
- def active_entities_query(all_attribute_keys_array, manager_uuids)
189
- connection = ActiveRecord::Base.connection
190
-
191
- all_attribute_keys_array_q = all_attribute_keys_array.map { |x| quote_column_name(x) }
192
- # For Postgre, only first set of values should contain the type casts
193
- first_value = manager_uuids.shift.to_h
194
- first_value = "(#{all_attribute_keys_array.map { |x| quote(connection, first_value[x], x, true) }.join(",")})"
195
-
196
- # Rest of the values, without the type cast
197
- values = manager_uuids.map! do |hash|
198
- "(#{all_attribute_keys_array.map { |x| quote(connection, hash[x], x, false) }.join(",")})"
199
- end.join(",")
200
-
201
- values = values.blank? ? first_value : [first_value, values].join(",")
202
-
203
- <<-SQL
204
- SELECT *
205
- FROM (VALUES #{values}) AS active_entities_table(#{all_attribute_keys_array_q.join(",")})
206
- SQL
207
- end
208
83
  end
209
84
  end
210
85
  end
@@ -27,9 +27,9 @@ module InventoryRefresh::SaveCollection
27
27
  all_attribute_keys_array << :id
28
28
 
29
29
  # If there is no version attribute, the version conditions will be ignored
30
- version_attribute = if inventory_collection.parallel_safe? && supports_remote_data_timestamp?(all_attribute_keys)
30
+ version_attribute = if supports_remote_data_timestamp?(all_attribute_keys)
31
31
  :resource_timestamp
32
- elsif inventory_collection.parallel_safe? && supports_remote_data_version?(all_attribute_keys)
32
+ elsif supports_remote_data_version?(all_attribute_keys)
33
33
  :resource_counter
34
34
  end
35
35
 
@@ -130,13 +130,9 @@ module InventoryRefresh::SaveCollection
130
130
  end
131
131
 
132
132
  def update_query_returning
133
- if inventory_collection.parallel_safe?
134
- <<-SQL
135
- RETURNING updated_values.#{quote_column_name("id")}, #{unique_index_columns.map { |x| "updated_values.#{quote_column_name(x)}" }.join(",")}
136
- SQL
137
- else
138
- ""
139
- end
133
+ <<-SQL
134
+ RETURNING updated_values.#{quote_column_name("id")}, #{unique_index_columns.map { |x| "updated_values.#{quote_column_name(x)}" }.join(",")}
135
+ SQL
140
136
  end
141
137
  end
142
138
  end
@@ -55,8 +55,6 @@ module InventoryRefresh::SaveCollection
55
55
  end
56
56
 
57
57
  def insert_query_on_conflict_behavior(all_attribute_keys, on_conflict, mode, ignore_cols, column_name)
58
- return "" unless inventory_collection.parallel_safe?
59
-
60
58
  insert_query_on_conflict = insert_query_on_conflict_do(on_conflict)
61
59
  if on_conflict == :do_update
62
60
  insert_query_on_conflict += insert_query_on_conflict_update(all_attribute_keys, mode, ignore_cols, column_name)
@@ -94,8 +92,6 @@ module InventoryRefresh::SaveCollection
94
92
  :resource_counter
95
93
  end
96
94
 
97
- # TODO(lsmola) should we add :deleted => false to the update clause? That should handle a reconnect, without a
98
- # a need to list :deleted anywhere in the parser. We just need to check that a model has the :deleted attribute
99
95
  query = <<-SQL
100
96
  SET #{(all_attribute_keys - ignore_cols).map { |key| build_insert_set_cols(key) }.join(", ")}
101
97
  SQL
@@ -128,7 +124,7 @@ module InventoryRefresh::SaveCollection
128
124
  , #{attr_partial} = '{}', #{attr_partial_max} = NULL
129
125
 
130
126
  WHERE EXCLUDED.#{attr_full} IS NULL OR (
131
- (#{q_table_name}.#{attr_full} IS NULL OR EXCLUDED.#{attr_full} > #{q_table_name}.#{attr_full}) AND
127
+ (#{q_table_name}.#{attr_full} IS NULL OR EXCLUDED.#{attr_full} >= #{q_table_name}.#{attr_full}) AND
132
128
  (#{q_table_name}.#{attr_partial_max} IS NULL OR EXCLUDED.#{attr_full} >= #{q_table_name}.#{attr_partial_max})
133
129
  )
134
130
  SQL
@@ -154,9 +150,9 @@ module InventoryRefresh::SaveCollection
154
150
  #{insert_query_set_jsonb_version(cast, attr_partial, attr_partial_max, column_name)}
155
151
  , #{attr_partial_max} = greatest(#{q_table_name}.#{attr_partial_max}::#{cast}, EXCLUDED.#{attr_partial_max}::#{cast})
156
152
  WHERE EXCLUDED.#{attr_partial_max} IS NULL OR (
157
- (#{q_table_name}.#{attr_full} IS NULL OR EXCLUDED.#{attr_partial_max} > #{q_table_name}.#{attr_full}) AND (
153
+ (#{q_table_name}.#{attr_full} IS NULL OR EXCLUDED.#{attr_partial_max} >= #{q_table_name}.#{attr_full}) AND (
158
154
  (#{q_table_name}.#{attr_partial}->>'#{column_name}')::#{cast} IS NULL OR
159
- EXCLUDED.#{attr_partial_max}::#{cast} > (#{q_table_name}.#{attr_partial}->>'#{column_name}')::#{cast}
155
+ EXCLUDED.#{attr_partial_max}::#{cast} >= (#{q_table_name}.#{attr_partial}->>'#{column_name}')::#{cast}
160
156
  )
161
157
  )
162
158
  SQL
@@ -183,16 +179,12 @@ module InventoryRefresh::SaveCollection
183
179
  end
184
180
 
185
181
  def insert_query_returning_timestamps
186
- if inventory_collection.parallel_safe?
187
- # For upsert, we'll return also created and updated timestamps, so we can recognize what was created and what
188
- # updated
189
- if inventory_collection.internal_timestamp_columns.present?
190
- <<-SQL
191
- , #{inventory_collection.internal_timestamp_columns.map { |x| quote_column_name(x) }.join(",")}
192
- SQL
193
- end
194
- else
195
- ""
182
+ # For upsert, we'll also return the created and updated timestamps, so we can recognize what was created and what
183
+ # was updated
184
+ if inventory_collection.internal_timestamp_columns.present?
185
+ <<-SQL
186
+ , #{inventory_collection.internal_timestamp_columns.map { |x| quote_column_name(x) }.join(",")}
187
+ SQL
196
188
  end
197
189
  end
198
190
  end
@@ -1,5 +1,7 @@
1
+ require "inventory_refresh/exception"
1
2
  require "inventory_refresh/logging"
2
3
  require "inventory_refresh/save_collection/saver/retention_helper"
4
+ require "inventory_refresh/inventory_collection/index/type/local_db"
3
5
 
4
6
  module InventoryRefresh::SaveCollection
5
7
  class Sweeper < InventoryRefresh::SaveCollection::Base
@@ -10,42 +12,111 @@ module InventoryRefresh::SaveCollection
10
12
  # @param _ems [ActiveRecord] Manager owning the inventory_collections
11
13
  # @param inventory_collections [Array<InventoryRefresh::InventoryCollection>] Array of InventoryCollection objects
12
14
  # for sweeping
15
+ # @param sweep_scope [Array<String, Symbol>, Hash] Array of inventory collection names to sweep. For
16
+ # targeted sweeping, it is a hash whose keys are inventory collection names, each pointing to an array of
17
+ # identifier hashes of the inventory objects we want to target for sweeping.
13
18
  # @param refresh_state [ActiveRecord] Record of :refresh_states
14
- def sweep(_ems, inventory_collections, refresh_state)
19
+ def sweep(_ems, inventory_collections, sweep_scope, refresh_state)
20
+ scope_set = build_scope_set(sweep_scope)
21
+
15
22
  inventory_collections.each do |inventory_collection|
16
- next unless sweep_possible?(inventory_collection, refresh_state)
23
+ next unless sweep_possible?(inventory_collection, scope_set)
17
24
 
18
- new(inventory_collection, refresh_state).sweep
25
+ new(inventory_collection, refresh_state, sweep_scope).sweep
19
26
  end
20
27
  end
21
28
 
22
- def sweep_possible?(inventory_collection, refresh_state)
23
- inventory_collection.supports_column?(:last_seen_at) && inventory_collection.parallel_safe? &&
24
- inventory_collection.strategy == :local_db_find_missing_references &&
25
- in_scope?(inventory_collection, refresh_state.sweep_scope)
29
+ def sweep_possible?(inventory_collection, scope_set)
30
+ inventory_collection.supports_column?(:last_seen_at) && in_scope?(inventory_collection, scope_set)
31
+ end
32
+
33
+ def in_scope?(inventory_collection, scope_set)
34
+ scope_set.include?(inventory_collection&.name)
26
35
  end
27
36
 
28
- def in_scope?(inventory_collection, sweep_scope)
29
- return true unless sweep_scope
37
+ def build_scope_set(sweep_scope)
38
+ return [] unless sweep_scope
30
39
 
31
40
  if sweep_scope.kind_of?(Array)
32
- return true if sweep_scope.include?(inventory_collection&.name&.to_s)
41
+ sweep_scope.map(&:to_sym).to_set
42
+ elsif sweep_scope.kind_of?(Hash)
43
+ sweep_scope.keys.map(&:to_sym).to_set
33
44
  end
34
-
35
- false
36
45
  end
37
46
  end
38
47
 
39
48
  include InventoryRefresh::SaveCollection::Saver::RetentionHelper
40
49
 
41
- attr_reader :inventory_collection, :refresh_state, :model_class, :primary_key
50
+ attr_reader :inventory_collection, :refresh_state, :sweep_scope, :model_class, :primary_key
42
51
 
43
- def initialize(inventory_collection, refresh_state)
52
+ delegate :inventory_object_lazy?,
53
+ :inventory_object?,
54
+ :to => :inventory_collection
55
+
56
+ def initialize(inventory_collection, refresh_state, sweep_scope)
44
57
  @inventory_collection = inventory_collection
45
- @refresh_state = refresh_state
46
58
 
47
- @model_class = inventory_collection.model_class
48
- @primary_key = @model_class.primary_key
59
+ @refresh_state = refresh_state
60
+ @sweep_scope = sweep_scope
61
+
62
+ @model_class = inventory_collection.model_class
63
+ @primary_key = @model_class.primary_key
64
+ end
65
+
66
+ def apply_targeted_sweep_scope(all_entities_query)
67
+ if sweep_scope.kind_of?(Hash)
68
+ scope = sweep_scope[inventory_collection.name]
69
+ return all_entities_query if scope.nil? || scope.empty?
70
+
71
+ # Scan the scope to find all references, so we can load them from DB in batches
72
+ scan_sweep_scope!(scope)
73
+
74
+ scope_keys = Set.new
75
+ conditions = scope.map { |x| InventoryRefresh::InventoryObject.attributes_with_keys(x, inventory_collection, scope_keys) }
76
+ assert_conditions!(conditions, scope_keys)
77
+
78
+ all_entities_query.where(inventory_collection.build_multi_selection_condition(conditions, scope_keys))
79
+ else
80
+ all_entities_query
81
+ end
82
+ end
83
+
84
+ def loadable?(value)
85
+ inventory_object_lazy?(value) || inventory_object?(value)
86
+ end
87
+
88
+ def scan_sweep_scope!(scope)
89
+ scope.each do |sc|
90
+ sc.each_value do |value|
91
+ next unless loadable?(value)
92
+
93
+ value_inventory_collection = value.inventory_collection
94
+ value_inventory_collection.add_reference(value.reference, :key => value.key)
95
+ end
96
+ end
97
+ end
98
+
99
+ def assert_conditions!(conditions, scope_keys)
100
+ conditions.each do |cond|
101
+ assert_uniform_keys!(cond, scope_keys)
102
+ assert_non_existent_keys!(cond)
103
+ end
104
+ end
105
+
106
+ def assert_uniform_keys!(cond, scope_keys)
107
+ return if (diff = (scope_keys - cond.keys.to_set)).empty?
108
+
109
+ raise(InventoryRefresh::Exception::SweeperNonUniformScopeKeyFoundError,
110
+ "Sweeping scope for #{inventory_collection} contained non uniform keys. All keys for the"\
111
+ "scope must be the same, it's possible to send multiple sweeps with different key set. Missing keys"\
112
+ " for a scope were: #{diff.to_a}")
113
+ end
114
+
115
+ def assert_non_existent_keys!(cond)
116
+ return if (diff = (cond.keys.to_set - inventory_collection.all_column_names)).empty?
117
+
118
+ raise(InventoryRefresh::Exception::SweeperNonExistentScopeKeyFoundError,
119
+ "Sweeping scope for #{inventory_collection} contained keys that are not columns: #{diff.to_a}")
49
120
  end
50
121
 
51
122
  def sweep
@@ -55,7 +126,9 @@ module InventoryRefresh::SaveCollection
55
126
  table = model_class.arel_table
56
127
  date_field = table[:last_seen_at]
57
128
  all_entities_query = inventory_collection.full_collection_for_comparison
58
- all_entities_query.active if inventory_collection.retention_strategy == :archive
129
+ all_entities_query = all_entities_query.active if inventory_collection.retention_strategy == :archive && inventory_collection.supports_column?(:archived_at)
130
+
131
+ all_entities_query = apply_targeted_sweep_scope(all_entities_query)
59
132
 
60
133
  query = all_entities_query
61
134
  .where(date_field.lt(refresh_start)).or(all_entities_query.where(:last_seen_at => nil))