inventory_refresh 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. checksums.yaml +4 -4
  2. data/.codeclimate.yml +0 -1
  3. data/.travis.yml +6 -8
  4. data/inventory_refresh.gemspec +2 -4
  5. data/lib/inventory_refresh.rb +0 -2
  6. data/lib/inventory_refresh/application_record_iterator.rb +9 -26
  7. data/lib/inventory_refresh/exception.rb +8 -0
  8. data/lib/inventory_refresh/inventory_collection.rb +36 -110
  9. data/lib/inventory_refresh/inventory_collection/builder.rb +6 -6
  10. data/lib/inventory_refresh/inventory_collection/data_storage.rb +0 -9
  11. data/lib/inventory_refresh/inventory_collection/helpers/initialize_helper.rb +34 -143
  12. data/lib/inventory_refresh/inventory_collection/helpers/questions_helper.rb +1 -44
  13. data/lib/inventory_refresh/inventory_collection/index/proxy.rb +6 -34
  14. data/lib/inventory_refresh/inventory_collection/index/type/base.rb +0 -8
  15. data/lib/inventory_refresh/inventory_collection/references_storage.rb +0 -17
  16. data/lib/inventory_refresh/inventory_collection/scanner.rb +1 -87
  17. data/lib/inventory_refresh/inventory_collection/serialization.rb +10 -16
  18. data/lib/inventory_refresh/inventory_object.rb +34 -68
  19. data/lib/inventory_refresh/inventory_object_lazy.rb +10 -17
  20. data/lib/inventory_refresh/persister.rb +63 -29
  21. data/lib/inventory_refresh/save_collection/base.rb +2 -4
  22. data/lib/inventory_refresh/save_collection/saver/base.rb +8 -108
  23. data/lib/inventory_refresh/save_collection/saver/concurrent_safe_batch.rb +48 -126
  24. data/lib/inventory_refresh/save_collection/saver/partial_upsert_helper.rb +19 -1
  25. data/lib/inventory_refresh/save_collection/saver/retention_helper.rb +3 -68
  26. data/lib/inventory_refresh/save_collection/saver/sql_helper.rb +0 -125
  27. data/lib/inventory_refresh/save_collection/saver/sql_helper_update.rb +5 -9
  28. data/lib/inventory_refresh/save_collection/saver/sql_helper_upsert.rb +9 -17
  29. data/lib/inventory_refresh/save_collection/sweeper.rb +91 -18
  30. data/lib/inventory_refresh/save_collection/topological_sort.rb +5 -5
  31. data/lib/inventory_refresh/save_inventory.rb +12 -5
  32. data/lib/inventory_refresh/version.rb +1 -1
  33. metadata +9 -45
  34. data/lib/inventory_refresh/save_collection/saver/batch.rb +0 -17
  35. data/lib/inventory_refresh/save_collection/saver/default.rb +0 -57
  36. data/lib/inventory_refresh/target.rb +0 -73
  37. data/lib/inventory_refresh/target_collection.rb +0 -92
@@ -59,7 +59,7 @@ module InventoryRefresh::SaveCollection
59
59
  skeletal_inventory_objects_index = {}
60
60
 
61
61
  inventory_collection.skeletal_primary_index.each_value do |inventory_object|
62
- attributes = inventory_object.attributes_with_keys(inventory_collection, all_attribute_keys, inventory_object)
62
+ attributes = inventory_object.class.attributes_with_keys(inventory_object.data, inventory_collection, all_attribute_keys, inventory_object)
63
63
  index = build_stringified_reference(attributes, unique_index_keys)
64
64
 
65
65
  skeletal_attributes_index[index] = attributes
@@ -200,7 +200,25 @@ module InventoryRefresh::SaveCollection
200
200
  )
201
201
  end
202
202
 
203
+ def comparable_timestamp(timestamp)
204
+ # Let's cast all timestamps to floats via to_f, rounding the comparison precision to milliseconds; that should be
205
+ # enough, since we are the ones setting the record version in the collector. Otherwise we will have a hard time
206
+ # doing equality checks, since the value changes going through the DB (the DB cuts it at 5 decimal places)
207
+
208
+ if timestamp.kind_of?(String)
209
+ Time.use_zone('UTC') { Time.zone.parse(timestamp) }.to_f.round(3)
210
+ elsif timestamp.kind_of?(Time)
211
+ timestamp.in_time_zone('UTC').to_f.round(3)
212
+ else
213
+ timestamp
214
+ end
215
+ end
216
+
203
217
  def skeletonize_or_skip_record(record_version, hash_version, record_versions_max, inventory_object)
218
+ record_version = comparable_timestamp(record_version)
219
+ record_versions_max = comparable_timestamp(record_versions_max) if record_versions_max
220
+ hash_version = comparable_timestamp(hash_version)
221
+
204
222
  # Skip updating this record, because it is old
205
223
  return true if record_version && hash_version && record_version >= hash_version
206
224
 
@@ -3,28 +3,6 @@ module InventoryRefresh::SaveCollection
3
3
  module RetentionHelper
4
4
  private
5
5
 
6
- # Deletes a complement of referenced data
7
- def delete_complement
8
- return unless inventory_collection.delete_allowed?
9
-
10
- all_manager_uuids_size = inventory_collection.all_manager_uuids.size
11
-
12
- logger.debug("Processing :delete_complement of #{inventory_collection} of size "\
13
- "#{all_manager_uuids_size}...")
14
-
15
- query = complement_of!(inventory_collection.all_manager_uuids,
16
- inventory_collection.all_manager_uuids_scope,
17
- inventory_collection.all_manager_uuids_timestamp)
18
-
19
- ids_of_non_active_entities = ActiveRecord::Base.connection.execute(query.to_sql).to_a
20
- ids_of_non_active_entities.each_slice(10_000) do |batch|
21
- destroy_records!(batch)
22
- end
23
-
24
- logger.debug("Processing :delete_complement of #{inventory_collection} of size "\
25
- "#{all_manager_uuids_size}, deleted=#{inventory_collection.deleted_records.size}...Complete")
26
- end
27
-
28
6
  # Applies strategy based on :retention_strategy parameter, or fallbacks to legacy_destroy_records.
29
7
  #
30
8
  # @param records [Array<ApplicationRecord, Hash, Array>] Records we want to delete or archive
@@ -35,13 +13,9 @@ module InventoryRefresh::SaveCollection
35
13
  return false unless inventory_collection.delete_allowed?
36
14
  return if records.blank?
37
15
 
38
- if inventory_collection.retention_strategy
39
- ids = ids_array(records)
40
- inventory_collection.store_deleted_records(ids)
41
- send("#{inventory_collection.retention_strategy}_all_records!", ids)
42
- else
43
- legacy_destroy_records!(records)
44
- end
16
+ ids = ids_array(records)
17
+ inventory_collection.store_deleted_records(ids)
18
+ send("#{inventory_collection.retention_strategy}_all_records!", ids)
45
19
  end
46
20
 
47
21
  # Convert records to list of ids in format [{:id => X}, {:id => Y}...]
@@ -71,45 +45,6 @@ module InventoryRefresh::SaveCollection
71
45
  def destroy_all_records!(records)
72
46
  inventory_collection.model_class.where(:id => records.map { |x| x[:id] }).delete_all
73
47
  end
74
-
75
- # Deletes or sof-deletes records. If the model_class supports a custom class delete method, we will use it for
76
- # batch soft-delete. This is the legacy method doing either ineffective deletion/archiving or requiring a method
77
- # on a class.
78
- #
79
- # @param records [Array<ApplicationRecord, Hash>] Records we want to delete. If we have only hashes, we need to
80
- # to fetch ApplicationRecord objects from the DB
81
- def legacy_destroy_records!(records)
82
- # Is the delete_method rails standard deleting method?
83
- rails_delete = %i(destroy delete).include?(inventory_collection.delete_method)
84
- if !rails_delete && inventory_collection.model_class.respond_to?(inventory_collection.delete_method)
85
- # We have custom delete method defined on a class, that means it supports batch destroy
86
- inventory_collection.store_deleted_records(records.map { |x| {:id => record_key(x, primary_key)} })
87
- inventory_collection.model_class.public_send(inventory_collection.delete_method, records.map { |x| record_key(x, primary_key) })
88
- else
89
- legacy_ineffective_destroy_records(records)
90
- end
91
- end
92
-
93
- # Very ineffective way of deleting records, but is needed if we want to invoke hooks.
94
- #
95
- # @param records [Array<ApplicationRecord, Hash>] Records we want to delete. If we have only hashes, we need to
96
- # to fetch ApplicationRecord objects from the DB
97
- def legacy_ineffective_destroy_records(records)
98
- # We have either standard :destroy and :delete rails method, or custom instance level delete method
99
- # Note: The standard :destroy and :delete rails method can't be batched because of the hooks and cascade destroy
100
- ActiveRecord::Base.transaction do
101
- if pure_sql_records_fetching
102
- # For pure SQL fetching, we need to get the AR objects again, so we can call destroy
103
- inventory_collection.model_class.where(:id => records.map { |x| record_key(x, primary_key) }).find_each do |record|
104
- delete_record!(record)
105
- end
106
- else
107
- records.each do |record|
108
- delete_record!(record)
109
- end
110
- end
111
- end
112
- end
113
48
  end
114
49
  end
115
50
  end
@@ -8,9 +8,6 @@ module InventoryRefresh::SaveCollection
8
8
  module SqlHelper
9
9
  include InventoryRefresh::Logging
10
10
 
11
- # TODO(lsmola) all below methods should be rewritten to arel, but we need to first extend arel to be able to do
12
- # this
13
-
14
11
  extend ActiveSupport::Concern
15
12
 
16
13
  included do
@@ -83,128 +80,6 @@ module InventoryRefresh::SaveCollection
83
80
  "#{value}::#{sql_type}"
84
81
  end
85
82
  end
86
-
87
- # Effective way of doing multiselect
88
- #
89
- # If we use "(col1, col2) IN [(a,e), (b,f), (b,e)]" it's not great, just with 10k batch, we see
90
- # *** ActiveRecord::StatementInvalid Exception: PG::StatementTooComplex: ERROR: stack depth limit exceeded
91
- # HINT: Increase the configuration parameter "max_stack_depth" (currently 2048kB), after ensuring the
92
- # platform's stack depth limit is adequate.
93
- #
94
- # If we use "(col1 = a AND col2 = e) OR (col1 = b AND col2 = f) OR (col1 = b AND col2 = e)" with 10k batch, it
95
- # takes about 6s and consumes 300MB, with 100k it takes ~1h and consume 3GB in Postgre process
96
- #
97
- # The best way seems to be using CTE, where the list of values we want to map is turned to 'table' and we just
98
- # do RIGHT OUTER JOIN to get the complement of given identifiers. Tested on getting complement of 100k items,
99
- # using 2 cols (:ems_ref and :uid_ems) from total 150k rows. It takes ~1s and 350MB in Postgre process
100
- #
101
- # @param manager_uuids [Array<String>, Array[Hash]] Array with manager_uuids of entities. The keys have to match
102
- # inventory_collection.manager_ref. We allow passing just array of strings, if manager_ref.size ==1, to
103
- # spare some memory
104
- # @return [Arel::SelectManager] Arel for getting complement of uuids. This method modifies the passed
105
- # manager_uuids to spare some memory
106
- def complement_of!(manager_uuids, all_manager_uuids_scope, all_manager_uuids_timestamp)
107
- all_attribute_keys = inventory_collection.manager_ref
108
- all_attribute_keys_array = inventory_collection.manager_ref.map(&:to_s)
109
-
110
- active_entities = Arel::Table.new(:active_entities)
111
- active_entities_cte = Arel::Nodes::As.new(
112
- active_entities,
113
- Arel.sql("(#{active_entities_query(all_attribute_keys_array, manager_uuids)})")
114
- )
115
-
116
- all_entities = Arel::Table.new(:all_entities)
117
- all_entities_cte = Arel::Nodes::As.new(
118
- all_entities,
119
- Arel.sql("(#{all_entities_query(all_manager_uuids_scope, all_manager_uuids_timestamp).select(:id, *all_attribute_keys_array).to_sql})")
120
- )
121
- join_condition = all_attribute_keys.map { |key| active_entities[key].eq(all_entities[key]) }.inject(:and)
122
- where_condition = all_attribute_keys.map { |key| active_entities[key].eq(nil) }.inject(:and)
123
-
124
- active_entities
125
- .project(all_entities[:id])
126
- .join(all_entities, Arel::Nodes::RightOuterJoin)
127
- .on(join_condition)
128
- .with(active_entities_cte, all_entities_cte)
129
- .where(where_condition)
130
- end
131
-
132
- private
133
-
134
- def all_entities_query(all_manager_uuids_scope, all_manager_uuids_timestamp)
135
- all_entities_query = inventory_collection.full_collection_for_comparison
136
- all_entities_query = all_entities_query.active if inventory_collection.retention_strategy == :archive
137
-
138
- if all_manager_uuids_scope
139
- scope_keys = all_manager_uuids_scope.first.keys.map { |x| association_to_foreign_key_mapping[x.to_sym] }.map(&:to_s)
140
- scope = load_scope(all_manager_uuids_scope)
141
- condition = inventory_collection.build_multi_selection_condition(scope, scope_keys)
142
- all_entities_query = all_entities_query.where(condition)
143
- end
144
-
145
- if all_manager_uuids_timestamp && supports_column?(:resource_timestamp)
146
- all_manager_uuids_timestamp = Time.parse(all_manager_uuids_timestamp).utc
147
-
148
- date_field = model_class.arel_table[:resource_timestamp]
149
- all_entities_query = all_entities_query.where(date_field.lt(all_manager_uuids_timestamp))
150
- end
151
- all_entities_query
152
- end
153
-
154
- def load_scope(all_manager_uuids_scope)
155
- scope_keys = all_manager_uuids_scope.first.keys.to_set
156
-
157
- all_manager_uuids_scope.map do |cond|
158
- assert_scope!(scope_keys, cond)
159
-
160
- cond.map do |key, value|
161
- foreign_key = association_to_foreign_key_mapping[key.to_sym]
162
- foreign_key_value = value.load&.id
163
-
164
- assert_foreign_keys!(key, value, foreign_key, foreign_key_value)
165
-
166
- [foreign_key, foreign_key_value]
167
- end.to_h
168
- end
169
- end
170
-
171
- def assert_scope!(scope_keys, cond)
172
- if cond.keys.to_set != scope_keys
173
- raise "'#{inventory_collection}' expected keys for :all_manager_uuids_scope are #{scope_keys.to_a}, got"\
174
- " #{cond.keys}. Keys must be the same for all scopes provided."
175
- end
176
- end
177
-
178
- def assert_foreign_keys!(key, value, foreign_key, foreign_key_value)
179
- unless foreign_key
180
- raise "'#{inventory_collection}' doesn't have relation :#{key} provided in :all_manager_uuids_scope."
181
- end
182
-
183
- unless foreign_key_value
184
- raise "'#{inventory_collection}' couldn't load scope value :#{key} => #{value.inspect} provided in :all_manager_uuids_scope"
185
- end
186
- end
187
-
188
- def active_entities_query(all_attribute_keys_array, manager_uuids)
189
- connection = ActiveRecord::Base.connection
190
-
191
- all_attribute_keys_array_q = all_attribute_keys_array.map { |x| quote_column_name(x) }
192
- # For Postgre, only first set of values should contain the type casts
193
- first_value = manager_uuids.shift.to_h
194
- first_value = "(#{all_attribute_keys_array.map { |x| quote(connection, first_value[x], x, true) }.join(",")})"
195
-
196
- # Rest of the values, without the type cast
197
- values = manager_uuids.map! do |hash|
198
- "(#{all_attribute_keys_array.map { |x| quote(connection, hash[x], x, false) }.join(",")})"
199
- end.join(",")
200
-
201
- values = values.blank? ? first_value : [first_value, values].join(",")
202
-
203
- <<-SQL
204
- SELECT *
205
- FROM (VALUES #{values}) AS active_entities_table(#{all_attribute_keys_array_q.join(",")})
206
- SQL
207
- end
208
83
  end
209
84
  end
210
85
  end
@@ -27,9 +27,9 @@ module InventoryRefresh::SaveCollection
27
27
  all_attribute_keys_array << :id
28
28
 
29
29
  # If there is not version attribute, the version conditions will be ignored
30
- version_attribute = if inventory_collection.parallel_safe? && supports_remote_data_timestamp?(all_attribute_keys)
30
+ version_attribute = if supports_remote_data_timestamp?(all_attribute_keys)
31
31
  :resource_timestamp
32
- elsif inventory_collection.parallel_safe? && supports_remote_data_version?(all_attribute_keys)
32
+ elsif supports_remote_data_version?(all_attribute_keys)
33
33
  :resource_counter
34
34
  end
35
35
 
@@ -130,13 +130,9 @@ module InventoryRefresh::SaveCollection
130
130
  end
131
131
 
132
132
  def update_query_returning
133
- if inventory_collection.parallel_safe?
134
- <<-SQL
135
- RETURNING updated_values.#{quote_column_name("id")}, #{unique_index_columns.map { |x| "updated_values.#{quote_column_name(x)}" }.join(",")}
136
- SQL
137
- else
138
- ""
139
- end
133
+ <<-SQL
134
+ RETURNING updated_values.#{quote_column_name("id")}, #{unique_index_columns.map { |x| "updated_values.#{quote_column_name(x)}" }.join(",")}
135
+ SQL
140
136
  end
141
137
  end
142
138
  end
@@ -55,8 +55,6 @@ module InventoryRefresh::SaveCollection
55
55
  end
56
56
 
57
57
  def insert_query_on_conflict_behavior(all_attribute_keys, on_conflict, mode, ignore_cols, column_name)
58
- return "" unless inventory_collection.parallel_safe?
59
-
60
58
  insert_query_on_conflict = insert_query_on_conflict_do(on_conflict)
61
59
  if on_conflict == :do_update
62
60
  insert_query_on_conflict += insert_query_on_conflict_update(all_attribute_keys, mode, ignore_cols, column_name)
@@ -94,8 +92,6 @@ module InventoryRefresh::SaveCollection
94
92
  :resource_counter
95
93
  end
96
94
 
97
- # TODO(lsmola) should we add :deleted => false to the update clause? That should handle a reconnect, without a
98
- # a need to list :deleted anywhere in the parser. We just need to check that a model has the :deleted attribute
99
95
  query = <<-SQL
100
96
  SET #{(all_attribute_keys - ignore_cols).map { |key| build_insert_set_cols(key) }.join(", ")}
101
97
  SQL
@@ -128,7 +124,7 @@ module InventoryRefresh::SaveCollection
128
124
  , #{attr_partial} = '{}', #{attr_partial_max} = NULL
129
125
 
130
126
  WHERE EXCLUDED.#{attr_full} IS NULL OR (
131
- (#{q_table_name}.#{attr_full} IS NULL OR EXCLUDED.#{attr_full} > #{q_table_name}.#{attr_full}) AND
127
+ (#{q_table_name}.#{attr_full} IS NULL OR EXCLUDED.#{attr_full} >= #{q_table_name}.#{attr_full}) AND
132
128
  (#{q_table_name}.#{attr_partial_max} IS NULL OR EXCLUDED.#{attr_full} >= #{q_table_name}.#{attr_partial_max})
133
129
  )
134
130
  SQL
@@ -154,9 +150,9 @@ module InventoryRefresh::SaveCollection
154
150
  #{insert_query_set_jsonb_version(cast, attr_partial, attr_partial_max, column_name)}
155
151
  , #{attr_partial_max} = greatest(#{q_table_name}.#{attr_partial_max}::#{cast}, EXCLUDED.#{attr_partial_max}::#{cast})
156
152
  WHERE EXCLUDED.#{attr_partial_max} IS NULL OR (
157
- (#{q_table_name}.#{attr_full} IS NULL OR EXCLUDED.#{attr_partial_max} > #{q_table_name}.#{attr_full}) AND (
153
+ (#{q_table_name}.#{attr_full} IS NULL OR EXCLUDED.#{attr_partial_max} >= #{q_table_name}.#{attr_full}) AND (
158
154
  (#{q_table_name}.#{attr_partial}->>'#{column_name}')::#{cast} IS NULL OR
159
- EXCLUDED.#{attr_partial_max}::#{cast} > (#{q_table_name}.#{attr_partial}->>'#{column_name}')::#{cast}
155
+ EXCLUDED.#{attr_partial_max}::#{cast} >= (#{q_table_name}.#{attr_partial}->>'#{column_name}')::#{cast}
160
156
  )
161
157
  )
162
158
  SQL
@@ -183,16 +179,12 @@ module InventoryRefresh::SaveCollection
183
179
  end
184
180
 
185
181
  def insert_query_returning_timestamps
186
- if inventory_collection.parallel_safe?
187
- # For upsert, we'll return also created and updated timestamps, so we can recognize what was created and what
188
- # updated
189
- if inventory_collection.internal_timestamp_columns.present?
190
- <<-SQL
191
- , #{inventory_collection.internal_timestamp_columns.map { |x| quote_column_name(x) }.join(",")}
192
- SQL
193
- end
194
- else
195
- ""
182
+ # For upsert, we'll also return the created and updated timestamps, so we can recognize what was created and what
183
+ # was updated
184
+ if inventory_collection.internal_timestamp_columns.present?
185
+ <<-SQL
186
+ , #{inventory_collection.internal_timestamp_columns.map { |x| quote_column_name(x) }.join(",")}
187
+ SQL
196
188
  end
197
189
  end
198
190
  end
@@ -1,5 +1,7 @@
1
+ require "inventory_refresh/exception"
1
2
  require "inventory_refresh/logging"
2
3
  require "inventory_refresh/save_collection/saver/retention_helper"
4
+ require "inventory_refresh/inventory_collection/index/type/local_db"
3
5
 
4
6
  module InventoryRefresh::SaveCollection
5
7
  class Sweeper < InventoryRefresh::SaveCollection::Base
@@ -10,42 +12,111 @@ module InventoryRefresh::SaveCollection
10
12
  # @param _ems [ActiveRecord] Manager owning the inventory_collections
11
13
  # @param inventory_collections [Array<InventoryRefresh::InventoryCollection>] Array of InventoryCollection objects
12
14
  # for sweeping
15
+ # @param sweep_scope [Array<String, Symbol>, Hash] Array of inventory collection names marking sweep. Or for
16
+ # targeted sweeping it's a hash, where each key is an inventory collection name pointing to an array of
17
+ # identifiers of inventory objects we want to target for sweeping.
13
18
  # @param refresh_state [ActiveRecord] Record of :refresh_states
14
- def sweep(_ems, inventory_collections, refresh_state)
19
+ def sweep(_ems, inventory_collections, sweep_scope, refresh_state)
20
+ scope_set = build_scope_set(sweep_scope)
21
+
15
22
  inventory_collections.each do |inventory_collection|
16
- next unless sweep_possible?(inventory_collection, refresh_state)
23
+ next unless sweep_possible?(inventory_collection, scope_set)
17
24
 
18
- new(inventory_collection, refresh_state).sweep
25
+ new(inventory_collection, refresh_state, sweep_scope).sweep
19
26
  end
20
27
  end
21
28
 
22
- def sweep_possible?(inventory_collection, refresh_state)
23
- inventory_collection.supports_column?(:last_seen_at) && inventory_collection.parallel_safe? &&
24
- inventory_collection.strategy == :local_db_find_missing_references &&
25
- in_scope?(inventory_collection, refresh_state.sweep_scope)
29
+ def sweep_possible?(inventory_collection, scope_set)
30
+ inventory_collection.supports_column?(:last_seen_at) && in_scope?(inventory_collection, scope_set)
31
+ end
32
+
33
+ def in_scope?(inventory_collection, scope_set)
34
+ scope_set.include?(inventory_collection&.name)
26
35
  end
27
36
 
28
- def in_scope?(inventory_collection, sweep_scope)
29
- return true unless sweep_scope
37
+ def build_scope_set(sweep_scope)
38
+ return [] unless sweep_scope
30
39
 
31
40
  if sweep_scope.kind_of?(Array)
32
- return true if sweep_scope.include?(inventory_collection&.name&.to_s)
41
+ sweep_scope.map(&:to_sym).to_set
42
+ elsif sweep_scope.kind_of?(Hash)
43
+ sweep_scope.keys.map(&:to_sym).to_set
33
44
  end
34
-
35
- false
36
45
  end
37
46
  end
38
47
 
39
48
  include InventoryRefresh::SaveCollection::Saver::RetentionHelper
40
49
 
41
- attr_reader :inventory_collection, :refresh_state, :model_class, :primary_key
50
+ attr_reader :inventory_collection, :refresh_state, :sweep_scope, :model_class, :primary_key
42
51
 
43
- def initialize(inventory_collection, refresh_state)
52
+ delegate :inventory_object_lazy?,
53
+ :inventory_object?,
54
+ :to => :inventory_collection
55
+
56
+ def initialize(inventory_collection, refresh_state, sweep_scope)
44
57
  @inventory_collection = inventory_collection
45
- @refresh_state = refresh_state
46
58
 
47
- @model_class = inventory_collection.model_class
48
- @primary_key = @model_class.primary_key
59
+ @refresh_state = refresh_state
60
+ @sweep_scope = sweep_scope
61
+
62
+ @model_class = inventory_collection.model_class
63
+ @primary_key = @model_class.primary_key
64
+ end
65
+
66
+ def apply_targeted_sweep_scope(all_entities_query)
67
+ if sweep_scope.kind_of?(Hash)
68
+ scope = sweep_scope[inventory_collection.name]
69
+ return all_entities_query if scope.nil? || scope.empty?
70
+
71
+ # Scan the scope to find all references, so we can load them from DB in batches
72
+ scan_sweep_scope!(scope)
73
+
74
+ scope_keys = Set.new
75
+ conditions = scope.map { |x| InventoryRefresh::InventoryObject.attributes_with_keys(x, inventory_collection, scope_keys) }
76
+ assert_conditions!(conditions, scope_keys)
77
+
78
+ all_entities_query.where(inventory_collection.build_multi_selection_condition(conditions, scope_keys))
79
+ else
80
+ all_entities_query
81
+ end
82
+ end
83
+
84
+ def loadable?(value)
85
+ inventory_object_lazy?(value) || inventory_object?(value)
86
+ end
87
+
88
+ def scan_sweep_scope!(scope)
89
+ scope.each do |sc|
90
+ sc.each_value do |value|
91
+ next unless loadable?(value)
92
+
93
+ value_inventory_collection = value.inventory_collection
94
+ value_inventory_collection.add_reference(value.reference, :key => value.key)
95
+ end
96
+ end
97
+ end
98
+
99
+ def assert_conditions!(conditions, scope_keys)
100
+ conditions.each do |cond|
101
+ assert_uniform_keys!(cond, scope_keys)
102
+ assert_non_existent_keys!(cond)
103
+ end
104
+ end
105
+
106
+ def assert_uniform_keys!(cond, scope_keys)
107
+ return if (diff = (scope_keys - cond.keys.to_set)).empty?
108
+
109
+ raise(InventoryRefresh::Exception::SweeperNonUniformScopeKeyFoundError,
110
+ "Sweeping scope for #{inventory_collection} contained non uniform keys. All keys for the"\
111
+ "scope must be the same, it's possible to send multiple sweeps with different key set. Missing keys"\
112
+ " for a scope were: #{diff.to_a}")
113
+ end
114
+
115
+ def assert_non_existent_keys!(cond)
116
+ return if (diff = (cond.keys.to_set - inventory_collection.all_column_names)).empty?
117
+
118
+ raise(InventoryRefresh::Exception::SweeperNonExistentScopeKeyFoundError,
119
+ "Sweeping scope for #{inventory_collection} contained keys that are not columns: #{diff.to_a}")
49
120
  end
50
121
 
51
122
  def sweep
@@ -55,7 +126,9 @@ module InventoryRefresh::SaveCollection
55
126
  table = model_class.arel_table
56
127
  date_field = table[:last_seen_at]
57
128
  all_entities_query = inventory_collection.full_collection_for_comparison
58
- all_entities_query.active if inventory_collection.retention_strategy == :archive
129
+ all_entities_query = all_entities_query.active if inventory_collection.retention_strategy == :archive && inventory_collection.supports_column?(:archived_at)
130
+
131
+ all_entities_query = apply_targeted_sweep_scope(all_entities_query)
59
132
 
60
133
  query = all_entities_query
61
134
  .where(date_field.lt(refresh_start)).or(all_entities_query.where(:last_seen_at => nil))