rocketjob 5.4.0.beta2 → 6.0.0.rc3

Files changed (61)
  1. checksums.yaml +4 -4
  2. data/README.md +149 -5
  3. data/bin/rocketjob_batch_perf +1 -1
  4. data/bin/rocketjob_perf +1 -1
  5. data/lib/rocket_job/batch.rb +3 -1
  6. data/lib/rocket_job/batch/categories.rb +341 -0
  7. data/lib/rocket_job/batch/io.rb +128 -60
  8. data/lib/rocket_job/batch/model.rb +20 -68
  9. data/lib/rocket_job/batch/performance.rb +19 -7
  10. data/lib/rocket_job/batch/statistics.rb +34 -12
  11. data/lib/rocket_job/batch/tabular.rb +2 -0
  12. data/lib/rocket_job/batch/tabular/input.rb +8 -6
  13. data/lib/rocket_job/batch/tabular/output.rb +4 -2
  14. data/lib/rocket_job/batch/throttle_running_workers.rb +8 -17
  15. data/lib/rocket_job/batch/worker.rb +27 -24
  16. data/lib/rocket_job/category/base.rb +78 -0
  17. data/lib/rocket_job/category/input.rb +110 -0
  18. data/lib/rocket_job/category/output.rb +25 -0
  19. data/lib/rocket_job/cli.rb +25 -17
  20. data/lib/rocket_job/dirmon_entry.rb +22 -12
  21. data/lib/rocket_job/event.rb +1 -1
  22. data/lib/rocket_job/extensions/iostreams/path.rb +32 -0
  23. data/lib/rocket_job/extensions/mongoid/contextual/mongo.rb +2 -2
  24. data/lib/rocket_job/extensions/mongoid/factory.rb +4 -12
  25. data/lib/rocket_job/extensions/mongoid/stringified_symbol.rb +50 -0
  26. data/lib/rocket_job/extensions/psych/yaml_tree.rb +8 -0
  27. data/lib/rocket_job/extensions/rocket_job_adapter.rb +2 -2
  28. data/lib/rocket_job/jobs/conversion_job.rb +39 -0
  29. data/lib/rocket_job/jobs/dirmon_job.rb +2 -2
  30. data/lib/rocket_job/jobs/housekeeping_job.rb +7 -7
  31. data/lib/rocket_job/jobs/on_demand_batch_job.rb +17 -6
  32. data/lib/rocket_job/jobs/on_demand_job.rb +1 -2
  33. data/lib/rocket_job/jobs/performance_job.rb +3 -1
  34. data/lib/rocket_job/jobs/re_encrypt/relational_job.rb +103 -96
  35. data/lib/rocket_job/jobs/upload_file_job.rb +44 -8
  36. data/lib/rocket_job/lookup_collection.rb +69 -0
  37. data/lib/rocket_job/plugins/job/model.rb +25 -50
  38. data/lib/rocket_job/plugins/job/throttle.rb +2 -2
  39. data/lib/rocket_job/plugins/job/throttle_running_jobs.rb +12 -4
  40. data/lib/rocket_job/plugins/job/worker.rb +2 -7
  41. data/lib/rocket_job/plugins/restart.rb +12 -5
  42. data/lib/rocket_job/plugins/state_machine.rb +2 -1
  43. data/lib/rocket_job/plugins/throttle_dependent_jobs.rb +38 -0
  44. data/lib/rocket_job/ractor_worker.rb +42 -0
  45. data/lib/rocket_job/server/model.rb +1 -1
  46. data/lib/rocket_job/sliced.rb +15 -70
  47. data/lib/rocket_job/sliced/bzip2_output_slice.rb +1 -1
  48. data/lib/rocket_job/sliced/input.rb +1 -1
  49. data/lib/rocket_job/sliced/slice.rb +5 -13
  50. data/lib/rocket_job/sliced/slices.rb +14 -2
  51. data/lib/rocket_job/sliced/writer/output.rb +33 -45
  52. data/lib/rocket_job/subscribers/server.rb +1 -1
  53. data/lib/rocket_job/thread_worker.rb +46 -0
  54. data/lib/rocket_job/throttle_definitions.rb +7 -1
  55. data/lib/rocket_job/version.rb +1 -1
  56. data/lib/rocket_job/worker.rb +21 -55
  57. data/lib/rocket_job/worker_pool.rb +5 -7
  58. data/lib/rocketjob.rb +53 -43
  59. metadata +36 -26
  60. data/lib/rocket_job/extensions/mongoid/remove_warnings.rb +0 -12
  61. data/lib/rocket_job/jobs/on_demand_batch_tabular_job.rb +0 -28
data/lib/rocket_job/batch/statistics.rb
@@ -2,7 +2,11 @@ require "active_support/concern"
 
 module RocketJob
   module Batch
-    # Allow statistics to be gathered while a batch job is running
+    # Allow statistics to be gathered while a batch job is running.
+    #
+    # Notes:
+    # - Statistics for successfully processed records within a slice are saved.
+    # - Statistics gathered during a perform that then results in an exception are discarded.
     module Statistics
       extend ActiveSupport::Concern
 
@@ -45,34 +49,52 @@ module RocketJob
           last = paths.pop
           return unless last
 
-          target = paths.inject(in_memory) { |target, key| target.key?(key) ? target[key] : target[key] = Hash.new(0) }
-          target[last] += increment
+          last_target = paths.inject(in_memory) do |target, sub_key|
+            target.key?(sub_key) ? target[sub_key] : target[sub_key] = Hash.new(0)
+          end
+          last_target[last] += increment
         end
       end
 
       included do
         field :statistics, type: Hash, default: -> { Hash.new(0) }
 
-        around_slice :statistics_capture
+        around_slice :rocket_job_statistics_capture
+        after_perform :rocket_job_statistics_commit
       end
 
       # Increment a statistic
       def statistics_inc(key, increment = 1)
         return if key.nil? || key == ""
 
-        # Being called within tests outside of a perform
-        @slice_statistics ||= Stats.new(new_record? ? statistics : nil)
-        key.is_a?(Hash) ? @slice_statistics.inc(key) : @slice_statistics.inc_key(key, increment)
+        (@rocket_job_perform_statistics ||= []) << (key.is_a?(Hash) ? key : [key, increment])
       end
 
       private
 
-      # Capture the number of successful and failed tradelines
-      # as well as those with notices and alerts.
-      def statistics_capture
-        @slice_statistics = Stats.new(new_record? ? statistics : nil)
+      def rocket_job_statistics_capture
+        @rocket_job_perform_statistics = nil
+        @rocket_job_slice_statistics   = nil
         yield
-        collection.update_one({_id: id}, {"$inc" => @slice_statistics.stats}) unless @slice_statistics.empty?
+      ensure
+        if @rocket_job_slice_statistics && !@rocket_job_slice_statistics.empty?
+          collection.update_one({_id: id}, {"$inc" => @rocket_job_slice_statistics.stats})
+        end
+      end
+
+      def rocket_job_slice_statistics
+        @rocket_job_slice_statistics ||= Stats.new(new_record? ? statistics : nil)
+      end
+
+      # Apply stats gathered during the perform to the slice level stats
+      def rocket_job_statistics_commit
+        return unless @rocket_job_perform_statistics
+
+        @rocket_job_perform_statistics.each do |key|
+          key.is_a?(Hash) ? rocket_job_slice_statistics.inc(key) : rocket_job_slice_statistics.inc_key(*key)
+        end
+
+        @rocket_job_perform_statistics = nil
       end
 
       # Overrides RocketJob::Batch::Logger#rocket_job_batch_log_payload
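
With this change, `statistics_inc` no longer writes directly to the slice statistics. Increments are buffered per perform and only committed by the `after_perform` hook, so statistics from a perform that raises an exception are discarded along with that record's output. A minimal usage sketch (the job class and statistic keys are hypothetical):

    class ScoreJob < RocketJob::Job
      include RocketJob::Batch
      include RocketJob::Batch::Statistics

      def perform(record)
        # Buffered during the perform; committed to the slice stats only on success.
        statistics_inc("processed")
        # Dot-separated keys build nested hashes, e.g. statistics["by_type"]["a"].
        statistics_inc("by_type.#{record['type']}", 1)
        record
      end
    end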
data/lib/rocket_job/batch/tabular.rb
@@ -12,6 +12,8 @@ module RocketJob
     #   )
     #
     #   tabular.render(row)
+    #
+    # @deprecated
     class Tabular
       autoload :Input, "rocket_job/batch/tabular/input"
       autoload :Output, "rocket_job/batch/tabular/output"
data/lib/rocket_job/batch/tabular/input.rb
@@ -3,15 +3,15 @@ require "active_support/concern"
 module RocketJob
   module Batch
     class Tabular
-      # For the simple case where all `input_categories` have the same format,
-      # If multiple input categories are used with different formats, then use IOStreams::Tabular directly
-      # instead of this plugin.
+      # @deprecated
      module Input
        extend ActiveSupport::Concern
 
        included do
+          warn "#{name} is using RocketJob::Batch::Tabular::Input which is deprecated"
+
          field :tabular_input_header, type: Array, class_attribute: true, user_editable: true
-          field :tabular_input_format, type: Symbol, default: :csv, class_attribute: true, user_editable: true
+          field :tabular_input_format, type: Mongoid::StringifiedSymbol, default: :csv, class_attribute: true, user_editable: true
          field :tabular_input_options, type: Hash, class_attribute: true
 
          # tabular_input_mode: [:line | :array | :hash]
@@ -22,7 +22,7 @@ module RocketJob
          #   :hash
          #     Parses each line from the file into a Hash and uploads each hash for processing by workers.
          # See IOStreams#each.
-          field :tabular_input_mode, type: Symbol, default: :line, class_attribute: true, user_editable: true, copy_on_restart: true
+          field :tabular_input_mode, type: Mongoid::StringifiedSymbol, default: :line, class_attribute: true, user_editable: true, copy_on_restart: true
 
          validates_inclusion_of :tabular_input_format, in: IOStreams::Tabular.registered_formats
          validates_inclusion_of :tabular_input_mode, in: %i[line array hash row record]
@@ -119,7 +119,9 @@ module RocketJob
        end
 
        def tabular_input_header_present
-          if tabular_input_header.present? || !tabular_input.header? || (tabular_input_mode == :hash || tabular_input_mode == :record)
+          if tabular_input_header.present? ||
+             !tabular_input.header? ||
+             (tabular_input_mode == :hash || tabular_input_mode == :record)
            return
          end
 
data/lib/rocket_job/batch/tabular/output.rb
@@ -10,8 +10,10 @@ module RocketJob
        extend ActiveSupport::Concern
 
        included do
+          warn "#{name} is using RocketJob::Batch::Tabular::Output which is deprecated"
+
          field :tabular_output_header, type: Array, class_attribute: true, user_editable: true, copy_on_restart: true
-          field :tabular_output_format, type: Symbol, default: :csv, class_attribute: true, user_editable: true, copy_on_restart: true
+          field :tabular_output_format, type: Mongoid::StringifiedSymbol, default: :csv, class_attribute: true, user_editable: true, copy_on_restart: true
          field :tabular_output_options, type: Hash, class_attribute: true
 
          validates_inclusion_of :tabular_output_format, in: IOStreams::Tabular.registered_formats
@@ -55,7 +57,7 @@ module RocketJob
 
        # Render the output from the perform.
        def tabular_output_render
-          return unless collect_output?
+          return unless output_categories.present?
 
          @rocket_job_output = tabular_output.render(@rocket_job_output)
        end
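
Both Tabular plugins now log a deprecation warning on inclusion, and their `Symbol` fields move to `Mongoid::StringifiedSymbol`. In v6 the intended replacement is to declare the format on the category itself. A rough before/after sketch, assuming the `input_category` DSL added by the new `rocket_job/batch/categories.rb`:

    # v5 (now deprecated): format fields supplied by the Tabular plugin.
    class ImportJob < RocketJob::Job
      include RocketJob::Batch
      include RocketJob::Batch::Tabular::Input # logs the deprecation warning above
      self.tabular_input_format = :csv
    end

    # v6 equivalent: the format is declared on the input category instead.
    class ImportJob < RocketJob::Job
      include RocketJob::Batch
      input_category format: :csv, mode: :line
    end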
data/lib/rocket_job/batch/throttle_running_workers.rb
@@ -37,34 +37,25 @@ module RocketJob
        validates :throttle_running_workers, numericality: {greater_than_or_equal_to: 0}, allow_nil: true
 
        define_batch_throttle :throttle_running_workers_exceeded?, filter: :throttle_filter_id
-
-        # Deprecated. For backward compatibility.
-        alias_method :throttle_running_slices, :throttle_running_workers
-        alias_method :throttle_running_slices=, :throttle_running_workers=
      end
 
      private
 
-      # Returns [Boolean] whether the throttle for this job has been exceeded
+      # Returns [true|false] whether the throttle for this job has been exceeded
      def throttle_running_workers_exceeded?(slice)
-        return unless throttle_running_workers&.positive?
+        return false unless throttle_running_workers&.positive?
 
        input.running.with(read: {mode: :primary}) do |conn|
          conn.where(:id.ne => slice.id).count >= throttle_running_workers
        end
      end
 
-      # Returns [Boolean] whether the throttle for this job has been exceeded
-      #
-      # With a Batch job, allow a higher priority queued job to replace a running one with
-      # a lower priority.
-      def throttle_running_jobs_exceeded?
-        return unless throttle_running_jobs&.positive?
-
-        # Cannot use this class since it will include instances of parent job classes.
-        RocketJob::Job.with(read: {mode: :primary}) do |conn|
-          conn.running.where("_type" => self.class.name, :id.ne => id, :priority.lte => priority).count >= throttle_running_jobs
-        end
+      # Allows another job with a higher priority to start even though this one is running already
+      # @overrides RocketJob::Plugins::Job::ThrottleRunningJobs#throttle_running_jobs_base_query
+      def throttle_running_jobs_base_query
+        query = super
+        query[:priority.lte] = priority if throttle_running_workers&.positive?
+        query
      end
    end
  end
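
The deprecated `throttle_running_slices` aliases are removed, and the job-level throttle override is reworked: instead of reimplementing `throttle_running_jobs_exceeded?`, batch jobs now narrow the base query so that only running jobs at the same or more urgent priority count toward the limit, letting a higher-priority queued job start anyway. A hedged configuration sketch:

    class BulkLoadJob < RocketJob::Job
      include RocketJob::Batch

      # At most 10 workers may process slices of a single instance of this job.
      self.throttle_running_workers = 10
      # At most 2 instances may run at once; with the base query override above,
      # a queued instance with a lower (more urgent) priority value can still start.
      self.throttle_running_jobs = 2

      def perform(record)
        record
      end
    end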
data/lib/rocket_job/batch/worker.rb
@@ -23,9 +23,6 @@ module RocketJob
      #
      # Slices are destroyed after their records are successfully processed
      #
-      # Results are stored in the output collection if `collect_output?`
-      # `nil` results from workers are kept if `collect_nil_output`
-      #
      # If an exception was thrown the entire slice of records is marked as failed.
      #
      # Thread-safe, can be called by multiple threads at the same time
@@ -40,7 +37,8 @@ module RocketJob
 
        SemanticLogger.named_tagged(job: id.to_s) do
          until worker.shutdown?
-            if slice = input.next_slice(worker.name)
+            slice = input.next_slice(worker.name)
+            if slice
              # Grab a slice before checking the throttle to reduce concurrency race condition.
              return true if slice.fail_on_exception!(re_raise_exceptions) { rocket_job_batch_throttled?(slice, worker) }
              next if slice.failed?
@@ -97,8 +95,8 @@ module RocketJob
        servers = []
        case sub_state
        when :before, :after
-          unless server_name && !worker_on_server?(server_name)
-            servers << ActiveWorker.new(worker_name, started_at, self) if running?
+          if running? && (server_name.nil? || worker_on_server?(server_name))
+            servers << ActiveWorker.new(worker_name, started_at, self)
          end
        when :processing
          query = input.running
@@ -143,19 +141,23 @@
 
      # Perform individual slice without callbacks
      def rocket_job_perform_slice(slice, &block)
-        count = 0
-        RocketJob::Sliced::Writer::Output.collect(self, slice) do |writer|
-          records = slice.records
-
-          # Skip records already processed, if any.
-          # slice.processing_record_number ||= 0
-          # TODO: Must append to existing output slices before this can be enabled.
-          # if !collect_output && (slice.processing_record_number > 1)
-          #   records = records[slice.processing_record_number - 1..-1]
-          # end
-          # Until the changes above have been implemented, reprocess all records in the slice.
+        slice.processing_record_number ||= 0
+        records = []
+        append = false
+
+        # Skip records already processed in this slice, appending to the existing output.
+        if slice.processing_record_number > 1
+          records = slice.records[slice.processing_record_number - 1..-1]
+          append = true
+          logger.info("Resuming previously incomplete slice from record number #{slice.processing_record_number}")
+        else
+          # Reprocess all records in this slice.
          slice.processing_record_number = 0
+          records = slice.records
+        end
 
+        count = 0
+        RocketJob::Sliced::Writer::Output.collect(self, input_slice: slice, append: append) do |writer|
          records.each do |record|
            slice.processing_record_number += 1
            SemanticLogger.named_tagged(record: slice.current_record_number) do
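
The rewritten `rocket_job_perform_slice` makes interrupted slices resumable: on retry it skips records that were already processed and appends to the existing output slices instead of rewriting them. The resume arithmetic in isolation (illustrative values, not library code):

    records = %w[a b c d e]
    processing_record_number = 3 # worker crashed while processing the 3rd record
    # Record numbers are 1-based, so subtract 1 to resume at the failed record:
    records[processing_record_number - 1..-1] # => ["c", "d", "e"]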
@@ -174,8 +176,8 @@
        return block_given? ? yield(record) : perform(record) if _perform_callbacks.empty?
 
        # @rocket_job_input and @rocket_job_output can be modified by before/around callbacks
-        @rocket_job_input = record
-        @rocket_job_output = nil
+        @rocket_job_input  = record
+        @rocket_job_output = nil
 
        run_callbacks(:perform) do
          @rocket_job_output =
@@ -186,9 +188,9 @@
          end
        end
 
-        @rocket_job_input = nil
-        result = @rocket_job_output
-        @rocket_job_output = nil
+        @rocket_job_input  = nil
+        result             = @rocket_job_output
+        @rocket_job_output = nil
        result
      end
 
@@ -305,11 +307,12 @@
      # Run Batch before and after callbacks
      def rocket_job_batch_callbacks(worker)
        # If this is the first worker to pickup this job
-        if sub_state == :before
+        case sub_state
+        when :before
          rocket_job_batch_run_before_callbacks
          # Check for 0 record jobs
          rocket_job_batch_complete?(worker.name) if running?
-        elsif sub_state == :after
+        when :after
          rocket_job_batch_run_after_callbacks
        end
      end
data/lib/rocket_job/category/base.rb (new file)
@@ -0,0 +1,78 @@
+require "active_support/concern"
+
+module RocketJob
+  module Category
+    # Define the layout for each category of input or output data
+    module Base
+      extend ActiveSupport::Concern
+
+      included do
+        field :name, type: ::Mongoid::StringifiedSymbol, default: :main
+
+        # Whether to compress, encrypt, or use the bzip2 serialization for data in this category.
+        field :serializer, type: ::Mongoid::StringifiedSymbol, default: :compress
+        validates_inclusion_of :serializer, in: %i[none compress encrypt bzip2]
+
+        # The header columns when the file does not include a header row.
+        # Note:
+        #   - All column names must be strings so that they can be serialized into MongoDB.
+        field :columns, type: Array
+
+        # On an input collection `format` specifies the format of the input data so that it can be
+        # transformed into a Hash when passed into the `#perform` method.
+        #
+        # On an output collection `format` specifies the format to transform the output hash into.
+        #
+        # `:auto` uses the `file_name` on this category to determine the format.
+        # `nil` means no transformation is performed on the data returned by the `#perform` method.
+        # Any other format supported by IOStreams, for example: :csv, :hash, :array, :json, :psv, :fixed
+        #
+        # Default: `nil`
+        field :format, type: ::Mongoid::StringifiedSymbol
+        validates_inclusion_of :format, in: [nil, :auto] + IOStreams::Tabular.registered_formats
+
+        # Any specialized format specific options. For example, the `:fixed` format requires a `:layout`.
+        field :format_options, type: Hash
+
+        # When `:format` is not supplied the file name can be used to infer the required format.
+        # Optional.
+        # Default: nil
+        field :file_name, type: IOStreams::Path
+      end
+
+      # Return which slice serializer class to use that matches the current options.
+      def serializer_class
+        case serializer
+        when :none
+          Sliced::Slice
+        when :compress
+          Sliced::CompressedSlice
+        when :encrypt
+          Sliced::EncryptedSlice
+        when :bzip2
+          Sliced::BZip2OutputSlice
+        else
+          raise(ArgumentError, "serializer: #{serializer.inspect} must be :none, :compress, :encrypt, or :bzip2")
+        end
+      end
+
+      def tabular
+        @tabular ||= IOStreams::Tabular.new(
+          columns:        columns,
+          format:         format == :auto ? nil : format,
+          format_options: format_options&.deep_symbolize_keys,
+          file_name:      file_name
+        )
+      end
+
+      def reset_tabular
+        @tabular = nil
+      end
+
+      # Returns [true|false] whether this category has the attributes defined for tabular to work.
+      def tabular?
+        format.present?
+      end
+    end
+  end
+end
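
`Category::Base#serializer_class` is what maps the persisted `serializer` setting onto the slice storage classes under `RocketJob::Sliced`. A hedged sketch of selecting bzip2-compressed output, assuming the v6 `output_category` DSL:

    class ExportJob < RocketJob::Job
      include RocketJob::Batch

      # Stored via RocketJob::Sliced::BZip2OutputSlice, per #serializer_class above.
      output_category serializer: :bzip2

      def perform(record)
        record
      end
    end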
data/lib/rocket_job/category/input.rb (new file)
@@ -0,0 +1,110 @@
+module RocketJob
+  module Category
+    # Define the layout for each category of input or output data
+    class Input
+      include SemanticLogger::Loggable
+      include Plugins::Document
+      include Category::Base
+
+      embedded_in :job, class_name: "RocketJob::Job", inverse_of: :input_categories
+
+      # Slice size for this input collection
+      field :slice_size, type: Integer, default: 100
+
+      #
+      # The fields below only apply if the field `format` has been set:
+      #
+
+      # List of columns to allow.
+      # Default: nil ( Allow all columns )
+      # Note:
+      #   When supplied, any columns that are rejected will be returned in the cleansed columns
+      #   as nil so that they can be ignored during processing.
+      field :allowed_columns, type: Array
+
+      # List of columns that must be present, otherwise an Exception is raised.
+      field :required_columns, type: Array
+
+      # Whether to skip unknown columns in the uploaded file.
+      # Ignores any column that was not found in the `allowed_columns` list.
+      #
+      # false:
+      #   Raises IOStreams::Tabular::InvalidHeader when a column is supplied that is not in `allowed_columns`.
+      # true:
+      #   Ignore additional columns in a file that are not listed in `allowed_columns`.
+      #   Job processing will skip the additional columns entirely as if they were not supplied at all.
+      #   A warning is logged with the names of the columns that were ignored.
+      #   The `columns` field will list all skipped columns with a nil value so that downstream workers
+      #   know to ignore those columns.
+      #
+      # Notes:
+      # - Only applicable when `allowed_columns` has been set.
+      # - Recommended to leave as `false`, otherwise a misspelled column can result in missed columns.
+      field :skip_unknown, type: ::Mongoid::Boolean, default: false
+      validates_inclusion_of :skip_unknown, in: [true, false]
+
+      # When `#upload` is called with a file_name, it uploads the file using any of the following approaches:
+      # :line
+      #   Uploads the file a line (String) at a time for processing by workers.
+      #   This is the default behavior and is the most performant since it leaves the parsing of each line
+      #   up to the workers themselves.
+      # :array
+      #   Parses each line from the file as an Array and uploads each array for processing by workers.
+      #   Every line in the input file is parsed and converted into an array before uploading.
+      #   This approach ensures that the entire file is valid before starting to process it.
+      #   Ideal for when files may contain invalid lines.
+      #   Not recommended for large files since the CSV or other parsing is performed sequentially during the
+      #   upload process.
+      # :hash
+      #   Parses each line from the file into a Hash and uploads each hash for processing by workers.
+      #   Similar to :array above in that the entire file is parsed before processing is started.
+      #   Slightly less efficient than :array since it stores every record as a hash with both the key and value.
+      #
+      # Recommend using :array when the entire file must be parsed/validated before processing is started, and
+      # upload time is not important.
+      # See IOStreams#each for more details.
+      field :mode, type: ::Mongoid::StringifiedSymbol, default: :line
+      validates_inclusion_of :mode, in: %i[line array hash]
+
+      # When reading tabular input data (e.g. CSV, PSV) the header is automatically cleansed.
+      # This removes issues when the input header varies in case and other small ways. See IOStreams::Tabular.
+      # Currently Supported:
+      #   :default
+      #     Each column is cleansed as follows:
+      #     - Leading and trailing whitespace is stripped.
+      #     - All characters converted to lower case.
+      #     - Spaces and '-' are converted to '_'.
+      #     - All characters except for letters, digits, and '_' are stripped.
+      #   :none
+      #     Do not cleanse the column names supplied in the header row.
+      #
+      # Note: Submit a ticket if you have other cleansers that you want added.
+      field :header_cleanser, type: ::Mongoid::StringifiedSymbol, default: :default
+      validates :header_cleanser, inclusion: %i[default none]
+
+      validates_presence_of :slice_size
+
+      # Cleanses the header column names when `header_cleanser` is set to `:default`.
+      def cleanse_header!
+        return unless header_cleanser == :default
+
+        ignored_columns = tabular.header.cleanse!
+        logger.warn("Stripped out invalid columns from custom header", ignored_columns) unless ignored_columns.empty?
+
+        self.columns = tabular.header.columns
+      end
+
+      def tabular
+        @tabular ||= IOStreams::Tabular.new(
+          columns:          columns,
+          format:           format == :auto ? nil : format,
+          format_options:   format_options&.deep_symbolize_keys,
+          file_name:        file_name,
+          allowed_columns:  allowed_columns,
+          required_columns: required_columns,
+          skip_unknown:     skip_unknown
+        )
+      end
+    end
+  end
+end
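
Taken together, an input category now carries everything that used to live on the Tabular plugin: `allowed_columns` and `required_columns` validate the cleansed header, `skip_unknown` decides whether unexpected columns raise `IOStreams::Tabular::InvalidHeader`, and `mode: :hash` hands each record to `perform` as a Hash. An end-to-end sketch (file name and columns are hypothetical, assuming the v6 `input_category` DSL):

    class CustomerImportJob < RocketJob::Job
      include RocketJob::Batch

      input_category format:           :csv,
                     mode:             :hash,
                     allowed_columns:  %w[first_name last_name email],
                     required_columns: %w[email],
                     skip_unknown:     true

      def perform(record)
        record["email"].downcase
      end
    end

    job = CustomerImportJob.new
    job.upload("customers.csv") # a header of "E-Mail" cleanses to "email" before validation
    job.save!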