rocketjob 5.4.1 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +175 -5
  3. data/bin/rocketjob_batch_perf +1 -1
  4. data/bin/rocketjob_perf +1 -1
  5. data/lib/rocket_job/batch/categories.rb +345 -0
  6. data/lib/rocket_job/batch/io.rb +174 -106
  7. data/lib/rocket_job/batch/model.rb +20 -68
  8. data/lib/rocket_job/batch/performance.rb +19 -7
  9. data/lib/rocket_job/batch/statistics.rb +34 -12
  10. data/lib/rocket_job/batch/throttle_running_workers.rb +2 -6
  11. data/lib/rocket_job/batch/worker.rb +31 -26
  12. data/lib/rocket_job/batch.rb +3 -1
  13. data/lib/rocket_job/category/base.rb +81 -0
  14. data/lib/rocket_job/category/input.rb +170 -0
  15. data/lib/rocket_job/category/output.rb +34 -0
  16. data/lib/rocket_job/cli.rb +25 -17
  17. data/lib/rocket_job/dirmon_entry.rb +23 -13
  18. data/lib/rocket_job/event.rb +1 -1
  19. data/lib/rocket_job/extensions/iostreams/path.rb +32 -0
  20. data/lib/rocket_job/extensions/mongoid/contextual/mongo.rb +2 -2
  21. data/lib/rocket_job/extensions/mongoid/factory.rb +4 -12
  22. data/lib/rocket_job/extensions/mongoid/stringified_symbol.rb +50 -0
  23. data/lib/rocket_job/extensions/psych/yaml_tree.rb +8 -0
  24. data/lib/rocket_job/extensions/rocket_job_adapter.rb +2 -2
  25. data/lib/rocket_job/jobs/conversion_job.rb +43 -0
  26. data/lib/rocket_job/jobs/dirmon_job.rb +25 -36
  27. data/lib/rocket_job/jobs/housekeeping_job.rb +11 -12
  28. data/lib/rocket_job/jobs/on_demand_batch_job.rb +24 -11
  29. data/lib/rocket_job/jobs/on_demand_job.rb +3 -4
  30. data/lib/rocket_job/jobs/performance_job.rb +3 -1
  31. data/lib/rocket_job/jobs/re_encrypt/relational_job.rb +103 -96
  32. data/lib/rocket_job/jobs/upload_file_job.rb +48 -8
  33. data/lib/rocket_job/lookup_collection.rb +69 -0
  34. data/lib/rocket_job/plugins/cron.rb +60 -20
  35. data/lib/rocket_job/plugins/job/model.rb +25 -50
  36. data/lib/rocket_job/plugins/job/persistence.rb +36 -0
  37. data/lib/rocket_job/plugins/job/throttle.rb +2 -2
  38. data/lib/rocket_job/plugins/job/throttle_running_jobs.rb +1 -1
  39. data/lib/rocket_job/plugins/job/worker.rb +2 -7
  40. data/lib/rocket_job/plugins/restart.rb +3 -103
  41. data/lib/rocket_job/plugins/state_machine.rb +4 -3
  42. data/lib/rocket_job/plugins/throttle_dependent_jobs.rb +37 -0
  43. data/lib/rocket_job/ractor_worker.rb +42 -0
  44. data/lib/rocket_job/server/model.rb +1 -1
  45. data/lib/rocket_job/sliced/bzip2_output_slice.rb +18 -19
  46. data/lib/rocket_job/sliced/compressed_slice.rb +3 -6
  47. data/lib/rocket_job/sliced/encrypted_bzip2_output_slice.rb +49 -0
  48. data/lib/rocket_job/sliced/encrypted_slice.rb +4 -6
  49. data/lib/rocket_job/sliced/input.rb +42 -54
  50. data/lib/rocket_job/sliced/slice.rb +12 -16
  51. data/lib/rocket_job/sliced/slices.rb +26 -11
  52. data/lib/rocket_job/sliced/writer/input.rb +46 -18
  53. data/lib/rocket_job/sliced/writer/output.rb +33 -45
  54. data/lib/rocket_job/sliced.rb +1 -74
  55. data/lib/rocket_job/subscribers/server.rb +1 -1
  56. data/lib/rocket_job/thread_worker.rb +46 -0
  57. data/lib/rocket_job/throttle_definitions.rb +7 -1
  58. data/lib/rocket_job/version.rb +1 -1
  59. data/lib/rocket_job/worker.rb +21 -55
  60. data/lib/rocket_job/worker_pool.rb +5 -7
  61. data/lib/rocketjob.rb +53 -43
  62. metadata +36 -28
  63. data/lib/rocket_job/batch/tabular/input.rb +0 -131
  64. data/lib/rocket_job/batch/tabular/output.rb +0 -65
  65. data/lib/rocket_job/batch/tabular.rb +0 -56
  66. data/lib/rocket_job/extensions/mongoid/remove_warnings.rb +0 -12
  67. data/lib/rocket_job/jobs/on_demand_batch_tabular_job.rb +0 -28
@@ -69,12 +69,10 @@ module RocketJob
69
69
  # Return [false] on timeout
70
70
  def join(timeout = 5)
71
71
  while (worker = workers.first)
72
- if worker.join(timeout)
73
- # Worker thread is dead
74
- workers.shift
75
- else
76
- return false
77
- end
72
+ return false unless worker.join(timeout)
73
+
74
+ # Worker thread is dead
75
+ workers.shift
78
76
  end
79
77
  true
80
78
  end
@@ -91,7 +89,7 @@ module RocketJob
91
89
  private
92
90
 
93
91
  def add_one
94
- workers << Worker.new(id: next_worker_id, server_name: server_name)
92
+ workers << ThreadWorker.new(id: next_worker_id, server_name: server_name)
95
93
  rescue StandardError => e
96
94
  logger.fatal("Cannot start worker", e)
97
95
  end
data/lib/rocketjob.rb CHANGED
@@ -3,6 +3,8 @@ require "semantic_logger"
3
3
  require "symmetric-encryption"
4
4
  require "mongoid"
5
5
  require "rocket_job/extensions/mongo/logging"
6
+ require "rocket_job/extensions/iostreams/path"
7
+ require "rocket_job/extensions/psych/yaml_tree"
6
8
  require "rocket_job/version"
7
9
  require "rocket_job/rocket_job"
8
10
  require "rocket_job/config"
@@ -13,28 +15,37 @@ require "rocket_job/extensions/mongoid/clients/options"
13
15
  require "rocket_job/extensions/mongoid/contextual/mongo"
14
16
  require "rocket_job/extensions/mongoid/factory"
15
17
 
16
- # Apply patches for deprecated Symbol type
17
- require "rocket_job/extensions/mongoid/remove_warnings"
18
+ # Backport New StringifiedSymbol type in Mongoid v7.2
19
+ require "rocket_job/extensions/mongoid/stringified_symbol" unless defined?(Mongoid::StringifiedSymbol)
18
20
 
19
21
  # @formatter:off
20
22
  module RocketJob
21
- autoload :ActiveWorker, "rocket_job/active_worker"
22
- autoload :Batch, "rocket_job/batch"
23
- autoload :CLI, "rocket_job/cli"
24
- autoload :DirmonEntry, "rocket_job/dirmon_entry"
25
- autoload :Event, "rocket_job/event"
26
- autoload :Heartbeat, "rocket_job/heartbeat"
27
- autoload :Job, "rocket_job/job"
28
- autoload :JobException, "rocket_job/job_exception"
29
- autoload :Worker, "rocket_job/worker"
30
- autoload :Performance, "rocket_job/performance"
31
- autoload :Server, "rocket_job/server"
32
- autoload :Sliced, "rocket_job/sliced"
33
- autoload :Subscriber, "rocket_job/subscriber"
34
- autoload :Supervisor, "rocket_job/supervisor"
35
- autoload :ThrottleDefinition, "rocket_job/throttle_definition"
36
- autoload :ThrottleDefinitions, "rocket_job/throttle_definitions"
37
- autoload :WorkerPool, "rocket_job/worker_pool"
23
+ autoload :ActiveWorker, "rocket_job/active_worker"
24
+ autoload :Batch, "rocket_job/batch"
25
+ autoload :CLI, "rocket_job/cli"
26
+ autoload :DirmonEntry, "rocket_job/dirmon_entry"
27
+ autoload :Event, "rocket_job/event"
28
+ autoload :Heartbeat, "rocket_job/heartbeat"
29
+ autoload :Job, "rocket_job/job"
30
+ autoload :JobException, "rocket_job/job_exception"
31
+ autoload :LookupCollection, "rocket_job/lookup_collection"
32
+ autoload :Worker, "rocket_job/worker"
33
+ autoload :Performance, "rocket_job/performance"
34
+ autoload :RactorWorker, "rocket_job/ractor_worker"
35
+ autoload :Server, "rocket_job/server"
36
+ autoload :Sliced, "rocket_job/sliced"
37
+ autoload :Subscriber, "rocket_job/subscriber"
38
+ autoload :Supervisor, "rocket_job/supervisor"
39
+ autoload :ThreadWorker, "rocket_job/thread_worker"
40
+ autoload :ThrottleDefinition, "rocket_job/throttle_definition"
41
+ autoload :ThrottleDefinitions, "rocket_job/throttle_definitions"
42
+ autoload :WorkerPool, "rocket_job/worker_pool"
43
+
44
+ module Category
45
+ autoload :Base, "rocket_job/category/base"
46
+ autoload :Input, "rocket_job/category/input"
47
+ autoload :Output, "rocket_job/category/output"
48
+ end
38
49
 
39
50
  module Plugins
40
51
  module Job
@@ -49,38 +60,37 @@ module RocketJob
49
60
  autoload :Transaction, "rocket_job/plugins/job/transaction"
50
61
  autoload :Worker, "rocket_job/plugins/job/worker"
51
62
  end
52
- autoload :Cron, "rocket_job/plugins/cron"
53
- autoload :Document, "rocket_job/plugins/document"
54
- autoload :ProcessingWindow, "rocket_job/plugins/processing_window"
55
- autoload :Restart, "rocket_job/plugins/restart"
56
- autoload :Retry, "rocket_job/plugins/retry"
57
- autoload :Singleton, "rocket_job/plugins/singleton"
58
- autoload :StateMachine, "rocket_job/plugins/state_machine"
59
- autoload :Transaction, "rocket_job/plugins/transaction"
63
+ autoload :Cron, "rocket_job/plugins/cron"
64
+ autoload :Document, "rocket_job/plugins/document"
65
+ autoload :ProcessingWindow, "rocket_job/plugins/processing_window"
66
+ autoload :Retry, "rocket_job/plugins/retry"
67
+ autoload :Singleton, "rocket_job/plugins/singleton"
68
+ autoload :StateMachine, "rocket_job/plugins/state_machine"
69
+ autoload :Transaction, "rocket_job/plugins/transaction"
70
+ autoload :ThrottleDependentJobs, "rocket_job/plugins/throttle_dependent_jobs"
60
71
  end
61
72
 
62
73
  module Jobs
63
- autoload :ActiveJob, "rocket_job/jobs/active_job"
64
- autoload :CopyFileJob, "rocket_job/jobs/copy_file_job"
65
- autoload :DirmonJob, "rocket_job/jobs/dirmon_job"
66
- autoload :OnDemandBatchJob, "rocket_job/jobs/on_demand_batch_job"
67
- autoload :OnDemandBatchTabularJob, "rocket_job/jobs/on_demand_batch_tabular_job"
68
- autoload :OnDemandJob, "rocket_job/jobs/on_demand_job"
69
- autoload :HousekeepingJob, "rocket_job/jobs/housekeeping_job"
70
- autoload :PerformanceJob, "rocket_job/jobs/performance_job"
71
- autoload :SimpleJob, "rocket_job/jobs/simple_job"
72
- autoload :UploadFileJob, "rocket_job/jobs/upload_file_job"
74
+ autoload :ActiveJob, "rocket_job/jobs/active_job"
75
+ autoload :ConversionJob, "rocket_job/jobs/conversion_job"
76
+ autoload :CopyFileJob, "rocket_job/jobs/copy_file_job"
77
+ autoload :DirmonJob, "rocket_job/jobs/dirmon_job"
78
+ autoload :HousekeepingJob, "rocket_job/jobs/housekeeping_job"
79
+ autoload :OnDemandBatchJob, "rocket_job/jobs/on_demand_batch_job"
80
+ autoload :OnDemandJob, "rocket_job/jobs/on_demand_job"
81
+ autoload :PerformanceJob, "rocket_job/jobs/performance_job"
82
+ autoload :SimpleJob, "rocket_job/jobs/simple_job"
83
+ autoload :UploadFileJob, "rocket_job/jobs/upload_file_job"
84
+
73
85
  module ReEncrypt
74
- if defined?(ActiveRecord) && defined?(SyncAttr)
75
- autoload :RelationalJob, "rocket_job/jobs/re_encrypt/relational_job"
76
- end
86
+ autoload :RelationalJob, "rocket_job/jobs/re_encrypt/relational_job"
77
87
  end
78
88
  end
79
89
 
80
90
  module Subscribers
81
- autoload :Logger, "rocket_job/subscribers/logger"
82
- autoload :Server, "rocket_job/subscribers/server"
83
- autoload :Worker, "rocket_job/subscribers/worker"
91
+ autoload :Logger, "rocket_job/subscribers/logger"
92
+ autoload :Server, "rocket_job/subscribers/server"
93
+ autoload :Worker, "rocket_job/subscribers/worker"
84
94
  end
85
95
  end
86
96
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rocketjob
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.4.1
4
+ version: 6.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Reid Morrison
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-12-09 00:00:00.000000000 Z
11
+ date: 2021-08-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: aasm
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: '4.12'
19
+ version: '5.1'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: '4.12'
26
+ version: '5.1'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: concurrent-ruby
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -39,75 +39,75 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: '1.1'
41
41
  - !ruby/object:Gem::Dependency
42
- name: iostreams
42
+ name: fugit
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '1.2'
47
+ version: '1.4'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '1.2'
54
+ version: '1.4'
55
55
  - !ruby/object:Gem::Dependency
56
- name: mongoid
56
+ name: iostreams
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '7.0'
61
+ version: '1.9'
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '7.0'
68
+ version: '1.9'
69
69
  - !ruby/object:Gem::Dependency
70
- name: semantic_logger
70
+ name: mongoid
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: '4.1'
75
+ version: '7.1'
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: '4.1'
82
+ version: '7.1'
83
83
  - !ruby/object:Gem::Dependency
84
- name: symmetric-encryption
84
+ name: semantic_logger
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
- - - ">="
87
+ - - "~>"
88
88
  - !ruby/object:Gem::Version
89
- version: '4.0'
89
+ version: '4.7'
90
90
  type: :runtime
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
- - - ">="
94
+ - - "~>"
95
95
  - !ruby/object:Gem::Version
96
- version: '4.0'
96
+ version: '4.7'
97
97
  - !ruby/object:Gem::Dependency
98
- name: fugit
98
+ name: symmetric-encryption
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
101
  - - "~>"
102
102
  - !ruby/object:Gem::Version
103
- version: '1.3'
103
+ version: '4.3'
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: '1.3'
110
+ version: '4.3'
111
111
  description:
112
112
  email:
113
113
  executables:
@@ -124,6 +124,7 @@ files:
124
124
  - lib/rocket_job/active_worker.rb
125
125
  - lib/rocket_job/batch.rb
126
126
  - lib/rocket_job/batch/callbacks.rb
127
+ - lib/rocket_job/batch/categories.rb
127
128
  - lib/rocket_job/batch/io.rb
128
129
  - lib/rocket_job/batch/logger.rb
129
130
  - lib/rocket_job/batch/lower_priority.rb
@@ -133,37 +134,40 @@ files:
133
134
  - lib/rocket_job/batch/results.rb
134
135
  - lib/rocket_job/batch/state_machine.rb
135
136
  - lib/rocket_job/batch/statistics.rb
136
- - lib/rocket_job/batch/tabular.rb
137
- - lib/rocket_job/batch/tabular/input.rb
138
- - lib/rocket_job/batch/tabular/output.rb
139
137
  - lib/rocket_job/batch/throttle.rb
140
138
  - lib/rocket_job/batch/throttle_running_workers.rb
141
139
  - lib/rocket_job/batch/throttle_windows.rb
142
140
  - lib/rocket_job/batch/worker.rb
141
+ - lib/rocket_job/category/base.rb
142
+ - lib/rocket_job/category/input.rb
143
+ - lib/rocket_job/category/output.rb
143
144
  - lib/rocket_job/cli.rb
144
145
  - lib/rocket_job/config.rb
145
146
  - lib/rocket_job/dirmon_entry.rb
146
147
  - lib/rocket_job/event.rb
148
+ - lib/rocket_job/extensions/iostreams/path.rb
147
149
  - lib/rocket_job/extensions/mongo/logging.rb
148
150
  - lib/rocket_job/extensions/mongoid/clients/options.rb
149
151
  - lib/rocket_job/extensions/mongoid/contextual/mongo.rb
150
152
  - lib/rocket_job/extensions/mongoid/factory.rb
151
- - lib/rocket_job/extensions/mongoid/remove_warnings.rb
153
+ - lib/rocket_job/extensions/mongoid/stringified_symbol.rb
154
+ - lib/rocket_job/extensions/psych/yaml_tree.rb
152
155
  - lib/rocket_job/extensions/rocket_job_adapter.rb
153
156
  - lib/rocket_job/heartbeat.rb
154
157
  - lib/rocket_job/job.rb
155
158
  - lib/rocket_job/job_exception.rb
156
159
  - lib/rocket_job/jobs/active_job.rb
160
+ - lib/rocket_job/jobs/conversion_job.rb
157
161
  - lib/rocket_job/jobs/copy_file_job.rb
158
162
  - lib/rocket_job/jobs/dirmon_job.rb
159
163
  - lib/rocket_job/jobs/housekeeping_job.rb
160
164
  - lib/rocket_job/jobs/on_demand_batch_job.rb
161
- - lib/rocket_job/jobs/on_demand_batch_tabular_job.rb
162
165
  - lib/rocket_job/jobs/on_demand_job.rb
163
166
  - lib/rocket_job/jobs/performance_job.rb
164
167
  - lib/rocket_job/jobs/re_encrypt/relational_job.rb
165
168
  - lib/rocket_job/jobs/simple_job.rb
166
169
  - lib/rocket_job/jobs/upload_file_job.rb
170
+ - lib/rocket_job/lookup_collection.rb
167
171
  - lib/rocket_job/performance.rb
168
172
  - lib/rocket_job/plugins/cron.rb
169
173
  - lib/rocket_job/plugins/document.rb
@@ -180,7 +184,9 @@ files:
180
184
  - lib/rocket_job/plugins/retry.rb
181
185
  - lib/rocket_job/plugins/singleton.rb
182
186
  - lib/rocket_job/plugins/state_machine.rb
187
+ - lib/rocket_job/plugins/throttle_dependent_jobs.rb
183
188
  - lib/rocket_job/plugins/transaction.rb
189
+ - lib/rocket_job/ractor_worker.rb
184
190
  - lib/rocket_job/railtie.rb
185
191
  - lib/rocket_job/rocket_job.rb
186
192
  - lib/rocket_job/server.rb
@@ -189,6 +195,7 @@ files:
189
195
  - lib/rocket_job/sliced.rb
190
196
  - lib/rocket_job/sliced/bzip2_output_slice.rb
191
197
  - lib/rocket_job/sliced/compressed_slice.rb
198
+ - lib/rocket_job/sliced/encrypted_bzip2_output_slice.rb
192
199
  - lib/rocket_job/sliced/encrypted_slice.rb
193
200
  - lib/rocket_job/sliced/input.rb
194
201
  - lib/rocket_job/sliced/output.rb
@@ -202,13 +209,14 @@ files:
202
209
  - lib/rocket_job/subscribers/worker.rb
203
210
  - lib/rocket_job/supervisor.rb
204
211
  - lib/rocket_job/supervisor/shutdown.rb
212
+ - lib/rocket_job/thread_worker.rb
205
213
  - lib/rocket_job/throttle_definition.rb
206
214
  - lib/rocket_job/throttle_definitions.rb
207
215
  - lib/rocket_job/version.rb
208
216
  - lib/rocket_job/worker.rb
209
217
  - lib/rocket_job/worker_pool.rb
210
218
  - lib/rocketjob.rb
211
- homepage: http://rocketjob.io
219
+ homepage: https://rocketjob.io
212
220
  licenses:
213
221
  - Apache-2.0
214
222
  metadata: {}
@@ -227,7 +235,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
227
235
  - !ruby/object:Gem::Version
228
236
  version: '0'
229
237
  requirements: []
230
- rubygems_version: 3.0.8
238
+ rubygems_version: 3.2.22
231
239
  signing_key:
232
240
  specification_version: 4
233
241
  summary: Ruby's missing batch processing system.
@@ -1,131 +0,0 @@
1
- require "active_support/concern"
2
-
3
- module RocketJob
4
- module Batch
5
- class Tabular
6
- # For the simple case where all `input_categories` have the same format,
7
- # If multiple input categories are used with different formats, then use IOStreams::Tabular directly
8
- # instead of this plugin.
9
- module Input
10
- extend ActiveSupport::Concern
11
-
12
- included do
13
- field :tabular_input_header, type: Array, class_attribute: true, user_editable: true
14
- field :tabular_input_format, type: Symbol, default: :csv, class_attribute: true, user_editable: true
15
- field :tabular_input_options, type: Hash, class_attribute: true
16
-
17
- # tabular_input_mode: [:line | :array | :hash]
18
- # :line
19
- # Uploads the file a line (String) at a time for processing by workers.
20
- # :array
21
- # Parses each line from the file as an Array and uploads each array for processing by workers.
22
- # :hash
23
- # Parses each line from the file into a Hash and uploads each hash for processing by workers.
24
- # See IOStreams#each.
25
- field :tabular_input_mode, type: Symbol, default: :line, class_attribute: true, user_editable: true, copy_on_restart: true
26
-
27
- validates_inclusion_of :tabular_input_format, in: IOStreams::Tabular.registered_formats
28
- validates_inclusion_of :tabular_input_mode, in: %i[line array hash row record]
29
- validate :tabular_input_header_present
30
-
31
- class_attribute :tabular_input_white_list
32
- class_attribute :tabular_input_required
33
- class_attribute :tabular_input_skip_unknown
34
-
35
- # Cleanse all uploaded data by removing non-printable characters
36
- # and any characters that cannot be converted to UTF-8
37
- class_attribute :tabular_input_type
38
-
39
- self.tabular_input_white_list = nil
40
- self.tabular_input_required = nil
41
- self.tabular_input_skip_unknown = true
42
- self.tabular_input_type = :text
43
-
44
- before_perform :tabular_input_render
45
- end
46
-
47
- # Extract the header line during the upload.
48
- #
49
- # Overrides: RocketJob::Batch::IO#upload
50
- #
51
- # Notes:
52
- # - When supplying a block the header must be set manually
53
- def upload(stream = nil, **args, &block)
54
- input_stream = stream.nil? ? nil : IOStreams.new(stream)
55
-
56
- if stream && (tabular_input_type == :text)
57
- # Cannot change the length of fixed width lines
58
- replace = tabular_input_format == :fixed ? " " : ""
59
- input_stream.option_or_stream(:encode, encoding: "UTF-8", cleaner: :printable, replace: replace)
60
- end
61
-
62
- # If an input header is not required, then we don't extract it'
63
- return super(input_stream, stream_mode: tabular_input_mode, **args, &block) unless tabular_input.header?
64
-
65
- # If the header is already set then it is not expected in the file
66
- if tabular_input_header.present?
67
- tabular_input_cleanse_header
68
- return super(input_stream, stream_mode: tabular_input_mode, **args, &block)
69
- end
70
-
71
- case tabular_input_mode
72
- when :line
73
- parse_header = lambda do |line|
74
- tabular_input.parse_header(line)
75
- tabular_input_cleanse_header
76
- self.tabular_input_header = tabular_input.header.columns
77
- end
78
- super(input_stream, on_first: parse_header, stream_mode: :line, **args, &block)
79
- when :array, :row
80
- set_header = lambda do |row|
81
- tabular_input.header.columns = row
82
- tabular_input_cleanse_header
83
- self.tabular_input_header = tabular_input.header.columns
84
- end
85
- super(input_stream, on_first: set_header, stream_mode: :array, **args, &block)
86
- when :hash, :record
87
- super(input_stream, stream_mode: :hash, **args, &block)
88
- else
89
- raise(ArgumentError, "Invalid tabular_input_mode: #{stream_mode.inspect}")
90
- end
91
- end
92
-
93
- private
94
-
95
- # Shared instance used for this slice, by a single worker (thread)
96
- def tabular_input
97
- @tabular_input ||= IOStreams::Tabular.new(
98
- columns: tabular_input_header,
99
- allowed_columns: tabular_input_white_list,
100
- required_columns: tabular_input_required,
101
- skip_unknown: tabular_input_skip_unknown,
102
- format: tabular_input_format,
103
- format_options: tabular_input_options&.deep_symbolize_keys
104
- )
105
- end
106
-
107
- def tabular_input_render
108
- return if tabular_input_header.blank? && tabular_input.header?
109
-
110
- @rocket_job_input = tabular_input.record_parse(@rocket_job_input)
111
- end
112
-
113
- # Cleanse custom input header if supplied.
114
- def tabular_input_cleanse_header
115
- ignored_columns = tabular_input.header.cleanse!
116
- logger.warn("Stripped out invalid columns from custom header", ignored_columns) unless ignored_columns.empty?
117
-
118
- self.tabular_input_header = tabular_input.header.columns
119
- end
120
-
121
- def tabular_input_header_present
122
- if tabular_input_header.present? || !tabular_input.header? || (tabular_input_mode == :hash || tabular_input_mode == :record)
123
- return
124
- end
125
-
126
- errors.add(:tabular_input_header, "is required when tabular_input_format is #{tabular_input_format.inspect}")
127
- end
128
- end
129
- end
130
- end
131
- end