rocketjob 5.0.0 → 5.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 601a4455f5e51191440ce959b6325545d51fcaf531d179dfcd78a4ec016b44c6
4
- data.tar.gz: 7ea8fc1b5cc632461d01e608f5c93cb33e379e14138764fc9e424d15f9c07679
3
+ metadata.gz: 7614d289b64dc4300632b828fe2597532cf68894685b4c45944a48aa151fc21b
4
+ data.tar.gz: a9449dfd5902410a232963159a4a87ddda3c29709771c6a875eb7db07b120a3f
5
5
  SHA512:
6
- metadata.gz: 823280f2250aaa7e2bef010b8b7477f1db1d2f04631c9ce133b80545768e23cab98a14cbc597630988b69b02cfc115acf34a647f50f84fc35e61a708af4c2c09
7
- data.tar.gz: e562aa988c120459637cd11108a4401440faa674d1a3226a8c7e77a91ec31635935349803a9c17ace9b3603c14f8a97ec2e8196ba20a79bf518a429807152dce
6
+ metadata.gz: 0ab97f55bdd18969e69b0314281cd0d604aa8f08f5fc3897b1d1e52855727b72df74263643dd77721721b52e08ac048d576bd5516772e895dd8241ad3d6d65bc
7
+ data.tar.gz: 510c76a7e44de86eb57fac61fa7c894f88c1256daf97ced09f28e8c4db3a79d225f8114357e950cf44dbb081df280b6855b80d2bd11c1b1772b65ff77784b3be
@@ -16,10 +16,7 @@ module RocketJob
16
16
  def input(category = :main)
17
17
  raise "Category #{category.inspect}, must be registered in input_categories: #{input_categories.inspect}" unless input_categories.include?(category) || (category == :main)
18
18
 
19
- collection_name = "rocket_job.inputs.#{id}"
20
- collection_name << ".#{category}" unless category == :main
21
-
22
- (@inputs ||= {})[category] ||= RocketJob::Sliced::Input.new(collection_name: collection_name, slice_size: slice_size)
19
+ (@inputs ||= {})[category] ||= RocketJob::Sliced::Input.new(rocket_job_io_slice_arguments("inputs", category))
23
20
  end
24
21
 
25
22
  # Returns [RocketJob::Sliced::Output] output collection for holding output slices
@@ -33,10 +30,7 @@ module RocketJob
33
30
  def output(category = :main)
34
31
  raise "Category #{category.inspect}, must be registered in output_categories: #{output_categories.inspect}" unless output_categories.include?(category) || (category == :main)
35
32
 
36
- collection_name = "rocket_job.outputs.#{id}"
37
- collection_name << ".#{category}" unless category == :main
38
-
39
- (@outputs ||= {})[category] ||= RocketJob::Sliced::Output.new(collection_name: collection_name, slice_size: slice_size)
33
+ (@outputs ||= {})[category] ||= RocketJob::Sliced::Output.new(rocket_job_io_slice_arguments("outputs", category))
40
34
  end
41
35
 
42
36
  # Upload the supplied file, io, IOStreams::Path, or IOStreams::Stream.
@@ -394,6 +388,22 @@ module RocketJob
394
388
  RocketJob::Sliced::Writer::Output.collect(self, input_slice) { |writer| writer << result }
395
389
  end
396
390
  end
391
+
392
+ private
393
+
394
+ def rocket_job_io_slice_arguments(collection_type, category)
395
+ collection_name = "rocket_job.#{collection_type}.#{id}"
396
+ collection_name << ".#{category}" unless category == :main
397
+
398
+ args = {collection_name: collection_name, slice_size: slice_size}
399
+ if encrypt
400
+ args[:slice_class] = Sliced::EncryptedSlice
401
+ elsif compress
402
+ args[:slice_class] = Sliced::CompressedSlice
403
+ end
404
+ args
405
+ end
406
+
397
407
  end
398
408
  end
399
409
  end
@@ -41,6 +41,16 @@ module RocketJob
41
41
  # May or may not include the fully qualified path name.
42
42
  field :upload_file_name, type: String
43
43
 
44
+ # Compress uploaded records.
45
+ # The fields are not affected in any way, only the data stored in the
46
+ # records and results collections will be compressed
47
+ field :compress, type: Boolean, default: false, class_attribute: true
48
+
49
+ # Encrypt uploaded records.
50
+ # The fields are not affected in any way, only the data stored in the
51
+ # records and results collections will be encrypted
52
+ field :encrypt, type: Boolean, default: false, class_attribute: true
53
+
44
54
  #
45
55
  # Values that jobs can also update during processing
46
56
  #
@@ -73,6 +83,16 @@ module RocketJob
73
83
  end
74
84
  end
75
85
 
86
+ # Returns [true|false] whether the slices for this job are encrypted
87
+ def encrypted?
88
+ encrypt == true
89
+ end
90
+
91
+ # Returns [true|false] whether the slices for this job are compressed
92
+ def compressed?
93
+ compress == true
94
+ end
95
+
76
96
  # Returns [Integer] percent of records completed so far
77
97
  # Returns 0 if the total record count has not yet been set
78
98
  def percent_complete
@@ -23,7 +23,7 @@ module RocketJob
23
23
 
24
24
  puts "Loading job with #{count} records/lines"
25
25
  args = {log_level: :warn, slice_size: slice_size}
26
- if defined?(::RocketJob::Enterprise)
26
+ if defined?(::RocketJob)
27
27
  args[:compress] = compress
28
28
  args[:encrypt] = encrypt
29
29
  end
@@ -80,9 +80,6 @@ module RocketJob
80
80
  logger.debug "Reading Mongo configuration from: #{config_file}"
81
81
  ::Mongoid.load!(config_file, environment)
82
82
 
83
- # Load Encryption configuration file if present
84
- return unless defined?(SymmetricEncryption)
85
-
86
83
  config_file =
87
84
  if encryption_file_name
88
85
  Pathname.new(encryption_file_name)
@@ -64,13 +64,13 @@ module RocketJob
64
64
  end
65
65
 
66
66
  def source_path
67
- source = IOStreams.path(source_url, **decrypt_args(source_args))
67
+ source = IOStreams.path(source_url, **decode_args(source_args))
68
68
  apply_streams(source, source_streams)
69
69
  source
70
70
  end
71
71
 
72
72
  def target_path
73
- target = IOStreams.path(target_url, **decrypt_args(target_args))
73
+ target = IOStreams.path(target_url, **decode_args(target_args))
74
74
  apply_streams(target, target_streams)
75
75
  target
76
76
  end
@@ -78,26 +78,29 @@ module RocketJob
78
78
  private
79
79
 
80
80
  def set_description
81
- self.description = "Copying to #{target_url}"
81
+ self.description ||= "Copying to #{target_url}"
82
82
  end
83
83
 
84
84
  def apply_streams(path, streams)
85
- streams.each_pair { |stream, args| path.stream(stream.to_sym, args.nil? ? {} : decrypt_args(args)) }
85
+ streams.each_pair { |stream, args| path.stream(stream.to_sym, args.nil? ? {} : decode_args(args)) }
86
86
  end
87
87
 
88
- def decrypt_args(args)
88
+ def decode_args(args)
89
89
  return args.symbolize_keys unless defined?(SymmetricEncryption)
90
90
 
91
- decrypted_args = {}
91
+ decoded_args = {}
92
92
  args.each_pair do |key, value|
93
- if key.to_s.start_with?("encrypted_")
94
- unencrypted_key = key.to_s.sub("encrypted_", "")
95
- decrypted_args[unencrypted_key.to_sym] = SymmetricEncryption.decrypt(value)
93
+ if key.to_s.start_with?("encrypted_") && defined?(SymmetricEncryption)
94
+ original_key = key.to_s.sub("encrypted_", "").to_sym
95
+ decoded_args[original_key] = SymmetricEncryption.decrypt(value)
96
+ elsif key.to_s.start_with?("secret_config_") && defined?(SecretConfig)
97
+ original_key = key.to_s.sub("secret_config_", "").to_sym
98
+ decoded_args[original_key] = SecretConfig.fetch(value)
96
99
  else
97
- decrypted_args[key.to_sym] = value
100
+ decoded_args[key.to_sym] = value
98
101
  end
99
102
  end
100
- decrypted_args
103
+ decoded_args
101
104
  end
102
105
  end
103
106
  end
@@ -0,0 +1,130 @@
1
+ begin
2
+ require 'active_record'
3
+ rescue LoadError
4
+ raise 'RocketJob::Jobs::ReEncrypt::RelationalJob uses ActiveRecord to obtain the database connection, please install the gem "activerecord".'
5
+ end
6
+
7
+ # Batch Worker to Re-encrypt all encrypted fields in MySQL that start with `encrypted_`.
8
+ #
9
+ # Run in Rails console:
10
+ # RocketJob::Jobs::ReEncrypt::RelationalJob.start
11
+ #
12
+ # Notes:
13
+ # * Uses table names directly since models can be removed over time and the data still needs to be re-encrypted.
14
+ # * This job will find any column in the database that starts with `encrypted_`.
15
+ # * This means that temporary or other tables not part of the application tables will also be processed.
16
+ # * Since it automatically finds and re-encrypts any column, new columns are handled without any manual intervention.
17
+ module RocketJob
18
+ module Jobs
19
+ module ReEncrypt
20
+ class RelationalJob < RocketJob::Job
21
+ include RocketJob::Batch
22
+
23
+ self.slice_size = 1000
24
+ self.priority = 30
25
+ self.destroy_on_complete = false
26
+ self.compress = true
27
+ self.throttle_running_jobs = 1
28
+ self.throttle_running_slices = 10
29
+
30
+ # Name of the table being re-encrypted
31
+ field :table_name, type: String
32
+
33
+ # Limit the number of records to re-encrypt in test environments
34
+ field :limit, type: Integer
35
+
36
+ validates_presence_of :table_name
37
+ before_batch :upload_records
38
+
39
+ # Returns [Hash] of table names with each entry being an array
40
+ # of columns that start with encrypted_
41
+ sync_cattr_reader :encrypted_columns do
42
+ h = {}
43
+ connection.tables.each do |table|
44
+ columns = connection.columns(table)
45
+ columns.each do |column|
46
+ if column.name.start_with?('encrypted_')
47
+ add_column = column.name
48
+ (h[table] ||= []) << add_column if add_column
49
+ end
50
+ end
51
+ end
52
+ h
53
+ end
54
+
55
+ # Re-encrypt all `encrypted_` columns in the relational database.
56
+ # Queues a Job for each table that needs re-encryption.
57
+ def self.start(**args)
58
+ encrypted_columns.keys.collect do |table|
59
+ create!(table_name: table, description: table, **args)
60
+ end
61
+ end
62
+
63
+ # Re-encrypt all encrypted columns for the named table.
64
+ # Does not use AR models since we do not have models for all tables.
65
+ def perform(range)
66
+ start_id, end_id = range
67
+
68
+ columns = self.class.encrypted_columns[table_name]
69
+ unless columns&.size&.positive?
70
+ logger.error "No columns for table: #{table_name} from #{start_id} to #{end_id}"
71
+ return
72
+ end
73
+
74
+ logger.info "Processing: #{table_name} from #{start_id} to #{end_id}"
75
+ sql = "select id, #{columns.join(',')} from #{quoted_table_name} where id >= #{start_id} and id <= #{end_id}"
76
+
77
+ # Use AR to fetch all the records
78
+ self.class.connection.select_rows(sql).each do |row|
79
+ row = row.unshift(nil)
80
+ index = 1
81
+ sql = "update #{quoted_table_name} set "
82
+ updates = []
83
+ columns.collect do |column|
84
+ index += 1
85
+ value = row[index]
86
+ # Prevent re-encryption
87
+ unless value.blank?
88
+ new_value = re_encrypt(value)
89
+ updates << "#{column} = \"#{new_value}\"" if new_value != value
90
+ end
91
+ end
92
+ if updates.size.positive?
93
+ sql << updates.join(', ')
94
+ sql << " where id=#{row[1]}"
95
+ logger.trace sql
96
+ self.class.connection.execute sql
97
+ else
98
+ logger.trace { "Skipping empty values #{table_name}:#{row[1]}" }
99
+ end
100
+ end
101
+ end
102
+
103
+ # Returns a database connection.
104
+ #
105
+ # Override this method to support other ways of obtaining a thread specific database connection.
106
+ def self.connection
107
+ ActiveRecord::Base.connection
108
+ end
109
+
110
+ private
111
+
112
+ def quoted_table_name
113
+ @quoted_table_name ||= self.class.connection.quote_table_name(table_name)
114
+ end
115
+
116
+ def re_encrypt(encrypted_value)
117
+ return encrypted_value if (encrypted_value == '') || encrypted_value.nil?
118
+ SymmetricEncryption.encrypt(SymmetricEncryption.decrypt(encrypted_value))
119
+ end
120
+
121
+ # Upload range to re-encrypt all rows in the specified table.
122
+ def upload_records
123
+ start_id = self.class.connection.select_value("select min(id) from #{quoted_table_name}").to_i
124
+ last_id = self.class.connection.select_value("select max(id) from #{quoted_table_name}").to_i
125
+ self.record_count = last_id.positive? ? (input.upload_integer_range_in_reverse_order(start_id, last_id) * slice_size) : 0
126
+ end
127
+ end
128
+ end
129
+ end
130
+ end
@@ -85,7 +85,7 @@ module RocketJob
85
85
  # @formatter:on
86
86
 
87
87
  # By default all jobs are not pausable / resumable
88
- class_attribute(:pausable)
88
+ class_attribute(:pausable, instance_predicate: false)
89
89
  self.pausable = false
90
90
 
91
91
  # Define a before and after callback method for each event
@@ -112,7 +112,7 @@ module RocketJob
112
112
  end
113
113
 
114
114
  # All regular jobs can be paused or resumed whilst queued.
115
- def self.pausable?
115
+ def pausable?
116
116
  queued? || paused? || pausable
117
117
  end
118
118
 
@@ -0,0 +1,29 @@
1
+ require 'zlib'
2
+ module RocketJob
3
+ module Sliced
4
+ # Compress the records within a slice
5
+ class CompressedSlice < ::RocketJob::Sliced::Slice
6
+ private
7
+
8
+ def parse_records
9
+ records = attributes.delete('records')
10
+
11
+ # Convert BSON::Binary to a string
12
+ binary_str = records.data
13
+
14
+ str = Zlib::Inflate.inflate(binary_str)
15
+ @records = Hash.from_bson(BSON::ByteBuffer.new(str))['r']
16
+ end
17
+
18
+ def serialize_records
19
+ return [] if @records.nil? || @records.empty?
20
+
21
+ # Convert slice of records into a single string
22
+ str = {'r' => records.to_a}.to_bson.to_s
23
+
24
+ data = Zlib::Deflate.deflate(str)
25
+ BSON::Binary.new(data)
26
+ end
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,35 @@
1
+ require 'symmetric-encryption'
2
+ module RocketJob
3
+ module Sliced
4
+ # Encrypt the records within a slice
5
+ class EncryptedSlice < ::RocketJob::Sliced::Slice
6
+ private
7
+
8
+ def parse_records
9
+ records = attributes.delete('records')
10
+
11
+ # Convert BSON::Binary to a string
12
+ binary_str = records.data
13
+
14
+ header = SymmetricEncryption::Header.new
15
+ header.parse(binary_str)
16
+ # Use the header that is present to decrypt the data, since its version could be different
17
+ str = header.cipher.binary_decrypt(binary_str, header: header)
18
+
19
+ @records = Hash.from_bson(BSON::ByteBuffer.new(str))['r']
20
+ end
21
+
22
+ def serialize_records
23
+ return [] if @records.nil? || @records.empty?
24
+
25
+ # Convert slice of records into a single string
26
+ str = {'r' => to_a}.to_bson.to_s
27
+
28
+ # Encrypt to binary without applying an encoding such as Base64
29
+ # Use a random_iv with each encryption for better security
30
+ data = SymmetricEncryption.cipher.binary_encrypt(str, random_iv: true, compress: true)
31
+ BSON::Binary.new(data)
32
+ end
33
+ end
34
+ end
35
+ end
@@ -68,8 +68,10 @@ module RocketJob
68
68
 
69
69
  args = (method(action).arity == 0) || parameters.nil? ? nil : parameters.symbolize_keys
70
70
  args ? public_send(action, **args) : public_send(action)
71
+ rescue ArgumentError => exc
72
+ logger.error("##{action}: Invalid Arguments. Resuming..", exc)
71
73
  rescue StandardError => exc
72
- logger.error('Exception calling subscriber. Resuming..', exc)
74
+ logger.error("##{action}: Exception caught. Resuming..", exc)
73
75
  end
74
76
 
75
77
  def process_event(name, action, parameters)
@@ -9,8 +9,8 @@ module RocketJob
9
9
  @supervisor = supervisor
10
10
  end
11
11
 
12
- def kill(server_id: nil, wait_timeout: 3)
13
- return unless my_server?(server_id)
12
+ def kill(server_id: nil, name: nil, wait_timeout: 3)
13
+ return unless my_server?(server_id, name)
14
14
 
15
15
  supervisor.synchronize do
16
16
  supervisor.worker_pool.stop
@@ -22,38 +22,38 @@ module RocketJob
22
22
  logger.info "Killed"
23
23
  end
24
24
 
25
- def pause(server_id: nil)
26
- return unless my_server?(server_id)
25
+ def pause(server_id: nil, name: nil)
26
+ return unless my_server?(server_id, name)
27
27
 
28
28
  supervisor.synchronize { supervisor.server.pause! if supervisor.server.may_pause? }
29
29
  Supervisor.event!
30
30
  logger.info "Paused"
31
31
  end
32
32
 
33
- def refresh(server_id: nil)
34
- return unless my_server?(server_id)
33
+ def refresh(server_id: nil, name: nil)
34
+ return unless my_server?(server_id, name)
35
35
 
36
36
  Supervisor.event!
37
37
  logger.info "Refreshed"
38
38
  end
39
39
 
40
- def resume(server_id: nil)
41
- return unless my_server?(server_id)
40
+ def resume(server_id: nil, name: nil)
41
+ return unless my_server?(server_id, name)
42
42
 
43
43
  supervisor.synchronize { supervisor.server.resume! if supervisor.server.may_resume? }
44
44
  Supervisor.event!
45
45
  logger.info "Resumed"
46
46
  end
47
47
 
48
- def stop(server_id: nil)
49
- return unless my_server?(server_id)
48
+ def stop(server_id: nil, name: nil)
49
+ return unless my_server?(server_id, name)
50
50
 
51
51
  Supervisor.shutdown!
52
52
  logger.info "Shutdown"
53
53
  end
54
54
 
55
- def thread_dump(server_id: nil)
56
- return unless my_server?(server_id)
55
+ def thread_dump(server_id: nil, name: nil)
56
+ return unless my_server?(server_id, name)
57
57
 
58
58
  logger.info "Thread dump"
59
59
  supervisor.worker_pool.log_backtraces
@@ -61,10 +61,11 @@ module RocketJob
61
61
 
62
62
  private
63
63
 
64
- def my_server?(server_id)
65
- return true if server_id.nil?
64
+ def my_server?(server_id, name)
65
+ return true if server_id.nil? && name.nil?
66
+ return true if supervisor.server.name == name
66
67
 
67
- server_id == supervisor.server.id
68
+ server_id.to_s == supervisor.server.id.to_s
68
69
  end
69
70
  end
70
71
  end
@@ -1,3 +1,3 @@
1
1
  module RocketJob
2
- VERSION = '5.0.0'.freeze
2
+ VERSION = '5.1.0'.freeze
3
3
  end
@@ -1,5 +1,6 @@
1
1
  require 'iostreams'
2
2
  require 'semantic_logger'
3
+ require 'symmetric-encryption'
3
4
  require 'mongoid'
4
5
  require 'rocket_job/extensions/mongo/logging'
5
6
  require 'rocket_job/version'
@@ -63,18 +64,22 @@ module RocketJob
63
64
  end
64
65
 
65
66
  module Jobs
66
- autoload :ActiveJob, 'rocket_job/jobs/active_job'
67
- autoload :CopyFileJob, 'rocket_job/jobs/copy_file_job'
68
- autoload :DirmonJob, 'rocket_job/jobs/dirmon_job'
69
- autoload :OnDemandBatchJob, 'rocket_job/jobs/on_demand_batch_job'
70
- autoload :OnDemandJob, 'rocket_job/jobs/on_demand_job'
71
- autoload :HousekeepingJob, 'rocket_job/jobs/housekeeping_job'
72
- autoload :PerformanceJob, 'rocket_job/jobs/performance_job'
73
- autoload :SimpleJob, 'rocket_job/jobs/simple_job'
74
- autoload :UploadFileJob, 'rocket_job/jobs/upload_file_job'
67
+ autoload :ActiveJob, 'rocket_job/jobs/active_job'
68
+ autoload :CopyFileJob, 'rocket_job/jobs/copy_file_job'
69
+ autoload :DirmonJob, 'rocket_job/jobs/dirmon_job'
70
+ autoload :OnDemandBatchJob, 'rocket_job/jobs/on_demand_batch_job'
71
+ autoload :OnDemandBatchTabularJob, 'rocket_job/jobs/on_demand_batch_tabular_job'
72
+ autoload :OnDemandJob, 'rocket_job/jobs/on_demand_job'
73
+ autoload :HousekeepingJob, 'rocket_job/jobs/housekeeping_job'
74
+ autoload :PerformanceJob, 'rocket_job/jobs/performance_job'
75
+ autoload :RelationalJob, 'rocket_job/jobs/re_encrypt/relational_job'
76
+ autoload :SimpleJob, 'rocket_job/jobs/simple_job'
77
+ autoload :UploadFileJob, 'rocket_job/jobs/upload_file_job'
75
78
  end
76
79
 
77
80
  module Sliced
81
+ autoload :CompressedSlice, 'rocket_job/sliced/compressed_slice'
82
+ autoload :EncryptedSlice, 'rocket_job/sliced/encrypted_slice'
78
83
  autoload :Input, 'rocket_job/sliced/input'
79
84
  autoload :Output, 'rocket_job/sliced/output'
80
85
  autoload :Slice, 'rocket_job/sliced/slice'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rocketjob
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.0.0
4
+ version: 5.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Reid Morrison
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-01-15 00:00:00.000000000 Z
11
+ date: 2020-02-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: aasm
@@ -80,6 +80,20 @@ dependencies:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
82
  version: '4.1'
83
+ - !ruby/object:Gem::Dependency
84
+ name: symmetric-encryption
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '4.0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '4.0'
83
97
  description:
84
98
  email:
85
99
  - support@rocketjob.io
@@ -135,6 +149,7 @@ files:
135
149
  - lib/rocket_job/jobs/on_demand_batch_tabular_job.rb
136
150
  - lib/rocket_job/jobs/on_demand_job.rb
137
151
  - lib/rocket_job/jobs/performance_job.rb
152
+ - lib/rocket_job/jobs/re_encrypt/relational_job.rb
138
153
  - lib/rocket_job/jobs/simple_job.rb
139
154
  - lib/rocket_job/jobs/upload_file_job.rb
140
155
  - lib/rocket_job/performance.rb
@@ -161,6 +176,8 @@ files:
161
176
  - lib/rocket_job/server.rb
162
177
  - lib/rocket_job/server/model.rb
163
178
  - lib/rocket_job/server/state_machine.rb
179
+ - lib/rocket_job/sliced/compressed_slice.rb
180
+ - lib/rocket_job/sliced/encrypted_slice.rb
164
181
  - lib/rocket_job/sliced/input.rb
165
182
  - lib/rocket_job/sliced/output.rb
166
183
  - lib/rocket_job/sliced/slice.rb