rocketjob 5.3.3 → 5.4.0.beta1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rocket_job/batch/io.rb +14 -19
- data/lib/rocket_job/batch/model.rb +2 -2
- data/lib/rocket_job/jobs/upload_file_job.rb +1 -1
- data/lib/rocket_job/sliced.rb +91 -0
- data/lib/rocket_job/sliced/bzip2_output_slice.rb +43 -0
- data/lib/rocket_job/sliced/input.rb +3 -3
- data/lib/rocket_job/sliced/slice.rb +6 -0
- data/lib/rocket_job/sliced/slices.rb +6 -0
- data/lib/rocket_job/version.rb +1 -1
- data/lib/rocketjob.rb +1 -19
- metadata +11 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 55ab6ca2b3f76cdb4ddf679a2d3e88e1d6a6f3106a69349129a267b629ef4a53
|
4
|
+
data.tar.gz: 4e0d07878fb4265179b4a270650cc9b89ca4bca55f1d8f9a3451cb3064062c35
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fdc7ae3827d987404f431410cb81161fbfa269dfe7575a4a86a6abf362c59c6acd0d2c7e9856273ced396eeaf20e50561f1be10a7de0d5c4ae45e5648d15d083
|
7
|
+
data.tar.gz: 7bb9e9ac90569e78e135293efcd357a0d75037b5417f5f530f18ae3f17f44acf28d7de5b4aeef880a0315117f2992623958a4fb4c9df93ca7273369ac052e759
|
data/lib/rocket_job/batch/io.rb
CHANGED
@@ -18,7 +18,7 @@ module RocketJob
|
|
18
18
|
raise "Category #{category.inspect}, must be registered in input_categories: #{input_categories.inspect}"
|
19
19
|
end
|
20
20
|
|
21
|
-
(@inputs ||= {})[category] ||= RocketJob::Sliced
|
21
|
+
(@inputs ||= {})[category] ||= RocketJob::Sliced.factory(:input, category, self)
|
22
22
|
end
|
23
23
|
|
24
24
|
# Returns [RocketJob::Sliced::Output] output collection for holding output slices
|
@@ -34,7 +34,7 @@ module RocketJob
|
|
34
34
|
raise "Category #{category.inspect}, must be registered in output_categories: #{output_categories.inspect}"
|
35
35
|
end
|
36
36
|
|
37
|
-
(@outputs ||= {})[category] ||= RocketJob::Sliced
|
37
|
+
(@outputs ||= {})[category] ||= RocketJob::Sliced.factory(:output, category, self)
|
38
38
|
end
|
39
39
|
|
40
40
|
# Upload the supplied file, io, IOStreams::Path, or IOStreams::Stream.
|
@@ -355,8 +355,18 @@ module RocketJob
|
|
355
355
|
|
356
356
|
return output(category).download(header_line: header_line, &block) if block
|
357
357
|
|
358
|
-
|
359
|
-
|
358
|
+
output_collection = output(category)
|
359
|
+
|
360
|
+
if output_collection.binary?
|
361
|
+
IOStreams.new(stream).stream(:none).writer(**args) do |io|
|
362
|
+
raise(ArgumenError, "A `header_line` is not supported with binary output collections") if header_line
|
363
|
+
|
364
|
+
output_collection.download { |record| io << record[:binary] }
|
365
|
+
end
|
366
|
+
else
|
367
|
+
IOStreams.new(stream).writer(:line, **args) do |io|
|
368
|
+
output_collection.download(header_line: header_line) { |record| io << record }
|
369
|
+
end
|
360
370
|
end
|
361
371
|
end
|
362
372
|
|
@@ -393,21 +403,6 @@ module RocketJob
|
|
393
403
|
RocketJob::Sliced::Writer::Output.collect(self, input_slice) { |writer| writer << result }
|
394
404
|
end
|
395
405
|
end
|
396
|
-
|
397
|
-
private
|
398
|
-
|
399
|
-
def rocket_job_io_slice_arguments(collection_type, category)
|
400
|
-
collection_name = "rocket_job.#{collection_type}.#{id}"
|
401
|
-
collection_name << ".#{category}" unless category == :main
|
402
|
-
|
403
|
-
args = {collection_name: collection_name, slice_size: slice_size}
|
404
|
-
if encrypt
|
405
|
-
args[:slice_class] = Sliced::EncryptedSlice
|
406
|
-
elsif compress
|
407
|
-
args[:slice_class] = Sliced::CompressedSlice
|
408
|
-
end
|
409
|
-
args
|
410
|
-
end
|
411
406
|
end
|
412
407
|
end
|
413
408
|
end
|
@@ -44,12 +44,12 @@ module RocketJob
|
|
44
44
|
# Compress uploaded records.
|
45
45
|
# The fields are not affected in any way, only the data stored in the
|
46
46
|
# records and results collections will be compressed
|
47
|
-
field :compress, type:
|
47
|
+
field :compress, type: Object, default: false, class_attribute: true
|
48
48
|
|
49
49
|
# Encrypt uploaded records.
|
50
50
|
# The fields are not affected in any way, only the data stored in the
|
51
51
|
# records and results collections will be encrypted
|
52
|
-
field :encrypt, type:
|
52
|
+
field :encrypt, type: Object, default: false, class_attribute: true
|
53
53
|
|
54
54
|
#
|
55
55
|
# Values that jobs can also update during processing
|
@@ -0,0 +1,91 @@
|
|
1
|
+
module RocketJob
|
2
|
+
module Sliced
|
3
|
+
autoload :BZip2OutputSlice, "rocket_job/sliced/bzip2_output_slice"
|
4
|
+
autoload :CompressedSlice, "rocket_job/sliced/compressed_slice"
|
5
|
+
autoload :EncryptedSlice, "rocket_job/sliced/encrypted_slice"
|
6
|
+
autoload :Input, "rocket_job/sliced/input"
|
7
|
+
autoload :Output, "rocket_job/sliced/output"
|
8
|
+
autoload :Slice, "rocket_job/sliced/slice"
|
9
|
+
autoload :Slices, "rocket_job/sliced/slices"
|
10
|
+
autoload :Store, "rocket_job/sliced/store"
|
11
|
+
|
12
|
+
module Writer
|
13
|
+
autoload :Input, "rocket_job/sliced/writer/input"
|
14
|
+
autoload :Output, "rocket_job/sliced/writer/output"
|
15
|
+
end
|
16
|
+
|
17
|
+
# Returns [RocketJob::Sliced::Slices] for the relevant type and category.
|
18
|
+
#
|
19
|
+
# Supports compress and encrypt with [true|false|Hash] values.
|
20
|
+
# When [Hash] they must specify whether they apply to the input or output collection types.
|
21
|
+
#
|
22
|
+
# Example, compress both input and output collections:
|
23
|
+
# class MyJob < RocketJob::Job
|
24
|
+
# include RocketJob::Batch
|
25
|
+
# self.compress = true
|
26
|
+
# end
|
27
|
+
#
|
28
|
+
# Example, compress just the output collections:
|
29
|
+
# class MyJob < RocketJob::Job
|
30
|
+
# include RocketJob::Batch
|
31
|
+
# self.compress = {output: true}
|
32
|
+
# end
|
33
|
+
#
|
34
|
+
# To use the specialized BZip2 output compressor, and the regular compressor for the input collections:
|
35
|
+
# class MyJob < RocketJob::Job
|
36
|
+
# include RocketJob::Batch
|
37
|
+
# self.compress = {output: :bzip2, input: true}
|
38
|
+
# end
|
39
|
+
def self.factory(type, category, job)
|
40
|
+
raise(ArgumentError, "Unknown type: #{type.inspect}") unless %i[input output].include?(type)
|
41
|
+
|
42
|
+
collection_name = "rocket_job.#{type}s.#{job.id}"
|
43
|
+
collection_name << ".#{category}" unless category == :main
|
44
|
+
|
45
|
+
args = {collection_name: collection_name, slice_size: job.slice_size}
|
46
|
+
klass = slice_class(type, job)
|
47
|
+
args[:slice_class] = klass if klass
|
48
|
+
|
49
|
+
if type == :input
|
50
|
+
RocketJob::Sliced::Input.new(args)
|
51
|
+
else
|
52
|
+
RocketJob::Sliced::Output.new(args)
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
private
|
57
|
+
|
58
|
+
# Parses the encrypt and compress options to determine which slice serializer to use.
|
59
|
+
# `encrypt` takes priority over any `compress` option.
|
60
|
+
def self.slice_class(type, job)
|
61
|
+
encrypt = extract_value(type, job.encrypt)
|
62
|
+
compress = extract_value(type, job.compress)
|
63
|
+
|
64
|
+
if encrypt
|
65
|
+
case encrypt
|
66
|
+
when true
|
67
|
+
EncryptedSlice
|
68
|
+
else
|
69
|
+
raise(ArgumentError, "Unknown job `encrypt` value: #{compress}") unless compress.is_a?(Slices)
|
70
|
+
# Returns the supplied class to use for encryption.
|
71
|
+
encrypt
|
72
|
+
end
|
73
|
+
elsif compress
|
74
|
+
case compress
|
75
|
+
when true
|
76
|
+
CompressedSlice
|
77
|
+
when :bzip2
|
78
|
+
BZip2OutputSlice
|
79
|
+
else
|
80
|
+
raise(ArgumentError, "Unknown job `compress` value: #{compress}") unless compress.is_a?(Slices)
|
81
|
+
# Returns the supplied class to use for compression.
|
82
|
+
compress
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
def self.extract_value(type, value)
|
88
|
+
value.is_a?(Hash) ? value[type] : value
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module RocketJob
|
2
|
+
module Sliced
|
3
|
+
# This is a specialized output serializer that renders each output slice as a single BZip2 compressed stream.
|
4
|
+
# BZip2 allows multiple output streams to be written into a single BZip2 file.
|
5
|
+
#
|
6
|
+
# Notes:
|
7
|
+
# * The `bzip2` linux command line utility supports multiple embedded BZip2 streams,
|
8
|
+
# but some other custom implementations may not. They may only read the first slice and stop.
|
9
|
+
# * It is only designed for use on output collections.
|
10
|
+
#
|
11
|
+
# To download the output when using this slice:
|
12
|
+
#
|
13
|
+
# # Download the binary BZip2 streams into a single file
|
14
|
+
# IOStreams.path(output_file_name).stream(:none).writer do |io|
|
15
|
+
# job.download { |slice| io << slice[:binary] }
|
16
|
+
# end
|
17
|
+
class BZip2OutputSlice < ::RocketJob::Sliced::Slice
|
18
|
+
# This is a specialized binary slice for creating binary data from each slice
|
19
|
+
# that must be downloaded as-is into output files.
|
20
|
+
def self.binary?
|
21
|
+
true
|
22
|
+
end
|
23
|
+
|
24
|
+
private
|
25
|
+
|
26
|
+
def parse_records
|
27
|
+
records = attributes.delete("records")
|
28
|
+
|
29
|
+
# Convert BSON::Binary to a string
|
30
|
+
@records = [{binary: records.data}]
|
31
|
+
end
|
32
|
+
|
33
|
+
def serialize_records
|
34
|
+
return [] if @records.nil? || @records.empty?
|
35
|
+
|
36
|
+
lines = records.to_a.join("\n")
|
37
|
+
s = StringIO.new
|
38
|
+
IOStreams::Bzip2::Writer.stream(s) { |io| io.write(lines) }
|
39
|
+
BSON::Binary.new(s.string)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
@@ -5,7 +5,7 @@ module RocketJob
|
|
5
5
|
# Create indexes before uploading
|
6
6
|
create_indexes
|
7
7
|
Writer::Input.collect(self, on_first: on_first, &block)
|
8
|
-
rescue
|
8
|
+
rescue Exception => e
|
9
9
|
drop
|
10
10
|
raise(e)
|
11
11
|
end
|
@@ -73,7 +73,7 @@ module RocketJob
|
|
73
73
|
count += 1
|
74
74
|
end
|
75
75
|
count
|
76
|
-
rescue
|
76
|
+
rescue Exception => e
|
77
77
|
drop
|
78
78
|
raise(e)
|
79
79
|
end
|
@@ -91,7 +91,7 @@ module RocketJob
|
|
91
91
|
count += 1
|
92
92
|
end
|
93
93
|
count
|
94
|
-
rescue
|
94
|
+
rescue Exception => e
|
95
95
|
drop
|
96
96
|
raise(e)
|
97
97
|
end
|
@@ -94,6 +94,12 @@ module RocketJob
|
|
94
94
|
end
|
95
95
|
end
|
96
96
|
|
97
|
+
# Returns whether this is a specialized binary slice for creating binary data from each slice
|
98
|
+
# that is then just downloaded as-is into output files.
|
99
|
+
def self.binary?
|
100
|
+
false
|
101
|
+
end
|
102
|
+
|
97
103
|
# `records` array has special handling so that it can be modified in place instead of having
|
98
104
|
# to replace the entire array every time. For example, when appending lines with `<<`.
|
99
105
|
def records
|
@@ -42,6 +42,12 @@ module RocketJob
|
|
42
42
|
slice
|
43
43
|
end
|
44
44
|
|
45
|
+
# Returns whether this collection contains specialized binary slices for creating binary data from each slice
|
46
|
+
# that is then just downloaded as-is into output files.
|
47
|
+
def binary?
|
48
|
+
slice_class.binary?
|
49
|
+
end
|
50
|
+
|
45
51
|
# Returns output slices in the order of their id
|
46
52
|
# which is usually the order in which they were written.
|
47
53
|
def each
|
data/lib/rocket_job/version.rb
CHANGED
data/lib/rocketjob.rb
CHANGED
@@ -29,6 +29,7 @@ module RocketJob
|
|
29
29
|
autoload :Worker, "rocket_job/worker"
|
30
30
|
autoload :Performance, "rocket_job/performance"
|
31
31
|
autoload :Server, "rocket_job/server"
|
32
|
+
autoload :Sliced, "rocket_job/sliced"
|
32
33
|
autoload :Subscriber, "rocket_job/subscriber"
|
33
34
|
autoload :Supervisor, "rocket_job/supervisor"
|
34
35
|
autoload :ThrottleDefinition, "rocket_job/throttle_definition"
|
@@ -48,10 +49,6 @@ module RocketJob
|
|
48
49
|
autoload :Transaction, "rocket_job/plugins/job/transaction"
|
49
50
|
autoload :Worker, "rocket_job/plugins/job/worker"
|
50
51
|
end
|
51
|
-
module Rufus
|
52
|
-
autoload :CronLine, "rocket_job/plugins/rufus/cron_line"
|
53
|
-
autoload :ZoTime, "rocket_job/plugins/rufus/zo_time"
|
54
|
-
end
|
55
52
|
autoload :Cron, "rocket_job/plugins/cron"
|
56
53
|
autoload :Document, "rocket_job/plugins/document"
|
57
54
|
autoload :ProcessingWindow, "rocket_job/plugins/processing_window"
|
@@ -80,21 +77,6 @@ module RocketJob
|
|
80
77
|
end
|
81
78
|
end
|
82
79
|
|
83
|
-
module Sliced
|
84
|
-
autoload :CompressedSlice, "rocket_job/sliced/compressed_slice"
|
85
|
-
autoload :EncryptedSlice, "rocket_job/sliced/encrypted_slice"
|
86
|
-
autoload :Input, "rocket_job/sliced/input"
|
87
|
-
autoload :Output, "rocket_job/sliced/output"
|
88
|
-
autoload :Slice, "rocket_job/sliced/slice"
|
89
|
-
autoload :Slices, "rocket_job/sliced/slices"
|
90
|
-
autoload :Store, "rocket_job/sliced/store"
|
91
|
-
|
92
|
-
module Writer
|
93
|
-
autoload :Input, "rocket_job/sliced/writer/input"
|
94
|
-
autoload :Output, "rocket_job/sliced/writer/output"
|
95
|
-
end
|
96
|
-
end
|
97
|
-
|
98
80
|
module Subscribers
|
99
81
|
autoload :Logger, "rocket_job/subscribers/logger"
|
100
82
|
autoload :Server, "rocket_job/subscribers/server"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rocketjob
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.
|
4
|
+
version: 5.4.0.beta1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Reid Morrison
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-10-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: aasm
|
@@ -108,8 +108,8 @@ dependencies:
|
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '1.3'
|
111
|
-
description:
|
112
|
-
email:
|
111
|
+
description:
|
112
|
+
email:
|
113
113
|
executables:
|
114
114
|
- rocketjob
|
115
115
|
- rocketjob_perf
|
@@ -186,6 +186,8 @@ files:
|
|
186
186
|
- lib/rocket_job/server.rb
|
187
187
|
- lib/rocket_job/server/model.rb
|
188
188
|
- lib/rocket_job/server/state_machine.rb
|
189
|
+
- lib/rocket_job/sliced.rb
|
190
|
+
- lib/rocket_job/sliced/bzip2_output_slice.rb
|
189
191
|
- lib/rocket_job/sliced/compressed_slice.rb
|
190
192
|
- lib/rocket_job/sliced/encrypted_slice.rb
|
191
193
|
- lib/rocket_job/sliced/input.rb
|
@@ -210,7 +212,7 @@ homepage: http://rocketjob.io
|
|
210
212
|
licenses:
|
211
213
|
- Apache-2.0
|
212
214
|
metadata: {}
|
213
|
-
post_install_message:
|
215
|
+
post_install_message:
|
214
216
|
rdoc_options: []
|
215
217
|
require_paths:
|
216
218
|
- lib
|
@@ -221,12 +223,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
221
223
|
version: '2.3'
|
222
224
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
223
225
|
requirements:
|
224
|
-
- - "
|
226
|
+
- - ">"
|
225
227
|
- !ruby/object:Gem::Version
|
226
|
-
version:
|
228
|
+
version: 1.3.1
|
227
229
|
requirements: []
|
228
230
|
rubygems_version: 3.0.8
|
229
|
-
signing_key:
|
231
|
+
signing_key:
|
230
232
|
specification_version: 4
|
231
233
|
summary: Ruby's missing batch processing system.
|
232
234
|
test_files: []
|