rocketjob 4.3.0.beta → 4.3.0.beta2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rocket_job/batch/io.rb +10 -49
- data/lib/rocket_job/batch/tabular/input.rb +10 -10
- data/lib/rocket_job/version.rb +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b5544569821640636a5dc8cd7953f2c0fbadcfa90ec1c7ac029fa4fc249d1269
|
4
|
+
data.tar.gz: 601088d6918f605b80565d2185872b161e1ccf27d923bd6483fe49ac8d5b2586
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9eda8d1c26f5e808f5ef2c1157c032ed490daaf697040a8987951c5ad25f979783796567b536c897317445b413f774fddcbd82ed3898a4578e3de4ece7a91ed7
|
7
|
+
data.tar.gz: 21744b1a7fad3e03c49367888e4662d0d00f60c671829c54cd8c7174ead64a8b9ce0642c61ae37c063508d5df3c0a7280a78e2bb1f752ac3fcd8e57343bf4a99
|
data/lib/rocket_job/batch/io.rb
CHANGED
@@ -67,32 +67,6 @@ module RocketJob
|
|
67
67
|
# Parses each line from the file into a Hash and uploads each hash for processing by workers.
|
68
68
|
# See IOStreams::Stream#each_line, IOStreams::Stream#each_row, and IOStreams::Stream#each_record.
|
69
69
|
#
|
70
|
-
# encoding: [String|Encoding]
|
71
|
-
# Encode returned data with this encoding.
|
72
|
-
# 'US-ASCII': Original 7 bit ASCII Format
|
73
|
-
# 'ASCII-8BIT': 8-bit ASCII Format
|
74
|
-
# 'UTF-8': UTF-8 Format
|
75
|
-
# Etc.
|
76
|
-
# Default: 'UTF-8'
|
77
|
-
# NOTE: If a IOStreams::Path, or IOStreams::Stream was supplied then the encoding will be set
|
78
|
-
# if not already set in the supplied stream.
|
79
|
-
#
|
80
|
-
# encode_replace: [String]
|
81
|
-
# The character to replace with when a character cannot be converted to the target encoding.
|
82
|
-
# nil: Don't replace any invalid characters. Encoding::UndefinedConversionError is raised.
|
83
|
-
# Default: nil
|
84
|
-
# NOTE: If a IOStreams::Path, or IOStreams::Stream was supplied then the encoding will be set
|
85
|
-
# if not already set in the supplied stream.
|
86
|
-
#
|
87
|
-
# encode_cleaner: [nil|symbol|Proc]
|
88
|
-
# Cleanse data read from the input stream.
|
89
|
-
# nil: No cleansing
|
90
|
-
# :printable Cleanse all non-printable characters except \r and \n
|
91
|
-
# Proc/lambda Proc to call after every read to cleanse the data
|
92
|
-
# Default: :printable
|
93
|
-
# NOTE: If a IOStreams::Path, or IOStreams::Stream was supplied then the encoding will be set
|
94
|
-
# if not already set in the supplied stream.
|
95
|
-
#
|
96
70
|
# Example:
|
97
71
|
# # Load plain text records from a file
|
98
72
|
# job.upload('hello.csv')
|
@@ -100,7 +74,8 @@ module RocketJob
|
|
100
74
|
# Example:
|
101
75
|
# # Load plain text records from a file, stripping all non-printable characters,
|
102
76
|
# # as well as any characters that cannot be converted to UTF-8
|
103
|
-
#
|
77
|
+
# path = IOStreams.path('hello.csv').option(:encode, cleaner: :printable, replace: '')
|
78
|
+
# job.upload(path)
|
104
79
|
#
|
105
80
|
# Example: Zip
|
106
81
|
# # Since csv is not known to RocketJob it is ignored
|
@@ -140,15 +115,15 @@ module RocketJob
|
|
140
115
|
# * If an io stream is supplied, it is read until it returns nil.
|
141
116
|
# * Only use this method for UTF-8 data, for binary data use #input_slice or #input_records.
|
142
117
|
# * CSV parsing is slow, so it is usually left for the workers to do.
|
143
|
-
def upload(stream = nil, file_name: nil, category: :main,
|
118
|
+
def upload(stream = nil, file_name: nil, category: :main, stream_mode: :line, on_first: nil, **args, &block)
|
144
119
|
raise(ArgumentError, 'Either stream, or a block must be supplied') unless stream || block
|
145
120
|
|
146
121
|
count =
|
147
122
|
if block
|
148
123
|
input(category).upload(on_first: on_first, &block)
|
149
124
|
else
|
150
|
-
path
|
151
|
-
|
125
|
+
path = IOStreams.new(stream)
|
126
|
+
path.file_name = file_name if file_name
|
152
127
|
self.upload_file_name = path.file_name
|
153
128
|
input(category).upload(on_first: on_first) do |io|
|
154
129
|
path.public_send("each_#{stream_mode}".to_sym, **args) { |line| io << line }
|
@@ -372,16 +347,13 @@ module RocketJob
|
|
372
347
|
# Notes:
|
373
348
|
# - The records are returned in '_id' order. Usually this is the order in
|
374
349
|
# which the records were originally loaded.
|
375
|
-
def download(stream = nil, category: :main, header_line: nil,
|
350
|
+
def download(stream = nil, category: :main, header_line: nil, **args, &block)
|
376
351
|
raise "Cannot download incomplete job: #{id}. Currently in state: #{state}-#{sub_state}" if rocket_job_processing?
|
377
352
|
|
378
|
-
if block
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
path.line_writer(**args) do |io|
|
383
|
-
output(category).download(header_line: header_line) { |record| io << record }
|
384
|
-
end
|
353
|
+
return output(category).download(header_line: header_line, &block) if block
|
354
|
+
|
355
|
+
IOStreams.new(stream).line_writer(**args) do |io|
|
356
|
+
output(category).download(header_line: header_line) { |record| io << record }
|
385
357
|
end
|
386
358
|
end
|
387
359
|
|
@@ -417,17 +389,6 @@ module RocketJob
|
|
417
389
|
RocketJob::Sliced::Writer::Output.collect(self, input_slice) { |writer| writer << result }
|
418
390
|
end
|
419
391
|
end
|
420
|
-
|
421
|
-
private
|
422
|
-
|
423
|
-
def build_path(stream, file_name, encoding: nil, encode_cleaner: nil, encode_replace: nil)
|
424
|
-
path = IOStreams.new(stream)
|
425
|
-
path.file_name = file_name if file_name
|
426
|
-
if (encoding || encode_cleaner || encode_replace) && !path.setting(:encode)
|
427
|
-
path.option_or_stream(:encode, encoding: encoding, cleaner: encode_cleaner, replace: encode_replace)
|
428
|
-
end
|
429
|
-
path
|
430
|
-
end
|
431
392
|
end
|
432
393
|
end
|
433
394
|
end
|
@@ -49,20 +49,20 @@ module RocketJob
|
|
49
49
|
#
|
50
50
|
# Notes:
|
51
51
|
# - When supplying a block the header must be set manually
|
52
|
-
def upload(
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
52
|
+
def upload(stream = nil, **args, &block)
|
53
|
+
input_stream = stream.nil? ? nil : IOStreams.new(stream)
|
54
|
+
|
55
|
+
if stream && (tabular_input_type == :text)
|
56
|
+
input_stream.option_or_stream(:encode, encoding: 'UTF-8', cleaner: :printable, replace: '')
|
57
57
|
end
|
58
58
|
|
59
59
|
# If an input header is not required, then we don't extract it
|
60
|
-
return super(
|
60
|
+
return super(input_stream, stream_mode: tabular_input_mode, **args, &block) unless tabular_input.header?
|
61
61
|
|
62
62
|
# If the header is already set then it is not expected in the file
|
63
63
|
if tabular_input_header.present?
|
64
64
|
tabular_input_cleanse_header
|
65
|
-
return super(
|
65
|
+
return super(input_stream, stream_mode: tabular_input_mode, **args, &block)
|
66
66
|
end
|
67
67
|
|
68
68
|
case tabular_input_mode
|
@@ -72,16 +72,16 @@ module RocketJob
|
|
72
72
|
tabular_input_cleanse_header
|
73
73
|
self.tabular_input_header = tabular_input.header.columns
|
74
74
|
end
|
75
|
-
super(
|
75
|
+
super(input_stream, on_first: parse_header, stream_mode: tabular_input_mode, **args, &block)
|
76
76
|
when :row
|
77
77
|
set_header = -> (row) do
|
78
78
|
tabular_input.header.columns = row
|
79
79
|
tabular_input_cleanse_header
|
80
80
|
self.tabular_input_header = tabular_input.header.columns
|
81
81
|
end
|
82
|
-
super(
|
82
|
+
super(input_stream, on_first: set_header, stream_mode: tabular_input_mode, **args, &block)
|
83
83
|
when :record
|
84
|
-
super(
|
84
|
+
super(input_stream, stream_mode: tabular_input_mode, **args, &block)
|
85
85
|
else
|
86
86
|
raise(ArgumentError, "Invalid tabular_input_mode: #{stream_mode.inspect}")
|
87
87
|
end
|
data/lib/rocket_job/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rocketjob
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.3.0.beta
|
4
|
+
version: 4.3.0.beta2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Reid Morrison
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-10-
|
11
|
+
date: 2019-10-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: aasm
|
@@ -44,14 +44,14 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: 1.0.0.
|
47
|
+
version: 1.0.0.beta2
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: 1.0.0.
|
54
|
+
version: 1.0.0.beta2
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: mongoid
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|