rocketjob 4.3.0.beta → 4.3.0.beta2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rocket_job/batch/io.rb +10 -49
- data/lib/rocket_job/batch/tabular/input.rb +10 -10
- data/lib/rocket_job/version.rb +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b5544569821640636a5dc8cd7953f2c0fbadcfa90ec1c7ac029fa4fc249d1269
|
4
|
+
data.tar.gz: 601088d6918f605b80565d2185872b161e1ccf27d923bd6483fe49ac8d5b2586
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9eda8d1c26f5e808f5ef2c1157c032ed490daaf697040a8987951c5ad25f979783796567b536c897317445b413f774fddcbd82ed3898a4578e3de4ece7a91ed7
|
7
|
+
data.tar.gz: 21744b1a7fad3e03c49367888e4662d0d00f60c671829c54cd8c7174ead64a8b9ce0642c61ae37c063508d5df3c0a7280a78e2bb1f752ac3fcd8e57343bf4a99
|
data/lib/rocket_job/batch/io.rb
CHANGED
@@ -67,32 +67,6 @@ module RocketJob
|
|
67
67
|
# Parses each line from the file into a Hash and uploads each hash for processing by workers.
|
68
68
|
# See IOStreams::Stream#each_line, IOStreams::Stream#each_row, and IOStreams::Stream#each_record.
|
69
69
|
#
|
70
|
-
# encoding: [String|Encoding]
|
71
|
-
# Encode returned data with this encoding.
|
72
|
-
# 'US-ASCII': Original 7 bit ASCII Format
|
73
|
-
# 'ASCII-8BIT': 8-bit ASCII Format
|
74
|
-
# 'UTF-8': UTF-8 Format
|
75
|
-
# Etc.
|
76
|
-
# Default: 'UTF-8'
|
77
|
-
# NOTE: If a IOStreams::Path, or IOStreams::Stream was supplied then the encoding will be set
|
78
|
-
# if not already set in the supplied stream.
|
79
|
-
#
|
80
|
-
# encode_replace: [String]
|
81
|
-
# The character to replace with when a character cannot be converted to the target encoding.
|
82
|
-
# nil: Don't replace any invalid characters. Encoding::UndefinedConversionError is raised.
|
83
|
-
# Default: nil
|
84
|
-
# NOTE: If a IOStreams::Path, or IOStreams::Stream was supplied then the encoding will be set
|
85
|
-
# if not already set in the supplied stream.
|
86
|
-
#
|
87
|
-
# encode_cleaner: [nil|symbol|Proc]
|
88
|
-
# Cleanse data read from the input stream.
|
89
|
-
# nil: No cleansing
|
90
|
-
# :printable Cleanse all non-printable characters except \r and \n
|
91
|
-
# Proc/lambda Proc to call after every read to cleanse the data
|
92
|
-
# Default: :printable
|
93
|
-
# NOTE: If a IOStreams::Path, or IOStreams::Stream was supplied then the encoding will be set
|
94
|
-
# if not already set in the supplied stream.
|
95
|
-
#
|
96
70
|
# Example:
|
97
71
|
# # Load plain text records from a file
|
98
72
|
# job.upload('hello.csv')
|
@@ -100,7 +74,8 @@ module RocketJob
|
|
100
74
|
# Example:
|
101
75
|
# # Load plain text records from a file, stripping all non-printable characters,
|
102
76
|
# # as well as any characters that cannot be converted to UTF-8
|
103
|
-
#
|
77
|
+
# path = IOStreams.path('hello.csv').option(:encode, cleaner: :printable, replace: '')
|
78
|
+
# job.upload(path)
|
104
79
|
#
|
105
80
|
# Example: Zip
|
106
81
|
# # Since csv is not known to RocketJob it is ignored
|
@@ -140,15 +115,15 @@ module RocketJob
|
|
140
115
|
# * If an io stream is supplied, it is read until it returns nil.
|
141
116
|
# * Only use this method for UTF-8 data, for binary data use #input_slice or #input_records.
|
142
117
|
# * CSV parsing is slow, so it is usually left for the workers to do.
|
143
|
-
def upload(stream = nil, file_name: nil, category: :main,
|
118
|
+
def upload(stream = nil, file_name: nil, category: :main, stream_mode: :line, on_first: nil, **args, &block)
|
144
119
|
raise(ArgumentError, 'Either stream, or a block must be supplied') unless stream || block
|
145
120
|
|
146
121
|
count =
|
147
122
|
if block
|
148
123
|
input(category).upload(on_first: on_first, &block)
|
149
124
|
else
|
150
|
-
path
|
151
|
-
|
125
|
+
path = IOStreams.new(stream)
|
126
|
+
path.file_name = file_name if file_name
|
152
127
|
self.upload_file_name = path.file_name
|
153
128
|
input(category).upload(on_first: on_first) do |io|
|
154
129
|
path.public_send("each_#{stream_mode}".to_sym, **args) { |line| io << line }
|
@@ -372,16 +347,13 @@ module RocketJob
|
|
372
347
|
# Notes:
|
373
348
|
# - The records are returned in '_id' order. Usually this is the order in
|
374
349
|
# which the records were originally loaded.
|
375
|
-
def download(stream = nil, category: :main, header_line: nil,
|
350
|
+
def download(stream = nil, category: :main, header_line: nil, **args, &block)
|
376
351
|
raise "Cannot download incomplete job: #{id}. Currently in state: #{state}-#{sub_state}" if rocket_job_processing?
|
377
352
|
|
378
|
-
if block
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
path.line_writer(**args) do |io|
|
383
|
-
output(category).download(header_line: header_line) { |record| io << record }
|
384
|
-
end
|
353
|
+
return output(category).download(header_line: header_line, &block) if block
|
354
|
+
|
355
|
+
IOStreams.new(stream).line_writer(**args) do |io|
|
356
|
+
output(category).download(header_line: header_line) { |record| io << record }
|
385
357
|
end
|
386
358
|
end
|
387
359
|
|
@@ -417,17 +389,6 @@ module RocketJob
|
|
417
389
|
RocketJob::Sliced::Writer::Output.collect(self, input_slice) { |writer| writer << result }
|
418
390
|
end
|
419
391
|
end
|
420
|
-
|
421
|
-
private
|
422
|
-
|
423
|
-
def build_path(stream, file_name, encoding: nil, encode_cleaner: nil, encode_replace: nil)
|
424
|
-
path = IOStreams.new(stream)
|
425
|
-
path.file_name = file_name if file_name
|
426
|
-
if (encoding || encode_cleaner || encode_replace) && !path.setting(:encode)
|
427
|
-
path.option_or_stream(:encode, encoding: encoding, cleaner: encode_cleaner, replace: encode_replace)
|
428
|
-
end
|
429
|
-
path
|
430
|
-
end
|
431
392
|
end
|
432
393
|
end
|
433
394
|
end
|
@@ -49,20 +49,20 @@ module RocketJob
|
|
49
49
|
#
|
50
50
|
# Notes:
|
51
51
|
# - When supplying a block the header must be set manually
|
52
|
-
def upload(
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
52
|
+
def upload(stream = nil, **args, &block)
|
53
|
+
input_stream = stream.nil? ? nil : IOStreams.new(stream)
|
54
|
+
|
55
|
+
if stream && (tabular_input_type == :text)
|
56
|
+
input_stream.option_or_stream(:encode, encoding: 'UTF-8', cleaner: :printable, replace: '')
|
57
57
|
end
|
58
58
|
|
59
59
|
# If an input header is not required, then we don't extract it.
|
60
|
-
return super(
|
60
|
+
return super(input_stream, stream_mode: tabular_input_mode, **args, &block) unless tabular_input.header?
|
61
61
|
|
62
62
|
# If the header is already set then it is not expected in the file
|
63
63
|
if tabular_input_header.present?
|
64
64
|
tabular_input_cleanse_header
|
65
|
-
return super(
|
65
|
+
return super(input_stream, stream_mode: tabular_input_mode, **args, &block)
|
66
66
|
end
|
67
67
|
|
68
68
|
case tabular_input_mode
|
@@ -72,16 +72,16 @@ module RocketJob
|
|
72
72
|
tabular_input_cleanse_header
|
73
73
|
self.tabular_input_header = tabular_input.header.columns
|
74
74
|
end
|
75
|
-
super(
|
75
|
+
super(input_stream, on_first: parse_header, stream_mode: tabular_input_mode, **args, &block)
|
76
76
|
when :row
|
77
77
|
set_header = -> (row) do
|
78
78
|
tabular_input.header.columns = row
|
79
79
|
tabular_input_cleanse_header
|
80
80
|
self.tabular_input_header = tabular_input.header.columns
|
81
81
|
end
|
82
|
-
super(
|
82
|
+
super(input_stream, on_first: set_header, stream_mode: tabular_input_mode, **args, &block)
|
83
83
|
when :record
|
84
|
-
super(
|
84
|
+
super(input_stream, stream_mode: tabular_input_mode, **args, &block)
|
85
85
|
else
|
86
86
|
raise(ArgumentError, "Invalid tabular_input_mode: #{stream_mode.inspect}")
|
87
87
|
end
|
data/lib/rocket_job/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rocketjob
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.3.0.beta
|
4
|
+
version: 4.3.0.beta2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Reid Morrison
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-10-
|
11
|
+
date: 2019-10-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: aasm
|
@@ -44,14 +44,14 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: 1.0.0.
|
47
|
+
version: 1.0.0.beta2
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: 1.0.0.
|
54
|
+
version: 1.0.0.beta2
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: mongoid
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|