streamingly 0.2.0 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/streamingly/reducer.rb +7 -5
- data/lib/streamingly/serde.rb +5 -2
- data/lib/streamingly/version.rb +1 -1
- data/spec/streamingly/serde_spec.rb +40 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dee9c9df4012d58362750ed8c7c9d1562f4f927f
|
4
|
+
data.tar.gz: 3572f514fc6650de3a820e2a5728a5fcac9bc584
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ccc461ab15a8bbdea0db4faf2bf668b574e9c926c8a65bbb5672682ac4e4f80fcb1d92c75c0d9b993c8c20a985ea6c5683fc7f541bd13562a159093f6d4ec974
|
7
|
+
data.tar.gz: 01b0db0a463137f8f24e4320448ae75770879eb9ef180c1ef4213297120c14cc4c79ae11b40ff19d7d38aba40e36938f0c20d6a0d08b9902e70a6eccbc3275d9
|
data/lib/streamingly/reducer.rb
CHANGED
@@ -23,7 +23,7 @@ module Streamingly
|
|
23
23
|
private
|
24
24
|
|
25
25
|
def flush
|
26
|
-
@accumulator ? @accumulator.flush : []
|
26
|
+
@accumulator ? (@accumulator.flush || []).compact : []
|
27
27
|
rescue StandardError => error
|
28
28
|
on_error(error, {})
|
29
29
|
[]
|
@@ -33,8 +33,10 @@ module Streamingly
|
|
33
33
|
# Streaming Hadoop only treats the first tab as the delimiter between
|
34
34
|
# the key and value. Additional tabs are grouped into the value:
|
35
35
|
# http://hadoop.apache.org/docs/r0.18.3/streaming.html#How+Does+Streaming+Work
|
36
|
-
key, value = line.split("\t", 2)
|
36
|
+
key, value = (line || '').split("\t", 2)
|
37
|
+
return [] unless key && value
|
37
38
|
|
39
|
+
results = nil
|
38
40
|
if @prev_key != key
|
39
41
|
results = flush
|
40
42
|
|
@@ -42,16 +44,16 @@ module Streamingly
|
|
42
44
|
@accumulator = new_accumulator(key)
|
43
45
|
end
|
44
46
|
|
45
|
-
@accumulator.apply_value(value)
|
47
|
+
@accumulator.apply_value(value) if @accumulator
|
46
48
|
|
47
49
|
results || []
|
48
50
|
rescue StandardError => error
|
49
|
-
on_error(error,
|
51
|
+
on_error(error, line: line)
|
50
52
|
[]
|
51
53
|
end
|
52
54
|
|
53
55
|
def on_error(error, error_context)
|
54
|
-
raise error unless @error_callback_defined
|
56
|
+
raise error unless @error_callback_defined && !@accumulator.nil?
|
55
57
|
@accumulator.on_error(error, error_context)
|
56
58
|
end
|
57
59
|
|
data/lib/streamingly/serde.rb
CHANGED
@@ -34,6 +34,7 @@ module Streamingly
|
|
34
34
|
|
35
35
|
def self.from_tabbed_csv(string)
|
36
36
|
k, v = string.split("\t", 2)
|
37
|
+
return if k.nil? || v.nil?
|
37
38
|
key = from_string_or_csv(k)
|
38
39
|
value = if v.include? "\t"
|
39
40
|
from_tabbed_csv(v)
|
@@ -44,11 +45,13 @@ module Streamingly
|
|
44
45
|
end
|
45
46
|
|
46
47
|
def self.from_string_or_csv(string)
|
47
|
-
if string.include? ','
|
48
|
-
from_csv(string)
|
48
|
+
if string.include? ',' # Likely a CSV
|
49
|
+
from_csv(string) # Attempt to parse
|
49
50
|
else
|
50
51
|
string
|
51
52
|
end
|
53
|
+
rescue CSV::MalformedCSVError # Not actually CSV, fallback to string
|
54
|
+
string
|
52
55
|
end
|
53
56
|
|
54
57
|
def self.resolve_class(class_name)
|
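data/lib/streamingly/version.rb
CHANGED
[hunk not captured in this extract; the file summary above lists it as +1 -1]
data/spec/streamingly/serde_spec.rb
CHANGED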
@@ -28,4 +28,43 @@ describe Streamingly::SerDe do
|
|
28
28
|
end
|
29
29
|
end
|
30
30
|
|
31
|
-
|
31
|
+
describe '.to_csv' do
|
32
|
+
it 'is identity function for a string' do
|
33
|
+
record = 'test_string'
|
34
|
+
expect(described_class.to_csv(record)).to eq record
|
35
|
+
end
|
36
|
+
|
37
|
+
it 'is equal to string version of Streamingly kv' do
|
38
|
+
record = Streamingly::KV.new('key', 'value')
|
39
|
+
expect(described_class.to_csv(record)).to eq record.to_s
|
40
|
+
end
|
41
|
+
|
42
|
+
it 'serializes struct to CSV, interpreting decimal fields as floats' do
|
43
|
+
Record = Struct.new(:number, :string)
|
44
|
+
record = Record.new(1, 'string_value')
|
45
|
+
expect(described_class.to_csv(record)).to eq 'Record,1,string_value'
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
describe '.from_string_or_csv' do
|
50
|
+
it 'returns CSV serialization for CSV string' do
|
51
|
+
expect(described_class.from_string_or_csv('1,2')).to eq ['1', '2']
|
52
|
+
end
|
53
|
+
|
54
|
+
it 'returns string if not containing a comma' do
|
55
|
+
expect(described_class.from_string_or_csv('foo')).to eq 'foo'
|
56
|
+
end
|
57
|
+
|
58
|
+
it 'returns string if containing a comma but not valid CSV' do
|
59
|
+
expect(described_class.from_string_or_csv('"foo,bar')).to eq '"foo,bar'
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
describe '.from_tabbed_csv' do
|
64
|
+
it 'returns nested KV pair structure with the first tab as the split' do
|
65
|
+
expect(described_class.from_tabbed_csv("a\tb\tc")).to eq(
|
66
|
+
Streamingly::KV.new('a', Streamingly::KV.new('b', 'c'))
|
67
|
+
)
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: streamingly
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Matt Gillooly
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-06-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|