streamingly 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/streamingly/reducer.rb +7 -5
- data/lib/streamingly/serde.rb +5 -2
- data/lib/streamingly/version.rb +1 -1
- data/spec/streamingly/serde_spec.rb +40 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dee9c9df4012d58362750ed8c7c9d1562f4f927f
|
4
|
+
data.tar.gz: 3572f514fc6650de3a820e2a5728a5fcac9bc584
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ccc461ab15a8bbdea0db4faf2bf668b574e9c926c8a65bbb5672682ac4e4f80fcb1d92c75c0d9b993c8c20a985ea6c5683fc7f541bd13562a159093f6d4ec974
|
7
|
+
data.tar.gz: 01b0db0a463137f8f24e4320448ae75770879eb9ef180c1ef4213297120c14cc4c79ae11b40ff19d7d38aba40e36938f0c20d6a0d08b9902e70a6eccbc3275d9
|
data/lib/streamingly/reducer.rb
CHANGED
@@ -23,7 +23,7 @@ module Streamingly
|
|
23
23
|
private
|
24
24
|
|
25
25
|
def flush
|
26
|
-
@accumulator ? @accumulator.flush : []
|
26
|
+
@accumulator ? (@accumulator.flush || []).compact : []
|
27
27
|
rescue StandardError => error
|
28
28
|
on_error(error, {})
|
29
29
|
[]
|
@@ -33,8 +33,10 @@ module Streamingly
|
|
33
33
|
# Streaming Hadoop only treats the first tab as the delimiter between
|
34
34
|
# the key and value. Additional tabs are grouped into the value:
|
35
35
|
# http://hadoop.apache.org/docs/r0.18.3/streaming.html#How+Does+Streaming+Work
|
36
|
-
key, value = line.split("\t", 2)
|
36
|
+
key, value = (line || '').split("\t", 2)
|
37
|
+
return [] unless key && value
|
37
38
|
|
39
|
+
results = nil
|
38
40
|
if @prev_key != key
|
39
41
|
results = flush
|
40
42
|
|
@@ -42,16 +44,16 @@ module Streamingly
|
|
42
44
|
@accumulator = new_accumulator(key)
|
43
45
|
end
|
44
46
|
|
45
|
-
@accumulator.apply_value(value)
|
47
|
+
@accumulator.apply_value(value) if @accumulator
|
46
48
|
|
47
49
|
results || []
|
48
50
|
rescue StandardError => error
|
49
|
-
on_error(error,
|
51
|
+
on_error(error, line: line)
|
50
52
|
[]
|
51
53
|
end
|
52
54
|
|
53
55
|
def on_error(error, error_context)
|
54
|
-
raise error unless @error_callback_defined
|
56
|
+
raise error unless @error_callback_defined && !@accumulator.nil?
|
55
57
|
@accumulator.on_error(error, error_context)
|
56
58
|
end
|
57
59
|
|
data/lib/streamingly/serde.rb
CHANGED
@@ -34,6 +34,7 @@ module Streamingly
|
|
34
34
|
|
35
35
|
def self.from_tabbed_csv(string)
|
36
36
|
k, v = string.split("\t", 2)
|
37
|
+
return if k.nil? || v.nil?
|
37
38
|
key = from_string_or_csv(k)
|
38
39
|
value = if v.include? "\t"
|
39
40
|
from_tabbed_csv(v)
|
@@ -44,11 +45,13 @@ module Streamingly
|
|
44
45
|
end
|
45
46
|
|
46
47
|
def self.from_string_or_csv(string)
|
47
|
-
if string.include? ','
|
48
|
-
from_csv(string)
|
48
|
+
if string.include? ',' # Likely a CSV
|
49
|
+
from_csv(string) # Attempt to parse
|
49
50
|
else
|
50
51
|
string
|
51
52
|
end
|
53
|
+
rescue CSV::MalformedCSVError # Not actually CSV, fallback to string
|
54
|
+
string
|
52
55
|
end
|
53
56
|
|
54
57
|
def self.resolve_class(class_name)
|
data/lib/streamingly/version.rb
CHANGED
data/spec/streamingly/serde_spec.rb
CHANGED
@@ -28,4 +28,43 @@ describe Streamingly::SerDe do
|
|
28
28
|
end
|
29
29
|
end
|
30
30
|
|
31
|
-
|
31
|
+
describe '.to_csv' do
|
32
|
+
it 'is identity function for a string' do
|
33
|
+
record = 'test_string'
|
34
|
+
expect(described_class.to_csv(record)).to eq record
|
35
|
+
end
|
36
|
+
|
37
|
+
it 'is equal to string version of Streamingly kv' do
|
38
|
+
record = Streamingly::KV.new('key', 'value')
|
39
|
+
expect(described_class.to_csv(record)).to eq record.to_s
|
40
|
+
end
|
41
|
+
|
42
|
+
it 'serializes struct to CSV, interpreting decimal fields as floats' do
|
43
|
+
Record = Struct.new(:number, :string)
|
44
|
+
record = Record.new(1, 'string_value')
|
45
|
+
expect(described_class.to_csv(record)).to eq 'Record,1,string_value'
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
describe '.from_string_or_csv' do
|
50
|
+
it 'returns CSV serialization for CSV string' do
|
51
|
+
expect(described_class.from_string_or_csv('1,2')).to eq ['1', '2']
|
52
|
+
end
|
53
|
+
|
54
|
+
it 'returns string if not containing a comma' do
|
55
|
+
expect(described_class.from_string_or_csv('foo')).to eq 'foo'
|
56
|
+
end
|
57
|
+
|
58
|
+
it 'returns string if containing a comma but not valid CSV' do
|
59
|
+
expect(described_class.from_string_or_csv('"foo,bar')).to eq '"foo,bar'
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
describe '.from_tabbed_csv' do
|
64
|
+
it 'returns nested KV pair structure with the first tab as the split' do
|
65
|
+
expect(described_class.from_tabbed_csv("a\tb\tc")).to eq(
|
66
|
+
Streamingly::KV.new('a', Streamingly::KV.new('b', 'c'))
|
67
|
+
)
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: streamingly
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Matt Gillooly
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-06-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|