streamingly 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3868d732dd4b4cae20b4e659401dfb3d30f2d1f8
4
- data.tar.gz: 4123fd67e1617b8d8f08e414989508760b7b1014
3
+ metadata.gz: dee9c9df4012d58362750ed8c7c9d1562f4f927f
4
+ data.tar.gz: 3572f514fc6650de3a820e2a5728a5fcac9bc584
5
5
  SHA512:
6
- metadata.gz: c56860e644409b403cee9670c6706360ee04e617c8875d7eebb6df7bbc87c397a6361cd401b5407caf057a7377b17f6d8ef296b92a90622f4da286ee188d834b
7
- data.tar.gz: b99332851d72cb7ec2363db6524985ac6dd7a12e51c8ac7d42955fb46c26fec771bc257ed866376e58e8da127f3c0c1a550b29f1b4d3b68ade217fc4b4da6083
6
+ metadata.gz: ccc461ab15a8bbdea0db4faf2bf668b574e9c926c8a65bbb5672682ac4e4f80fcb1d92c75c0d9b993c8c20a985ea6c5683fc7f541bd13562a159093f6d4ec974
7
+ data.tar.gz: 01b0db0a463137f8f24e4320448ae75770879eb9ef180c1ef4213297120c14cc4c79ae11b40ff19d7d38aba40e36938f0c20d6a0d08b9902e70a6eccbc3275d9
@@ -23,7 +23,7 @@ module Streamingly
23
23
  private
24
24
 
25
25
  def flush
26
- @accumulator ? @accumulator.flush : []
26
+ @accumulator ? (@accumulator.flush || []).compact : []
27
27
  rescue StandardError => error
28
28
  on_error(error, {})
29
29
  []
@@ -33,8 +33,10 @@ module Streamingly
33
33
  # Streaming Hadoop only treats the first tab as the delimiter between
34
34
  # the key and value. Additional tabs are grouped into the value:
35
35
  # http://hadoop.apache.org/docs/r0.18.3/streaming.html#How+Does+Streaming+Work
36
- key, value = line.split("\t", 2)
36
+ key, value = (line || '').split("\t", 2)
37
+ return [] unless key && value
37
38
 
39
+ results = nil
38
40
  if @prev_key != key
39
41
  results = flush
40
42
 
@@ -42,16 +44,16 @@ module Streamingly
42
44
  @accumulator = new_accumulator(key)
43
45
  end
44
46
 
45
- @accumulator.apply_value(value)
47
+ @accumulator.apply_value(value) if @accumulator
46
48
 
47
49
  results || []
48
50
  rescue StandardError => error
49
- on_error(error, { :line => line })
51
+ on_error(error, line: line)
50
52
  []
51
53
  end
52
54
 
53
55
  def on_error(error, error_context)
54
- raise error unless @error_callback_defined
56
+ raise error unless @error_callback_defined && !@accumulator.nil?
55
57
  @accumulator.on_error(error, error_context)
56
58
  end
57
59
 
@@ -34,6 +34,7 @@ module Streamingly
34
34
 
35
35
  def self.from_tabbed_csv(string)
36
36
  k, v = string.split("\t", 2)
37
+ return if k.nil? || v.nil?
37
38
  key = from_string_or_csv(k)
38
39
  value = if v.include? "\t"
39
40
  from_tabbed_csv(v)
@@ -44,11 +45,13 @@ module Streamingly
44
45
  end
45
46
 
46
47
  def self.from_string_or_csv(string)
47
- if string.include? ','
48
- from_csv(string)
48
+ if string.include? ',' # Likely a CSV
49
+ from_csv(string) # Attempt to parse
49
50
  else
50
51
  string
51
52
  end
53
+ rescue CSV::MalformedCSVError # Not actually CSV, fallback to string
54
+ string
52
55
  end
53
56
 
54
57
  def self.resolve_class(class_name)
@@ -1,3 +1,3 @@
1
1
  module Streamingly
2
- VERSION = "0.2.0"
2
+ VERSION = '0.2.2'
3
3
  end
@@ -28,4 +28,43 @@ describe Streamingly::SerDe do
28
28
  end
29
29
  end
30
30
 
31
- end
31
+ describe '.to_csv' do
32
+ it 'is identity function for a string' do
33
+ record = 'test_string'
34
+ expect(described_class.to_csv(record)).to eq record
35
+ end
36
+
37
+ it 'is equal to string version of Streamingly kv' do
38
+ record = Streamingly::KV.new('key', 'value')
39
+ expect(described_class.to_csv(record)).to eq record.to_s
40
+ end
41
+
42
+ it 'serializes struct to CSV, interpreting decimal fields as floats' do
43
+ Record = Struct.new(:number, :string)
44
+ record = Record.new(1, 'string_value')
45
+ expect(described_class.to_csv(record)).to eq 'Record,1,string_value'
46
+ end
47
+ end
48
+
49
+ describe '.from_string_or_csv' do
50
+ it 'returns CSV serialization for CSV string' do
51
+ expect(described_class.from_string_or_csv('1,2')).to eq ['1', '2']
52
+ end
53
+
54
+ it 'returns string if not containing a comma' do
55
+ expect(described_class.from_string_or_csv('foo')).to eq 'foo'
56
+ end
57
+
58
+ it 'returns string if containing a comma but not valid CSV' do
59
+ expect(described_class.from_string_or_csv('"foo,bar')).to eq '"foo,bar'
60
+ end
61
+ end
62
+
63
+ describe '.from_tabbed_csv' do
64
+ it 'returns nested KV pair structure with the first tab as the split' do
65
+ expect(described_class.from_tabbed_csv("a\tb\tc")).to eq(
66
+ Streamingly::KV.new('a', Streamingly::KV.new('b', 'c'))
67
+ )
68
+ end
69
+ end
70
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: streamingly
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matt Gillooly
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-12-31 00:00:00.000000000 Z
11
+ date: 2015-06-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler