nebulous 0.0.2 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e16fcc8e895cd4078d1ea39ab8553ee3246cd5fa
4
- data.tar.gz: 46b62d5133aff6cea1cec0370af683834f16fe28
3
+ metadata.gz: 15734d97663b72356a0c85a85a07e5bddc191a0f
4
+ data.tar.gz: a2fdc1e48d2835bc8a6e509733cbbb5a09ed9bdd
5
5
  SHA512:
6
- metadata.gz: 7dd3e249cb5b64e7477c85bd0e34f44750b14d02ae0fafaeb6a33e93decab821c6798b66bfccfd87a10811c1115a008a94f4786c34ab1e72fd87c448aeeab705
7
- data.tar.gz: 8791db72af0fd20b251382fd88982abd0af2a60c6bee83890efdfc8ac68576789783c9a6c1395ebf4c234a14f3db8f3a5dfe6d6d697b2c2d24e34aa0b82ab0b6
6
+ metadata.gz: d98c33418a5c0c497027d57491e8e3698357bd35762b9c11efe67e8d97dce7458693b4129d52516d155e498ff3023b825c6c04e224e679e5f681fa17886dfd3f
7
+ data.tar.gz: 43845b83b364c806fa803affe6b5c8981af2286a68e0ca157d321c63328b0c27ddfb4e7d4f451ba82e5bb5dff7ba617513282c1ef8aa1258f9b106c2bd885862
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- nebulous (0.0.1)
4
+ nebulous (0.0.3)
5
5
  activesupport
6
6
  cocaine (~> 0.5)
7
7
 
data/README.md CHANGED
@@ -18,7 +18,7 @@ Or install it yourself as:
18
18
 
19
19
  ## Usage
20
20
 
21
- TBD
21
+
22
22
 
23
23
  ## Contributing
24
24
 
@@ -3,10 +3,14 @@ require 'ostruct'
3
3
  require 'cocaine'
4
4
  require 'active_support/all'
5
5
  require 'nebulous/version'
6
- require 'nebulous/parser'
6
+ require 'nebulous/delimiter_detector'
7
7
  require 'nebulous/row'
8
8
  require 'nebulous/chunk'
9
- require 'nebulous/delimiter_detector'
9
+ require 'nebulous/input'
10
+ require 'nebulous/input/reader'
11
+ require 'nebulous/input/parsing'
12
+ require 'nebulous/input/delimiters'
13
+ require 'nebulous/parser'
10
14
 
11
15
  module Nebulous
12
16
  def self.process(file, *args, &block)
@@ -0,0 +1,4 @@
1
+ module Nebulous
2
+ module Input
3
+ end
4
+ end
@@ -0,0 +1,16 @@
1
+ module Nebulous
2
+ module Input
3
+ module Delimiters
4
+ def delimiters
5
+ @delimiters ||= Nebulous::DelimiterDetector.new(file.path).detect
6
+ end
7
+
8
+ private
9
+
10
+ def merge_delimiters
11
+ options.row_sep ||= delimiters[:row_sep]
12
+ options.col_sep ||= delimiters[:col_sep]
13
+ end
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,56 @@
1
+ module Nebulous
2
+ module Input
3
+ module Parsing
4
+ def parse_row
5
+ sequence
6
+ Row.parse(read_complete_line, options).to_numeric.merge(@headers)
7
+ end
8
+
9
+ def read_headers
10
+ @headers ||= Row.headers(readline, options) if options[:headers]
11
+ end
12
+
13
+ def chunk
14
+ @chunk ||= Chunk.new chunk_options
15
+ end
16
+
17
+ def sequence
18
+ @index += 1
19
+ end
20
+
21
+ def limit?
22
+ options.limit && options.limit == @index
23
+ end
24
+
25
+ def yield_chunk(chunk, &_block)
26
+ if chunk.full? || file.eof?
27
+ yield chunk.map(&:to_a)
28
+ @chunk = nil
29
+ end
30
+ end
31
+
32
+ def iterate(&block)
33
+ while !file.eof?
34
+ break if limit?
35
+ chunk << replace_keys(parse_row)
36
+ yield_chunk(chunk, &block) if block_given? && options.chunk
37
+ end
38
+
39
+ @chunk.to_a
40
+ end
41
+
42
+ def replace_keys(row)
43
+ return row unless options.mapping
44
+ row.map do |key, value|
45
+ [options.mapping[key], value] if options.mapping.has_key?(key)
46
+ end.compact.to_h
47
+ end
48
+
49
+ def chunk_options
50
+ Hash.new.tap do |attrs|
51
+ attrs[:size] = options.chunk.to_i if options.chunk
52
+ end
53
+ end
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,29 @@
1
+ module Nebulous
2
+ module Input
3
+ module Reader
4
+ def read_input(input)
5
+ input.respond_to?(:readline) ? input : File.open(input, "r:#{encoding}")
6
+ end
7
+
8
+ def read_complete_line
9
+ ln = readline
10
+ while ln.count(options.quote_char) % 2 == 1
11
+ ln += readline
12
+ end
13
+ ln
14
+ end
15
+
16
+ def readline
17
+ file.readline(line_terminator).encode(encoding, invalid: :replace).chomp
18
+ end
19
+
20
+ def line_terminator
21
+ options.row_sep
22
+ end
23
+
24
+ def encoding
25
+ options.encoding
26
+ end
27
+ end
28
+ end
29
+ end
@@ -1,5 +1,9 @@
1
1
  module Nebulous
2
2
  class Parser
3
+ include Nebulous::Input::Reader
4
+ include Nebulous::Input::Parsing
5
+ include Nebulous::Input::Delimiters
6
+
3
7
  DEFAULT_OPTIONS = {
4
8
  col_sep: nil,
5
9
  row_sep: nil,
@@ -9,7 +13,6 @@ module Nebulous
9
13
  headers: true,
10
14
  mapping: nil,
11
15
  limit: false,
12
- remove_empty_values: true,
13
16
  encoding: Encoding::UTF_8.to_s
14
17
  }
15
18
 
@@ -34,10 +37,6 @@ module Nebulous
34
37
  file.rewind
35
38
  end
36
39
 
37
- def delimiters
38
- @delimiters ||= DelimiterDetector.new(file.path).detect
39
- end
40
-
41
40
  private
42
41
 
43
42
  def reset
@@ -45,85 +44,5 @@ module Nebulous
45
44
  @headers = nil
46
45
  @chunk = nil
47
46
  end
48
-
49
- def chunk
50
- @chunk ||= Chunk.new chunk_options
51
- end
52
-
53
- def read_headers
54
- @headers ||= Row.headers(readline, options) if options[:headers]
55
- end
56
-
57
- def iterate(&block)
58
- while !file.eof?
59
- break if limit?
60
- chunk << replace_keys(parse_row.merge(@headers))
61
- yield_chunk(chunk, &block) if block_given? && options.chunk
62
- end
63
-
64
- @chunk.to_a
65
- end
66
-
67
- def sequence
68
- @index += 1
69
- end
70
-
71
- def limit?
72
- options.limit && options.limit == @index
73
- end
74
-
75
- def parse_row
76
- sequence
77
- Row.parse(read_complete_line, options)
78
- end
79
-
80
- def yield_chunk(chunk, &_block)
81
- if chunk.full? || file.eof?
82
- yield chunk.map(&:to_a)
83
- @chunk = nil
84
- end
85
- end
86
-
87
- def read_input(input)
88
- input.respond_to?(:readline) ? input : File.open(input, "r:#{encoding}")
89
- end
90
-
91
- def read_complete_line
92
- ln = readline
93
- while ln.count(options.quote_char) % 2 == 1
94
- ln += readline
95
- end
96
- ln
97
- end
98
-
99
- def readline
100
- file.readline(line_terminator).encode(encoding, invalid: :replace).chomp
101
- end
102
-
103
- def encoding
104
- options.encoding
105
- end
106
-
107
- def merge_delimiters
108
- options.row_sep ||= delimiters[:row_sep]
109
- options.col_sep ||= delimiters[:col_sep]
110
- end
111
-
112
- def line_terminator
113
- options.row_sep
114
- end
115
-
116
- def chunk_options
117
- Hash.new.tap do |attrs|
118
- attrs[:size] = options.chunk.to_i if options.chunk
119
- end
120
- end
121
-
122
- def replace_keys(row)
123
- return row unless options.mapping
124
- row.map do |key, value|
125
- [options.mapping[key], value] if options.mapping.has_key?(key)
126
- end.compact.to_h
127
- end
128
47
  end
129
48
  end
@@ -9,26 +9,27 @@ module Nebulous
9
9
  end
10
10
 
11
11
  def self.parse(str, opts)
12
- str.gsub!(opts.comment_exp, '')
12
+ opts = opts.to_h
13
+ str.gsub!(opts[:comment_exp], '')
13
14
  str.chomp!
14
15
 
15
16
  begin
16
- args = opts.to_h.slice(:col_sep, :row_sep, :quote_char)
17
+ args = opts.slice(:col_sep, :row_sep, :quote_char)
17
18
  data = CSV.parse_line str, args
18
19
  rescue CSV::MalformedCSVError
19
- exp = /(#{opts.col_sep})(?=(?:[^"]|"[^"]*")*$)/
20
+ exp = /(#{opts[:col_sep]})(?=(?:[^"]|"[^"]*")*$)/
20
21
  data = str.gsub(exp, "\0").split(/\0/)
21
22
  end
22
23
 
23
24
  data.map!(&:strip)
24
- new(data).to_numeric
25
+ new(data)
25
26
  end
26
27
 
27
28
  def to_numeric
28
29
  arr = map do |val|
29
30
  case val
30
31
  when /^[+-]?\d+\.\d+$/
31
- val.to_i
32
+ val.to_f
32
33
  when /^[+-]?\d+$/
33
34
  val.to_i
34
35
  else
@@ -1,3 +1,3 @@
1
1
  module Nebulous
2
- VERSION = '0.0.2'.freeze
2
+ VERSION = '0.0.4'.freeze
3
3
  end
@@ -1,6 +1,30 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe Nebulous::Chunk do
4
- context 'around batches of csv data' do
4
+ context 'around chunk of csv data' do
5
+ subject { Nebulous::Chunk }
6
+
7
+ context '#full?' do
8
+ let(:chunk) { subject.new size: size }
9
+
10
+ before do
11
+ chunk << ['row']
12
+ chunk << ['row']
13
+ end
14
+
15
+ context 'when not full' do
16
+ let(:size) { 3 }
17
+ it 'returns expected value' do
18
+ expect(chunk.full?).to be_falsy
19
+ end
20
+ end
21
+
22
+ context 'when full' do
23
+ let(:size) { 2 }
24
+ it 'returns expected value' do
25
+ expect(chunk.full?).to be_truthy
26
+ end
27
+ end
28
+ end
5
29
  end
6
30
  end
@@ -2,5 +2,70 @@ require 'spec_helper'
2
2
 
3
3
  describe Nebulous::Row do
4
4
  context 'around reading csv rows' do
5
+ subject { Nebulous::Row }
6
+
7
+ let(:col_sep) { ',' }
8
+ let(:row_sep) { "\n" }
9
+ let(:options) do
10
+ { col_sep: col_sep, row_sep: row_sep, quote_char: '"', comment_exp: /^#/ }
11
+ end
12
+
13
+ context '::headers' do
14
+ it 'parses and normalizes a csv string as headers' do
15
+ headers = subject.headers("First name, last-name, guests", options)
16
+ expect(headers).to eq(
17
+ {first_name: :first_name, last_name: :last_name, guests: :guests}
18
+ )
19
+ end
20
+ end
21
+
22
+ context '::parse' do
23
+ context 'with valid csv' do
24
+ it 'returns expected parsed result' do
25
+ row = subject.parse "raw denim, Austin,selvage,artisan", options
26
+ expect(row).to eq ["raw denim", "Austin", "selvage", "artisan"]
27
+ end
28
+ end
29
+
30
+ context 'with valid tsv' do
31
+ let(:col_sep) { "\t" }
32
+ it 'returns expected parsed result' do
33
+ row = subject.parse "raw denim\tAustin\t selvage\tartisan", options
34
+ expect(row).to eq ["raw denim", "Austin", "selvage", "artisan"]
35
+ end
36
+ end
37
+
38
+ context 'with malformed csv' do
39
+ it 'returns expected parsed result' do
40
+ row = subject.parse 'raw denim, Austin "TX, US", artisan', options
41
+ expect(row).to eq ["raw denim", "Austin \"TX, US\"", "artisan"]
42
+ end
43
+ end
44
+
45
+ context 'with malformed tsv' do
46
+ let(:col_sep) { "\t" }
47
+ it 'returns expected parsed result' do
48
+ row = subject.parse "raw denim\t Austin \"TX, US\"\t artisan", options
49
+ expect(row).to eq ["raw denim", "Austin \"TX, US\"", "artisan"]
50
+ end
51
+ end
52
+ end
53
+
54
+ context '#to_numeric' do
55
+ it 'converts numeric values to ints/floats' do
56
+ row = subject.new ["1", "two", "3", "4.5"]
57
+ expect(row.to_numeric).to eq [1, "two", 3, 4.5]
58
+ end
59
+ end
60
+
61
+ context '#merge' do
62
+ it 'zips a row with provided headers' do
63
+ headers = subject.headers "first name, last name", options
64
+ row = subject.new ["bob", "barker"]
65
+ expect(row.merge(headers)).to eq(
66
+ { first_name: "bob", last_name: "barker" }
67
+ )
68
+ end
69
+ end
5
70
  end
6
71
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nebulous
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Zach Graves
@@ -112,6 +112,10 @@ files:
112
112
  - lib/nebulous.rb
113
113
  - lib/nebulous/chunk.rb
114
114
  - lib/nebulous/delimiter_detector.rb
115
+ - lib/nebulous/input.rb
116
+ - lib/nebulous/input/delimiters.rb
117
+ - lib/nebulous/input/parsing.rb
118
+ - lib/nebulous/input/reader.rb
115
119
  - lib/nebulous/parser.rb
116
120
  - lib/nebulous/row.rb
117
121
  - lib/nebulous/version.rb