nebulous 0.0.2 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e16fcc8e895cd4078d1ea39ab8553ee3246cd5fa
4
- data.tar.gz: 46b62d5133aff6cea1cec0370af683834f16fe28
3
+ metadata.gz: 15734d97663b72356a0c85a85a07e5bddc191a0f
4
+ data.tar.gz: a2fdc1e48d2835bc8a6e509733cbbb5a09ed9bdd
5
5
  SHA512:
6
- metadata.gz: 7dd3e249cb5b64e7477c85bd0e34f44750b14d02ae0fafaeb6a33e93decab821c6798b66bfccfd87a10811c1115a008a94f4786c34ab1e72fd87c448aeeab705
7
- data.tar.gz: 8791db72af0fd20b251382fd88982abd0af2a60c6bee83890efdfc8ac68576789783c9a6c1395ebf4c234a14f3db8f3a5dfe6d6d697b2c2d24e34aa0b82ab0b6
6
+ metadata.gz: d98c33418a5c0c497027d57491e8e3698357bd35762b9c11efe67e8d97dce7458693b4129d52516d155e498ff3023b825c6c04e224e679e5f681fa17886dfd3f
7
+ data.tar.gz: 43845b83b364c806fa803affe6b5c8981af2286a68e0ca157d321c63328b0c27ddfb4e7d4f451ba82e5bb5dff7ba617513282c1ef8aa1258f9b106c2bd885862
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- nebulous (0.0.1)
4
+ nebulous (0.0.3)
5
5
  activesupport
6
6
  cocaine (~> 0.5)
7
7
 
data/README.md CHANGED
@@ -18,7 +18,7 @@ Or install it yourself as:
18
18
 
19
19
  ## Usage
20
20
 
21
- TBD
21
+
22
22
 
23
23
  ## Contributing
24
24
 
@@ -3,10 +3,14 @@ require 'ostruct'
3
3
  require 'cocaine'
4
4
  require 'active_support/all'
5
5
  require 'nebulous/version'
6
- require 'nebulous/parser'
6
+ require 'nebulous/delimiter_detector'
7
7
  require 'nebulous/row'
8
8
  require 'nebulous/chunk'
9
- require 'nebulous/delimiter_detector'
9
+ require 'nebulous/input'
10
+ require 'nebulous/input/reader'
11
+ require 'nebulous/input/parsing'
12
+ require 'nebulous/input/delimiters'
13
+ require 'nebulous/parser'
10
14
 
11
15
  module Nebulous
12
16
  def self.process(file, *args, &block)
@@ -0,0 +1,4 @@
1
+ module Nebulous
2
+ module Input
3
+ end
4
+ end
@@ -0,0 +1,16 @@
1
+ module Nebulous
2
+ module Input
3
+ module Delimiters
4
+ def delimiters
5
+ @delimiters ||= Nebulous::DelimiterDetector.new(file.path).detect
6
+ end
7
+
8
+ private
9
+
10
+ def merge_delimiters
11
+ options.row_sep ||= delimiters[:row_sep]
12
+ options.col_sep ||= delimiters[:col_sep]
13
+ end
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,56 @@
1
+ module Nebulous
2
+ module Input
3
+ module Parsing
4
+ def parse_row
5
+ sequence
6
+ Row.parse(read_complete_line, options).to_numeric.merge(@headers)
7
+ end
8
+
9
+ def read_headers
10
+ @headers ||= Row.headers(readline, options) if options[:headers]
11
+ end
12
+
13
+ def chunk
14
+ @chunk ||= Chunk.new chunk_options
15
+ end
16
+
17
+ def sequence
18
+ @index += 1
19
+ end
20
+
21
+ def limit?
22
+ options.limit && options.limit == @index
23
+ end
24
+
25
+ def yield_chunk(chunk, &_block)
26
+ if chunk.full? || file.eof?
27
+ yield chunk.map(&:to_a)
28
+ @chunk = nil
29
+ end
30
+ end
31
+
32
+ def iterate(&block)
33
+ while !file.eof?
34
+ break if limit?
35
+ chunk << replace_keys(parse_row)
36
+ yield_chunk(chunk, &block) if block_given? && options.chunk
37
+ end
38
+
39
+ @chunk.to_a
40
+ end
41
+
42
+ def replace_keys(row)
43
+ return row unless options.mapping
44
+ row.map do |key, value|
45
+ [options.mapping[key], value] if options.mapping.has_key?(key)
46
+ end.compact.to_h
47
+ end
48
+
49
+ def chunk_options
50
+ Hash.new.tap do |attrs|
51
+ attrs[:size] = options.chunk.to_i if options.chunk
52
+ end
53
+ end
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,29 @@
1
+ module Nebulous
2
+ module Input
3
+ module Reader
4
+ def read_input(input)
5
+ input.respond_to?(:readline) ? input : File.open(input, "r:#{encoding}")
6
+ end
7
+
8
+ def read_complete_line
9
+ ln = readline
10
+ while ln.count(options.quote_char) % 2 == 1
11
+ ln += readline
12
+ end
13
+ ln
14
+ end
15
+
16
+ def readline
17
+ file.readline(line_terminator).encode(encoding, invalid: :replace).chomp
18
+ end
19
+
20
+ def line_terminator
21
+ options.row_sep
22
+ end
23
+
24
+ def encoding
25
+ options.encoding
26
+ end
27
+ end
28
+ end
29
+ end
@@ -1,5 +1,9 @@
1
1
  module Nebulous
2
2
  class Parser
3
+ include Nebulous::Input::Reader
4
+ include Nebulous::Input::Parsing
5
+ include Nebulous::Input::Delimiters
6
+
3
7
  DEFAULT_OPTIONS = {
4
8
  col_sep: nil,
5
9
  row_sep: nil,
@@ -9,7 +13,6 @@ module Nebulous
9
13
  headers: true,
10
14
  mapping: nil,
11
15
  limit: false,
12
- remove_empty_values: true,
13
16
  encoding: Encoding::UTF_8.to_s
14
17
  }
15
18
 
@@ -34,10 +37,6 @@ module Nebulous
34
37
  file.rewind
35
38
  end
36
39
 
37
- def delimiters
38
- @delimiters ||= DelimiterDetector.new(file.path).detect
39
- end
40
-
41
40
  private
42
41
 
43
42
  def reset
@@ -45,85 +44,5 @@ module Nebulous
45
44
  @headers = nil
46
45
  @chunk = nil
47
46
  end
48
-
49
- def chunk
50
- @chunk ||= Chunk.new chunk_options
51
- end
52
-
53
- def read_headers
54
- @headers ||= Row.headers(readline, options) if options[:headers]
55
- end
56
-
57
- def iterate(&block)
58
- while !file.eof?
59
- break if limit?
60
- chunk << replace_keys(parse_row.merge(@headers))
61
- yield_chunk(chunk, &block) if block_given? && options.chunk
62
- end
63
-
64
- @chunk.to_a
65
- end
66
-
67
- def sequence
68
- @index += 1
69
- end
70
-
71
- def limit?
72
- options.limit && options.limit == @index
73
- end
74
-
75
- def parse_row
76
- sequence
77
- Row.parse(read_complete_line, options)
78
- end
79
-
80
- def yield_chunk(chunk, &_block)
81
- if chunk.full? || file.eof?
82
- yield chunk.map(&:to_a)
83
- @chunk = nil
84
- end
85
- end
86
-
87
- def read_input(input)
88
- input.respond_to?(:readline) ? input : File.open(input, "r:#{encoding}")
89
- end
90
-
91
- def read_complete_line
92
- ln = readline
93
- while ln.count(options.quote_char) % 2 == 1
94
- ln += readline
95
- end
96
- ln
97
- end
98
-
99
- def readline
100
- file.readline(line_terminator).encode(encoding, invalid: :replace).chomp
101
- end
102
-
103
- def encoding
104
- options.encoding
105
- end
106
-
107
- def merge_delimiters
108
- options.row_sep ||= delimiters[:row_sep]
109
- options.col_sep ||= delimiters[:col_sep]
110
- end
111
-
112
- def line_terminator
113
- options.row_sep
114
- end
115
-
116
- def chunk_options
117
- Hash.new.tap do |attrs|
118
- attrs[:size] = options.chunk.to_i if options.chunk
119
- end
120
- end
121
-
122
- def replace_keys(row)
123
- return row unless options.mapping
124
- row.map do |key, value|
125
- [options.mapping[key], value] if options.mapping.has_key?(key)
126
- end.compact.to_h
127
- end
128
47
  end
129
48
  end
@@ -9,26 +9,27 @@ module Nebulous
9
9
  end
10
10
 
11
11
  def self.parse(str, opts)
12
- str.gsub!(opts.comment_exp, '')
12
+ opts = opts.to_h
13
+ str.gsub!(opts[:comment_exp], '')
13
14
  str.chomp!
14
15
 
15
16
  begin
16
- args = opts.to_h.slice(:col_sep, :row_sep, :quote_char)
17
+ args = opts.slice(:col_sep, :row_sep, :quote_char)
17
18
  data = CSV.parse_line str, args
18
19
  rescue CSV::MalformedCSVError
19
- exp = /(#{opts.col_sep})(?=(?:[^"]|"[^"]*")*$)/
20
+ exp = /(#{opts[:col_sep]})(?=(?:[^"]|"[^"]*")*$)/
20
21
  data = str.gsub(exp, "\0").split(/\0/)
21
22
  end
22
23
 
23
24
  data.map!(&:strip)
24
- new(data).to_numeric
25
+ new(data)
25
26
  end
26
27
 
27
28
  def to_numeric
28
29
  arr = map do |val|
29
30
  case val
30
31
  when /^[+-]?\d+\.\d+$/
31
- val.to_i
32
+ val.to_f
32
33
  when /^[+-]?\d+$/
33
34
  val.to_i
34
35
  else
@@ -1,3 +1,3 @@
1
1
  module Nebulous
2
- VERSION = '0.0.2'.freeze
2
+ VERSION = '0.0.4'.freeze
3
3
  end
@@ -1,6 +1,30 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe Nebulous::Chunk do
4
- context 'around batches of csv data' do
4
+ context 'around chunk of csv data' do
5
+ subject { Nebulous::Chunk }
6
+
7
+ context '#full?' do
8
+ let(:chunk) { subject.new size: size }
9
+
10
+ before do
11
+ chunk << ['row']
12
+ chunk << ['row']
13
+ end
14
+
15
+ context 'when not full' do
16
+ let(:size) { 3 }
17
+ it 'returns expected value' do
18
+ expect(chunk.full?).to be_falsy
19
+ end
20
+ end
21
+
22
+ context 'when full' do
23
+ let(:size) { 2 }
24
+ it 'returns expected value' do
25
+ expect(chunk.full?).to be_truthy
26
+ end
27
+ end
28
+ end
5
29
  end
6
30
  end
@@ -2,5 +2,70 @@ require 'spec_helper'
2
2
 
3
3
  describe Nebulous::Row do
4
4
  context 'around reading csv rows' do
5
+ subject { Nebulous::Row }
6
+
7
+ let(:col_sep) { ',' }
8
+ let(:row_sep) { "\n" }
9
+ let(:options) do
10
+ { col_sep: col_sep, row_sep: row_sep, quote_char: '"', comment_exp: /^#/ }
11
+ end
12
+
13
+ context '::headers' do
14
+ it 'parses and normalizes a csv string as headers' do
15
+ headers = subject.headers("First name, last-name, guests", options)
16
+ expect(headers).to eq(
17
+ {first_name: :first_name, last_name: :last_name, guests: :guests}
18
+ )
19
+ end
20
+ end
21
+
22
+ context '::parse' do
23
+ context 'with valid csv' do
24
+ it 'returns expected parsed result' do
25
+ row = subject.parse "raw denim, Austin,selvage,artisan", options
26
+ expect(row).to eq ["raw denim", "Austin", "selvage", "artisan"]
27
+ end
28
+ end
29
+
30
+ context 'with valid tsv' do
31
+ let(:col_sep) { "\t" }
32
+ it 'returns expected parsed result' do
33
+ row = subject.parse "raw denim\tAustin\t selvage\tartisan", options
34
+ expect(row).to eq ["raw denim", "Austin", "selvage", "artisan"]
35
+ end
36
+ end
37
+
38
+ context 'with malformed csv' do
39
+ it 'returns expected parsed result' do
40
+ row = subject.parse 'raw denim, Austin "TX, US", artisan', options
41
+ expect(row).to eq ["raw denim", "Austin \"TX, US\"", "artisan"]
42
+ end
43
+ end
44
+
45
+ context 'with malformed tsv' do
46
+ let(:col_sep) { "\t" }
47
+ it 'returns expected parsed result' do
48
+ row = subject.parse "raw denim\t Austin \"TX, US\"\t artisan", options
49
+ expect(row).to eq ["raw denim", "Austin \"TX, US\"", "artisan"]
50
+ end
51
+ end
52
+ end
53
+
54
+ context '#to_numeric' do
55
+ it 'converts numeric values to ints/floats' do
56
+ row = subject.new ["1", "two", "3", "4.5"]
57
+ expect(row.to_numeric).to eq [1, "two", 3, 4.5]
58
+ end
59
+ end
60
+
61
+ context '#merge' do
62
+ it 'zips a row with provided headers' do
63
+ headers = subject.headers "first name, last name", options
64
+ row = subject.new ["bob", "barker"]
65
+ expect(row.merge(headers)).to eq(
66
+ { first_name: "bob", last_name: "barker" }
67
+ )
68
+ end
69
+ end
5
70
  end
6
71
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nebulous
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Zach Graves
@@ -112,6 +112,10 @@ files:
112
112
  - lib/nebulous.rb
113
113
  - lib/nebulous/chunk.rb
114
114
  - lib/nebulous/delimiter_detector.rb
115
+ - lib/nebulous/input.rb
116
+ - lib/nebulous/input/delimiters.rb
117
+ - lib/nebulous/input/parsing.rb
118
+ - lib/nebulous/input/reader.rb
115
119
  - lib/nebulous/parser.rb
116
120
  - lib/nebulous/row.rb
117
121
  - lib/nebulous/version.rb