csvreader 1.2.1 → 1.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,62 +1,22 @@
1
- # encoding: utf-8
2
-
3
- class CsvReader
4
-
5
- class ParserTab
6
-
7
- def parse( data, **kwargs, &block )
8
-
9
- ## note: input: required each_line (string or io/file for example)
10
- ## note: kwargs NOT used for now (but required for "protocol/interface" by other parsers)
11
-
12
- input = data ## assume it's a string or io/file handle
13
-
14
- if block_given?
15
- parse_lines( input, &block )
16
- else
17
- records = []
18
-
19
- parse_lines( input ) do |record|
20
- records << record
21
- end
22
-
23
- records
24
- end
25
- end ## method parse
26
-
27
-
28
-
29
- private
30
-
31
- def parse_lines( input, &block )
32
-
33
- ## note: each line only works with \n (unix) or \r\n (windows)
34
- ## will NOT work with \r (old mac, any others?) only!!!!
35
- input.each_line do |line|
36
-
37
- ## puts "line:"
38
- ## pp line
39
-
40
- ## note: chomp('') - if the separator is an empty string,
41
- ## it will remove all trailing newlines from the string.
42
- ## use line.sub(/[\n\r]*$/, '') or similar instead - why? why not?
43
- line = line.chomp( '' )
44
- ## pp line
45
-
46
- # note: trailing empty fields get (auto-)trimmed by split !!!!!!!
47
- # Solution!! change split( "\t" ) to split( "\t", -1 )
48
- # If the limit parameter is omitted, trailing null fields are suppressed.
49
- # If limit is a positive number, at most that number of fields will be returned
50
- # (if limit is 1, the entire string is returned as the only entry in an array).
51
- # If negative, there is no limit to the number of fields returned, and trailing null fields are not suppressed.
52
- values = line.split( "\t", -1 )
53
- ## pp values
54
-
55
- ## note: requires block - enforce? how? why? why not?
56
- block.call( values )
57
- end
58
- end # method parse_lines
59
-
60
-
61
- end # class ParserTab
62
- end # class CsvReader
1
+
2
+ class CsvReader
3
+
4
+ class ParserTab
5
+
6
+ def parse( data, **kwargs, &block )
7
+ ## note: kwargs NOT used for now (but required for "protocol/interface" by other parsers)
8
+
9
+ ## note: input: required each_line (string or io/file for example)
10
+ ## assume data is a string or io/file handle
11
+ tab = TabReader.new( data )
12
+
13
+ if block_given?
14
+ tab.each( &block )
15
+ else
16
+ tab.to_a
17
+ end
18
+ end ## method parse
19
+
20
+
21
+ end # class ParserTab
22
+ end # class CsvReader
@@ -1,123 +1,122 @@
1
- # encoding: utf-8
2
-
3
- class CsvReader
4
-
5
- class ParserTable
6
-
7
- ###################################
8
- ## add simple logger with debug flag/switch
9
- #
10
- # use Parser.debug = true # to turn on
11
- #
12
- # todo/fix: use logutils instead of std logger - why? why not?
13
-
14
- def self.build_logger()
15
- l = Logger.new( STDOUT )
16
- l.level = :info ## set to :info on start; note: is 0 (debug) by default
17
- l
18
- end
19
- def self.logger() @@logger ||= build_logger; end
20
- def logger() self.class.logger; end
21
-
22
-
23
-
24
-
25
- attr_reader :config ## todo/fix: change config to proper dialect class/struct - why? why not?
26
-
27
- ##
28
- ## todo/check:
29
- ## null values - include NA - why? why not?
30
- ## make null values case sensitive or add an option for case sensitive
31
- ## or better allow a proc as option for checking too!!!
32
- def initialize( space: nil )
33
- @config = {} ## todo/fix: change config to proper dialect class/struct - why? why not?
34
-
35
- ## e.g. treat/convert char to space e.g. _-+• etc
36
- ## Man_Utd => Man Utd
37
- ## or use it for leading and trailing spaces without quotes
38
- ## todo/check: only use for unquoted values? why? why not?
39
- @config[:space] = space
40
- end
41
-
42
-
43
- #########################################
44
- ## config convenience helpers
45
- def space=( value ) @config[:space]=value; end
46
-
47
-
48
-
49
-
50
-
51
- def parse( str_or_readable, **kwargs, &block )
52
-
53
- ## note: input: required each_line (string or io/file for example)
54
- ## note: kwargs NOT used for now (but required for "protocol/interface" by other parsers)
55
-
56
- input = str_or_readable ## assume it's a string or io/file handle
57
-
58
- if block_given?
59
- parse_lines( input, &block )
60
- else
61
- records = []
62
-
63
- parse_lines( input ) do |record|
64
- records << record
65
- end
66
-
67
- records
68
- end
69
- end ## method parse
70
-
71
-
72
-
73
- private
74
-
75
- def parse_lines( input, &block )
76
-
77
- space = config[:space]
78
-
79
- ## note: each line only works with \n (unix) or \r\n (windows)
80
- ## will NOT work with \r (old mac, any others?) only!!!!
81
- input.each_line do |line|
82
-
83
- logger.debug "line:" if logger.debug?
84
- logger.debug line.pretty_inspect if logger.debug?
85
-
86
-
87
- ## note: chomp('') - if the separator is an empty string,
88
- ## it will remove all trailing newlines from the string.
89
- ## use line.sub(/[\n\r]*$/, '') or similar instead - why? why not?
90
- line = line.chomp( '' )
91
- line = line.strip ## strip leading and trailing whitespaces (space/tab) too
92
- logger.debug line.pretty_inspect if logger.debug?
93
-
94
- if line.empty? ## skip blank lines
95
- logger.debug "skip blank line" if logger.debug?
96
- next
97
- end
98
-
99
- if line.start_with?( "#" ) ## skip comment lines
100
- logger.debug "skip comment line" if logger.debug?
101
- next
102
- end
103
-
104
- # note: string.split defaults to split by space (e.g. /\s+/) :-)
105
- # for now just make it "explicit" with /[ \t]+/
106
-
107
- values = line.split( /[ \t]+/ )
108
- logger.debug values.pretty_inspect if logger.debug?
109
-
110
- if space
111
- ## e.g. translate _-+ etc. if configured to space
112
- ## Man_Utd => Man Utd etc.
113
- values = values.map {|value| value.tr(space,' ') }
114
- end
115
-
116
- ## note: requires block - enforce? how? why? why not?
117
- block.call( values )
118
- end
119
- end # method parse_lines
120
-
121
-
122
- end # class ParserTable
123
- end # class CsvReader
1
+
2
+ class CsvReader
3
+
4
+ class ParserTable
5
+
6
+ ###################################
7
+ ## add simple logger with debug flag/switch
8
+ #
9
+ # use Parser.debug = true # to turn on
10
+ #
11
+ # todo/fix: use logutils instead of std logger - why? why not?
12
+
13
+ def self.build_logger()
14
+ l = Logger.new( STDOUT )
15
+ l.level = :info ## set to :info on start; note: is 0 (debug) by default
16
+ l
17
+ end
18
+ def self.logger() @@logger ||= build_logger; end
19
+ def logger() self.class.logger; end
20
+
21
+
22
+
23
+
24
+ attr_reader :config ## todo/fix: change config to proper dialect class/struct - why? why not?
25
+
26
+ ##
27
+ ## todo/check:
28
+ ## null values - include NA - why? why not?
29
+ ## make null values case sensitive or add an option for case sensitive
30
+ ## or better allow a proc as option for checking too!!!
31
+ def initialize( space: nil )
32
+ @config = {} ## todo/fix: change config to proper dialect class/struct - why? why not?
33
+
34
+ ## e.g. treat/convert char to space e.g. _-+• etc
35
+ ## Man_Utd => Man Utd
36
+ ## or use it for leading and trailing spaces without quotes
37
+ ## todo/check: only use for unquoted values? why? why not?
38
+ @config[:space] = space
39
+ end
40
+
41
+
42
+ #########################################
43
+ ## config convenience helpers
44
+ def space=( value ) @config[:space]=value; end
45
+
46
+
47
+
48
+
49
+
50
+ def parse( str_or_readable, **kwargs, &block )
51
+
52
+ ## note: input: required each_line (string or io/file for example)
53
+ ## note: kwargs NOT used for now (but required for "protocol/interface" by other parsers)
54
+
55
+ input = str_or_readable ## assume it's a string or io/file handle
56
+
57
+ if block_given?
58
+ parse_lines( input, &block )
59
+ else
60
+ records = []
61
+
62
+ parse_lines( input ) do |record|
63
+ records << record
64
+ end
65
+
66
+ records
67
+ end
68
+ end ## method parse
69
+
70
+
71
+
72
+ private
73
+
74
+ def parse_lines( input, &block )
75
+
76
+ space = config[:space]
77
+
78
+ ## note: each line only works with \n (unix) or \r\n (windows)
79
+ ## will NOT work with \r (old mac, any others?) only!!!!
80
+ input.each_line do |line|
81
+
82
+ logger.debug "line:" if logger.debug?
83
+ logger.debug line.pretty_inspect if logger.debug?
84
+
85
+
86
+ ## note: chomp('') - if the separator is an empty string,
87
+ ## it will remove all trailing newlines from the string.
88
+ ## use line.sub(/[\n\r]*$/, '') or similar instead - why? why not?
89
+ line = line.chomp( '' )
90
+ line = line.strip ## strip leading and trailing whitespaces (space/tab) too
91
+ logger.debug line.pretty_inspect if logger.debug?
92
+
93
+ if line.empty? ## skip blank lines
94
+ logger.debug "skip blank line" if logger.debug?
95
+ next
96
+ end
97
+
98
+ if line.start_with?( "#" ) ## skip comment lines
99
+ logger.debug "skip comment line" if logger.debug?
100
+ next
101
+ end
102
+
103
+ # note: string.split defaults to split by space (e.g. /\s+/) :-)
104
+ # for now just make it "explicit" with /[ \t]+/
105
+
106
+ values = line.split( /[ \t]+/ )
107
+ logger.debug values.pretty_inspect if logger.debug?
108
+
109
+ if space
110
+ ## e.g. translate _-+ etc. if configured to space
111
+ ## Man_Utd => Man Utd etc.
112
+ values = values.map {|value| value.tr(space,' ') }
113
+ end
114
+
115
+ ## note: requires block - enforce? how? why? why not?
116
+ block.call( values )
117
+ end
118
+ end # method parse_lines
119
+
120
+
121
+ end # class ParserTable
122
+ end # class CsvReader
@@ -0,0 +1,23 @@
1
+
2
+
3
+ class CsvReader
4
+
5
+ class ParserYaml
6
+
7
+ def parse( data, **kwargs, &block )
8
+ ## note: kwargs NOT used for now (but required for "protocol/interface" by other parsers)
9
+
10
+ ## note: input: required each_line (string or io/file for example)
11
+ ## assume data is a string or io/file handle
12
+ csv = CsvYaml.new( data )
13
+
14
+ if block_given?
15
+ csv.each( &block )
16
+ else
17
+ csv.to_a
18
+ end
19
+ end ## method parse
20
+
21
+
22
+ end # class ParserYaml
23
+ end # class CsvReader
@@ -1,4 +1,3 @@
1
- # encoding: utf-8
2
1
 
3
2
  class CsvReader
4
3
 
@@ -155,11 +154,11 @@ class CsvReader
155
154
 
156
155
  ## check array / pipeline of converters is empty (size=0 e.g. is [])
157
156
  if @converters.empty?
158
- @parser.parse( @io, kwargs, &block )
157
+ @parser.parse( @io, **kwargs, &block )
159
158
  else
160
159
  ## add "post"-processing with converters pipeline
161
160
  ## that is, convert all strings to integer, float, date, ... if wanted
162
- @parser.parse( @io, kwargs ) do |raw_record|
161
+ @parser.parse( @io, **kwargs ) do |raw_record|
163
162
  record = []
164
163
  raw_record.each_with_index do | value, i |
165
164
  record << @converters.convert( value, i )
@@ -1,4 +1,3 @@
1
- # encoding: utf-8
2
1
 
3
2
  class CsvHashReader
4
3
 
@@ -6,6 +5,8 @@ class CsvHashReader
6
5
  ## add convenience shortcuts / aliases for CsvReader support classes
7
6
  Parser = CsvReader::Parser
8
7
  ParserFixed = CsvReader::ParserFixed
8
+ ParserJson = CsvReader::ParserJson
9
+ ParserYaml = CsvReader::ParserYaml
9
10
  Converter = CsvReader::Converter
10
11
 
11
12
 
@@ -167,7 +168,7 @@ def_delegators :@io,
167
168
  kwargs[:width] = @kwargs[:width] if @parser.is_a?( ParserFixed )
168
169
 
169
170
 
170
- @parser.parse( @io, kwargs ) do |raw_values| # sep: sep
171
+ @parser.parse( @io, **kwargs ) do |raw_values| # sep: sep
171
172
  if @names.nil? ## check for (first) headers row
172
173
  if @header_converters.empty?
173
174
  @names = raw_values ## store header row / a.k.a. field/column names
@@ -1,32 +1,30 @@
1
- # encoding: utf-8
2
-
3
-
4
- class CsvReader ## note: uses a class for now - change to module - why? why not?
5
-
6
- module Version
7
- MAJOR = 1 ## todo: namespace inside version or something - why? why not??
8
- MINOR = 2
9
- PATCH = 1
10
-
11
- ## self.to_s - why? why not?
12
- end
13
-
14
- VERSION = [Version::MAJOR,
15
- Version::MINOR,
16
- Version::PATCH].join('.')
17
-
18
- def self.version ## keep (as an alternative to VERSION) - why? why not?
19
- VERSION
20
- end
21
-
22
-
23
-
24
- def self.banner
25
- "csvreader/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
26
- end
27
-
28
- def self.root
29
- File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
30
- end
31
-
32
- end # class CsvReader
1
+
2
+ class CsvReader ## note: uses a class for now - change to module - why? why not?
3
+
4
+ module Version
5
+ MAJOR = 1 ## todo: namespace inside version or something - why? why not??
6
+ MINOR = 2
7
+ PATCH = 5
8
+
9
+ ## self.to_s - why? why not?
10
+ end
11
+
12
+ VERSION = [Version::MAJOR,
13
+ Version::MINOR,
14
+ Version::PATCH].join('.')
15
+
16
+ def self.version ## keep (as an alternative to VERSION) - why? why not?
17
+ VERSION
18
+ end
19
+
20
+
21
+
22
+ def self.banner
23
+ "csvreader/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}] in (#{root})"
24
+ end
25
+
26
+ def self.root
27
+ File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
28
+ end
29
+
30
+ end # class CsvReader
data/lib/csvreader.rb CHANGED
@@ -1,4 +1,3 @@
1
- # encoding: utf-8
2
1
 
3
2
 
4
3
  ## our own code (without "top-level" shortcuts e.g. "modular version")
data/test/helper.rb CHANGED
@@ -13,7 +13,7 @@ require 'csvreader'
13
13
  ## add test_data_dir helper
14
14
  class CsvReader
15
15
  def self.test_data_dir
16
- "#{root}/test/data"
16
+ "#{root}/datasets"
17
17
  end
18
18
  end
19
19
 
@@ -0,0 +1,28 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_parser_autofix.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+
11
+ class TestParserAutofix < MiniTest::Test
12
+
13
+
14
+ def parser
15
+ CsvReader::Parser::DEFAULT
16
+ end
17
+
18
+
19
+ def test_quote_with_trailing_value
20
+ recs = [[ "Farrokh", "\"Freddy\" Mercury", "Bulsara" ]]
21
+
22
+ assert_equal recs, parser.parse( %Q{Farrokh,"Freddy" Mercury,Bulsara} )
23
+ assert_equal recs, parser.parse( %Q{ Farrokh , "Freddy" Mercury , Bulsara } )
24
+ assert_equal recs, parser.parse( %Q{Farrokh, "Freddy" Mercury ,Bulsara} )
25
+ end
26
+
27
+
28
+ end # class TestParserAutofix
@@ -1,66 +1,66 @@
1
- # encoding: utf-8
2
-
3
- ###
4
- # to run use
5
- # ruby -I ./lib -I ./test test/test_parser_formats.rb
6
-
7
-
8
- require 'helper'
9
-
10
- class TestParserFormats < MiniTest::Test
11
-
12
-
13
- def parser
14
- CsvReader::Parser
15
- end
16
-
17
-
18
- def test_parse_whitespace
19
- records = [["a", "b", "c"],
20
- ["1", "2", "3"]]
21
-
22
- ## don't care about newlines (\r\n) ??? - fix? why? why not?
23
- assert_equal records, parser.default.parse( "a,b,c\n1,2,3" )
24
- assert_equal records, parser.default.parse( "a,b,c\n1,2,3\n" )
25
- assert_equal records, parser.default.parse( " a, b ,c \n\n1,2,3\n" )
26
- assert_equal records, parser.default.parse( " a, b ,c \n \n1,2,3\n" )
27
-
28
- assert_equal [["a", "b", "c"],
29
- [""],
30
- ["1", "2", "3"]], parser.default.parse( %Q{a,b,c\n""\n1,2,3\n} )
31
- assert_equal [["", ""],
32
- [""],
33
- ["", "", ""]], parser.default.parse( %Q{,\n""\n"","",""\n} )
34
-
35
-
36
- ## strict rfc4180 - no trim leading or trailing spaces or blank lines
37
- assert_equal records, parser.strict.parse( "a,b,c\n1,2,3" )
38
- assert_equal [["a", "b", "c"],
39
- [""],
40
- ["1", "2", "3"]], parser.strict.parse( "a,b,c\n\n1,2,3" )
41
- assert_equal [[" a", " b ", "c "],
42
- [""],
43
- ["1", "2", "3"]], parser.strict.parse( " a, b ,c \n\n1,2,3" )
44
- assert_equal [[" a", " b ", "c "],
45
- [" "],
46
- ["",""],
47
- ["1", "2", "3"]], parser.strict.parse( " a, b ,c \n \n,\n1,2,3" )
48
- end
49
-
50
-
51
- def test_parse_empties
52
- assert_equal [], parser.default.parse( "\n \n \n" )
53
-
54
- ## strict rfc4180 - no trim leading or trailing spaces or blank lines
55
- assert_equal [[""],
56
- [" "],
57
- [" "]], parser.strict.parse( "\n \n \n" )
58
- assert_equal [[""],
59
- [" "],
60
- [" "]], parser.strict.parse( "\n \n " )
61
-
62
- assert_equal [[""]], parser.strict.parse( "\n" )
63
- assert_equal [], parser.strict.parse( "" )
64
- end
65
-
66
- end # class TestParserFormats
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_parser_formats.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+ class TestParserFormats < MiniTest::Test
11
+
12
+
13
+ def parser
14
+ CsvReader::Parser
15
+ end
16
+
17
+
18
+ def test_parse_whitespace
19
+ records = [["a", "b", "c"],
20
+ ["1", "2", "3"]]
21
+
22
+ ## don't care about newlines (\r\n) ??? - fix? why? why not?
23
+ assert_equal records, parser.default.parse( "a,b,c\n1,2,3" )
24
+ assert_equal records, parser.default.parse( "a,b,c\n1,2,3\n" )
25
+ assert_equal records, parser.default.parse( " a, b ,c \n\n1,2,3\n" )
26
+ assert_equal records, parser.default.parse( " a, b ,c \n \n1,2,3\n" )
27
+
28
+ assert_equal [["a", "b", "c"],
29
+ [""],
30
+ ["1", "2", "3"]], parser.default.parse( %Q{a,b,c\n""\n1,2,3\n} )
31
+ assert_equal [["", ""],
32
+ [""],
33
+ ["", "", ""]], parser.default.parse( %Q{,\n""\n"","",""\n} )
34
+
35
+
36
+ ## strict rfc4180 - no trim leading or trailing spaces or blank lines
37
+ assert_equal records, parser.strict.parse( "a,b,c\n1,2,3" )
38
+ assert_equal [["a", "b", "c"],
39
+ [""],
40
+ ["1", "2", "3"]], parser.strict.parse( "a,b,c\n\n1,2,3" )
41
+ assert_equal [[" a", " b ", "c "],
42
+ [""],
43
+ ["1", "2", "3"]], parser.strict.parse( " a, b ,c \n\n1,2,3" )
44
+ assert_equal [[" a", " b ", "c "],
45
+ [" "],
46
+ ["",""],
47
+ ["1", "2", "3"]], parser.strict.parse( " a, b ,c \n \n,\n1,2,3" )
48
+ end
49
+
50
+
51
+ def test_parse_empties
52
+ assert_equal [], parser.default.parse( "\n \n \n" )
53
+
54
+ ## strict rfc4180 - no trim leading or trailing spaces or blank lines
55
+ assert_equal [[""],
56
+ [" "],
57
+ [" "]], parser.strict.parse( "\n \n \n" )
58
+ assert_equal [[""],
59
+ [" "],
60
+ [" "]], parser.strict.parse( "\n \n " )
61
+
62
+ assert_equal [[""]], parser.strict.parse( "\n" )
63
+ assert_equal [], parser.strict.parse( "" )
64
+ end
65
+
66
+ end # class TestParserFormats