csvreader 1.2.4 → 1.2.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,123 +1,122 @@
1
- # encoding: utf-8
2
-
3
- class CsvReader
4
-
5
- class ParserTable
6
-
7
- ###################################
8
- ## add simple logger with debug flag/switch
9
- #
10
- # use Parser.debug = true # to turn on
11
- #
12
- # todo/fix: use logutils instead of std logger - why? why not?
13
-
14
- def self.build_logger()
15
- l = Logger.new( STDOUT )
16
- l.level = :info ## set to :info on start; note: is 0 (debug) by default
17
- l
18
- end
19
- def self.logger() @@logger ||= build_logger; end
20
- def logger() self.class.logger; end
21
-
22
-
23
-
24
-
25
- attr_reader :config ## todo/fix: change config to proper dialect class/struct - why? why not?
26
-
27
- ##
28
- ## todo/check:
29
- ## null values - include NA - why? why not?
30
- ## make null values case sensitive or add an option for case sensitive
31
- ## or better allow a proc as option for checking too!!!
32
- def initialize( space: nil )
33
- @config = {} ## todo/fix: change config to proper dialect class/struct - why? why not?
34
-
35
- ## e.g. treat/convert char to space e.g. _-+• etc
36
- ## Man_Utd => Man Utd
37
- ## or use it for leading and trailing spaces without quotes
38
- ## todo/check: only use for unquoted values? why? why not?
39
- @config[:space] = space
40
- end
41
-
42
-
43
- #########################################
44
- ## config convenience helpers
45
- def space=( value ) @config[:space]=value; end
46
-
47
-
48
-
49
-
50
-
51
- def parse( str_or_readable, **kwargs, &block )
52
-
53
- ## note: input: required each_line (string or io/file for example)
54
- ## note: kwargs NOT used for now (but required for "protocol/interface" by other parsers)
55
-
56
- input = str_or_readable ## assume it's a string or io/file handle
57
-
58
- if block_given?
59
- parse_lines( input, &block )
60
- else
61
- records = []
62
-
63
- parse_lines( input ) do |record|
64
- records << record
65
- end
66
-
67
- records
68
- end
69
- end ## method parse
70
-
71
-
72
-
73
- private
74
-
75
- def parse_lines( input, &block )
76
-
77
- space = config[:space]
78
-
79
- ## note: each line only works with \n (unix) or \r\n (windows)
80
- ## will NOT work with \r (old mac, any others?) only!!!!
81
- input.each_line do |line|
82
-
83
- logger.debug "line:" if logger.debug?
84
- logger.debug line.pretty_inspect if logger.debug?
85
-
86
-
87
- ## note: chomp('') - if the separator is an empty string,
88
- ## it will remove all trailing newlines from the string.
89
- ## use line.sub(/[\n\r]*$/, '') or similar instead - why? why not?
90
- line = line.chomp( '' )
91
- line = line.strip ## strip leading and trailing whitespaces (space/tab) too
92
- logger.debug line.pretty_inspect if logger.debug?
93
-
94
- if line.empty? ## skip blank lines
95
- logger.debug "skip blank line" if logger.debug?
96
- next
97
- end
98
-
99
- if line.start_with?( "#" ) ## skip comment lines
100
- logger.debug "skip comment line" if logger.debug?
101
- next
102
- end
103
-
104
- # note: string.split defaults to split by space (e.g. /\s+/) :-)
105
- # for now just make it "explicit" with /[ \t]+/
106
-
107
- values = line.split( /[ \t]+/ )
108
- logger.debug values.pretty_inspect if logger.debug?
109
-
110
- if space
111
- ## e.g. translate _-+ etc. if configured to space
112
- ## Man_Utd => Man Utd etc.
113
- values = values.map {|value| value.tr(space,' ') }
114
- end
115
-
116
- ## note: requires block - enforce? how? why? why not?
117
- block.call( values )
118
- end
119
- end # method parse_lines
120
-
121
-
122
- end # class ParserTable
123
- end # class CsvReader
1
+
2
+ class CsvReader
3
+
4
+ class ParserTable
5
+
6
+ ###################################
7
+ ## add simple logger with debug flag/switch
8
+ #
9
+ # use Parser.debug = true # to turn on
10
+ #
11
+ # todo/fix: use logutils instead of std logger - why? why not?
12
+
13
+ def self.build_logger()
14
+ l = Logger.new( STDOUT )
15
+ l.level = :info ## set to :info on start; note: is 0 (debug) by default
16
+ l
17
+ end
18
+ def self.logger() @@logger ||= build_logger; end
19
+ def logger() self.class.logger; end
20
+
21
+
22
+
23
+
24
+ attr_reader :config ## todo/fix: change config to proper dialect class/struct - why? why not?
25
+
26
+ ##
27
+ ## todo/check:
28
+ ## null values - include NA - why? why not?
29
+ ## make null values case sensitive or add an option for case sensitive
30
+ ## or better allow a proc as option for checking too!!!
31
+ def initialize( space: nil )
32
+ @config = {} ## todo/fix: change config to proper dialect class/struct - why? why not?
33
+
34
+ ## e.g. treat/convert char to space e.g. _-+• etc
35
+ ## Man_Utd => Man Utd
36
+ ## or use it for leading and trailing spaces without quotes
37
+ ## todo/check: only use for unquoted values? why? why not?
38
+ @config[:space] = space
39
+ end
40
+
41
+
42
+ #########################################
43
+ ## config convenience helpers
44
+ def space=( value ) @config[:space]=value; end
45
+
46
+
47
+
48
+
49
+
50
+ def parse( str_or_readable, **kwargs, &block )
51
+
52
+ ## note: input: required each_line (string or io/file for example)
53
+ ## note: kwargs NOT used for now (but required for "protocol/interface" by other parsers)
54
+
55
+ input = str_or_readable ## assume it's a string or io/file handle
56
+
57
+ if block_given?
58
+ parse_lines( input, &block )
59
+ else
60
+ records = []
61
+
62
+ parse_lines( input ) do |record|
63
+ records << record
64
+ end
65
+
66
+ records
67
+ end
68
+ end ## method parse
69
+
70
+
71
+
72
+ private
73
+
74
+ def parse_lines( input, &block )
75
+
76
+ space = config[:space]
77
+
78
+ ## note: each line only works with \n (unix) or \r\n (windows)
79
+ ## will NOT work with \r (old mac, any others?) only!!!!
80
+ input.each_line do |line|
81
+
82
+ logger.debug "line:" if logger.debug?
83
+ logger.debug line.pretty_inspect if logger.debug?
84
+
85
+
86
+ ## note: chomp('') - if the separator is an empty string,
87
+ ## it will remove all trailing newlines from the string.
88
+ ## use line.sub(/[\n\r]*$/, '') or similar instead - why? why not?
89
+ line = line.chomp( '' )
90
+ line = line.strip ## strip leading and trailing whitespaces (space/tab) too
91
+ logger.debug line.pretty_inspect if logger.debug?
92
+
93
+ if line.empty? ## skip blank lines
94
+ logger.debug "skip blank line" if logger.debug?
95
+ next
96
+ end
97
+
98
+ if line.start_with?( "#" ) ## skip comment lines
99
+ logger.debug "skip comment line" if logger.debug?
100
+ next
101
+ end
102
+
103
+ # note: string.split defaults to split by space (e.g. /\s+/) :-)
104
+ # for now just make it "explicit" with /[ \t]+/
105
+
106
+ values = line.split( /[ \t]+/ )
107
+ logger.debug values.pretty_inspect if logger.debug?
108
+
109
+ if space
110
+ ## e.g. translate _-+ etc. if configured to space
111
+ ## Man_Utd => Man Utd etc.
112
+ values = values.map {|value| value.tr(space,' ') }
113
+ end
114
+
115
+ ## note: requires block - enforce? how? why? why not?
116
+ block.call( values )
117
+ end
118
+ end # method parse_lines
119
+
120
+
121
+ end # class ParserTable
122
+ end # class CsvReader
@@ -1,24 +1,23 @@
1
- # encoding: utf-8
2
-
3
-
4
- class CsvReader
5
-
6
- class ParserYaml
7
-
8
- def parse( data, **kwargs, &block )
9
- ## note: kwargs NOT used for now (but required for "protocol/interface" by other parsers)
10
-
11
- ## note: input: required each_line (string or io/file for example)
12
- ## assume data is a string or io/file handle
13
- csv = CsvYaml.new( data )
14
-
15
- if block_given?
16
- csv.each( &block )
17
- else
18
- csv.to_a
19
- end
20
- end ## method parse
21
-
22
-
23
- end # class ParserYaml
24
- end # class CsvReader
1
+
2
+
3
+ class CsvReader
4
+
5
+ class ParserYaml
6
+
7
+ def parse( data, **kwargs, &block )
8
+ ## note: kwargs NOT used for now (but required for "protocol/interface" by other parsers)
9
+
10
+ ## note: input: required each_line (string or io/file for example)
11
+ ## assume data is a string or io/file handle
12
+ csv = CsvYaml.new( data )
13
+
14
+ if block_given?
15
+ csv.each( &block )
16
+ else
17
+ csv.to_a
18
+ end
19
+ end ## method parse
20
+
21
+
22
+ end # class ParserYaml
23
+ end # class CsvReader
@@ -1,4 +1,3 @@
1
- # encoding: utf-8
2
1
 
3
2
  class CsvReader
4
3
 
@@ -155,11 +154,11 @@ class CsvReader
155
154
 
156
155
  ## check array / pipeline of converters is empty (size=0 e.g. is [])
157
156
  if @converters.empty?
158
- @parser.parse( @io, kwargs, &block )
157
+ @parser.parse( @io, **kwargs, &block )
159
158
  else
160
159
  ## add "post"-processing with converters pipeline
161
160
  ## that is, convert all strings to integer, float, date, ... if wanted
162
- @parser.parse( @io, kwargs ) do |raw_record|
161
+ @parser.parse( @io, **kwargs ) do |raw_record|
163
162
  record = []
164
163
  raw_record.each_with_index do | value, i |
165
164
  record << @converters.convert( value, i )
@@ -1,4 +1,3 @@
1
- # encoding: utf-8
2
1
 
3
2
  class CsvHashReader
4
3
 
@@ -169,7 +168,7 @@ def_delegators :@io,
169
168
  kwargs[:width] = @kwargs[:width] if @parser.is_a?( ParserFixed )
170
169
 
171
170
 
172
- @parser.parse( @io, kwargs ) do |raw_values| # sep: sep
171
+ @parser.parse( @io, **kwargs ) do |raw_values| # sep: sep
173
172
  if @names.nil? ## check for (first) headers row
174
173
  if @header_converters.empty?
175
174
  @names = raw_values ## store header row / a.k.a. field/column names
@@ -1,32 +1,30 @@
1
- # encoding: utf-8
2
-
3
-
4
- class CsvReader ## note: uses a class for now - change to module - why? why not?
5
-
6
- module Version
7
- MAJOR = 1 ## todo: namespace inside version or something - why? why not??
8
- MINOR = 2
9
- PATCH = 4
10
-
11
- ## self.to_s - why? why not?
12
- end
13
-
14
- VERSION = [Version::MAJOR,
15
- Version::MINOR,
16
- Version::PATCH].join('.')
17
-
18
- def self.version ## keep (as an alternative to VERSION) - why? why not?
19
- VERSION
20
- end
21
-
22
-
23
-
24
- def self.banner
25
- "csvreader/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
26
- end
27
-
28
- def self.root
29
- File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
30
- end
31
-
32
- end # class CsvReader
1
+
2
+ class CsvReader ## note: uses a class for now - change to module - why? why not?
3
+
4
+ module Version
5
+ MAJOR = 1 ## todo: namespace inside version or something - why? why not??
6
+ MINOR = 2
7
+ PATCH = 5
8
+
9
+ ## self.to_s - why? why not?
10
+ end
11
+
12
+ VERSION = [Version::MAJOR,
13
+ Version::MINOR,
14
+ Version::PATCH].join('.')
15
+
16
+ def self.version ## keep (as an alternative to VERSION) - why? why not?
17
+ VERSION
18
+ end
19
+
20
+
21
+
22
+ def self.banner
23
+ "csvreader/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}] in (#{root})"
24
+ end
25
+
26
+ def self.root
27
+ File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
28
+ end
29
+
30
+ end # class CsvReader
data/lib/csvreader.rb CHANGED
@@ -1,4 +1,3 @@
1
- # encoding: utf-8
2
1
 
3
2
 
4
3
  ## our own code (without "top-level" shortcuts e.g. "modular version")
@@ -1,66 +1,66 @@
1
- # encoding: utf-8
2
-
3
- ###
4
- # to run use
5
- # ruby -I ./lib -I ./test test/test_parser_formats.rb
6
-
7
-
8
- require 'helper'
9
-
10
- class TestParserFormats < MiniTest::Test
11
-
12
-
13
- def parser
14
- CsvReader::Parser
15
- end
16
-
17
-
18
- def test_parse_whitespace
19
- records = [["a", "b", "c"],
20
- ["1", "2", "3"]]
21
-
22
- ## don't care about newlines (\r\n) ??? - fix? why? why not?
23
- assert_equal records, parser.default.parse( "a,b,c\n1,2,3" )
24
- assert_equal records, parser.default.parse( "a,b,c\n1,2,3\n" )
25
- assert_equal records, parser.default.parse( " a, b ,c \n\n1,2,3\n" )
26
- assert_equal records, parser.default.parse( " a, b ,c \n \n1,2,3\n" )
27
-
28
- assert_equal [["a", "b", "c"],
29
- [""],
30
- ["1", "2", "3"]], parser.default.parse( %Q{a,b,c\n""\n1,2,3\n} )
31
- assert_equal [["", ""],
32
- [""],
33
- ["", "", ""]], parser.default.parse( %Q{,\n""\n"","",""\n} )
34
-
35
-
36
- ## strict rfc4180 - no trim leading or trailing spaces or blank lines
37
- assert_equal records, parser.strict.parse( "a,b,c\n1,2,3" )
38
- assert_equal [["a", "b", "c"],
39
- [""],
40
- ["1", "2", "3"]], parser.strict.parse( "a,b,c\n\n1,2,3" )
41
- assert_equal [[" a", " b ", "c "],
42
- [""],
43
- ["1", "2", "3"]], parser.strict.parse( " a, b ,c \n\n1,2,3" )
44
- assert_equal [[" a", " b ", "c "],
45
- [" "],
46
- ["",""],
47
- ["1", "2", "3"]], parser.strict.parse( " a, b ,c \n \n,\n1,2,3" )
48
- end
49
-
50
-
51
- def test_parse_empties
52
- assert_equal [], parser.default.parse( "\n \n \n" )
53
-
54
- ## strict rfc4180 - no trim leading or trailing spaces or blank lines
55
- assert_equal [[""],
56
- [" "],
57
- [" "]], parser.strict.parse( "\n \n \n" )
58
- assert_equal [[""],
59
- [" "],
60
- [" "]], parser.strict.parse( "\n \n " )
61
-
62
- assert_equal [[""]], parser.strict.parse( "\n" )
63
- assert_equal [], parser.strict.parse( "" )
64
- end
65
-
66
- end # class TestParserFormats
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_parser_formats.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+ class TestParserFormats < MiniTest::Test
11
+
12
+
13
+ def parser
14
+ CsvReader::Parser
15
+ end
16
+
17
+
18
+ def test_parse_whitespace
19
+ records = [["a", "b", "c"],
20
+ ["1", "2", "3"]]
21
+
22
+ ## don't care about newlines (\r\n) ??? - fix? why? why not?
23
+ assert_equal records, parser.default.parse( "a,b,c\n1,2,3" )
24
+ assert_equal records, parser.default.parse( "a,b,c\n1,2,3\n" )
25
+ assert_equal records, parser.default.parse( " a, b ,c \n\n1,2,3\n" )
26
+ assert_equal records, parser.default.parse( " a, b ,c \n \n1,2,3\n" )
27
+
28
+ assert_equal [["a", "b", "c"],
29
+ [""],
30
+ ["1", "2", "3"]], parser.default.parse( %Q{a,b,c\n""\n1,2,3\n} )
31
+ assert_equal [["", ""],
32
+ [""],
33
+ ["", "", ""]], parser.default.parse( %Q{,\n""\n"","",""\n} )
34
+
35
+
36
+ ## strict rfc4180 - no trim leading or trailing spaces or blank lines
37
+ assert_equal records, parser.strict.parse( "a,b,c\n1,2,3" )
38
+ assert_equal [["a", "b", "c"],
39
+ [""],
40
+ ["1", "2", "3"]], parser.strict.parse( "a,b,c\n\n1,2,3" )
41
+ assert_equal [[" a", " b ", "c "],
42
+ [""],
43
+ ["1", "2", "3"]], parser.strict.parse( " a, b ,c \n\n1,2,3" )
44
+ assert_equal [[" a", " b ", "c "],
45
+ [" "],
46
+ ["",""],
47
+ ["1", "2", "3"]], parser.strict.parse( " a, b ,c \n \n,\n1,2,3" )
48
+ end
49
+
50
+
51
+ def test_parse_empties
52
+ assert_equal [], parser.default.parse( "\n \n \n" )
53
+
54
+ ## strict rfc4180 - no trim leading or trailing spaces or blank lines
55
+ assert_equal [[""],
56
+ [" "],
57
+ [" "]], parser.strict.parse( "\n \n \n" )
58
+ assert_equal [[""],
59
+ [" "],
60
+ [" "]], parser.strict.parse( "\n \n " )
61
+
62
+ assert_equal [[""]], parser.strict.parse( "\n" )
63
+ assert_equal [], parser.strict.parse( "" )
64
+ end
65
+
66
+ end # class TestParserFormats