csvreader 1.2.4 → 1.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,123 +1,122 @@
1
- # encoding: utf-8
2
-
3
- class CsvReader
4
-
5
- class ParserTable
6
-
7
- ###################################
8
- ## add simple logger with debug flag/switch
9
- #
10
- # use Parser.debug = true # to turn on
11
- #
12
- # todo/fix: use logutils instead of std logger - why? why not?
13
-
14
- def self.build_logger()
15
- l = Logger.new( STDOUT )
16
- l.level = :info ## set to :info on start; note: is 0 (debug) by default
17
- l
18
- end
19
- def self.logger() @@logger ||= build_logger; end
20
- def logger() self.class.logger; end
21
-
22
-
23
-
24
-
25
- attr_reader :config ## todo/fix: change config to proper dialect class/struct - why? why not?
26
-
27
- ##
28
- ## todo/check:
29
- ## null values - include NA - why? why not?
30
- ## make null values case sensitive or add an option for case sensitive
31
- ## or better allow a proc as option for checking too!!!
32
- def initialize( space: nil )
33
- @config = {} ## todo/fix: change config to proper dialect class/struct - why? why not?
34
-
35
- ## e.g. treat/convert char to space e.g. _-+• etc
36
- ## Man_Utd => Man Utd
37
- ## or use it for leading and trailing spaces without quotes
38
- ## todo/check: only use for unquoted values? why? why not?
39
- @config[:space] = space
40
- end
41
-
42
-
43
- #########################################
44
- ## config convenience helpers
45
- def space=( value ) @config[:space]=value; end
46
-
47
-
48
-
49
-
50
-
51
- def parse( str_or_readable, **kwargs, &block )
52
-
53
- ## note: input: required each_line (string or io/file for example)
54
- ## note: kwargs NOT used for now (but required for "protocol/interface" by other parsers)
55
-
56
- input = str_or_readable ## assume it's a string or io/file handle
57
-
58
- if block_given?
59
- parse_lines( input, &block )
60
- else
61
- records = []
62
-
63
- parse_lines( input ) do |record|
64
- records << record
65
- end
66
-
67
- records
68
- end
69
- end ## method parse
70
-
71
-
72
-
73
- private
74
-
75
- def parse_lines( input, &block )
76
-
77
- space = config[:space]
78
-
79
- ## note: each line only works with \n (windows) or \r\n (unix)
80
- ## will NOT work with \r (old mac, any others?) only!!!!
81
- input.each_line do |line|
82
-
83
- logger.debug "line:" if logger.debug?
84
- logger.debug line.pretty_inspect if logger.debug?
85
-
86
-
87
- ## note: chomp('') if is an empty string,
88
- ## it will remove all trailing newlines from the string.
89
- ## use line.sub(/[\n\r]*$/, '') or similar instead - why? why not?
90
- line = line.chomp( '' )
91
- line = line.strip ## strip leading and trailing whitespaces (space/tab) too
92
- logger.debug line.pretty_inspect if logger.debug?
93
-
94
- if line.empty? ## skip blank lines
95
- logger.debug "skip blank line" if logger.debug?
96
- next
97
- end
98
-
99
- if line.start_with?( "#" ) ## skip comment lines
100
- logger.debug "skip comment line" if logger.debug?
101
- next
102
- end
103
-
104
- # note: string.split defaults to split by space (e.g. /\s+/) :-)
105
- # for just make it "explicit" with /[ \t]+/
106
-
107
- values = line.split( /[ \t]+/ )
108
- logger.debug values.pretty_inspect if logger.debug?
109
-
110
- if space
111
- ## e.g. translate _-+ etc. if configured to space
112
- ## Man_Utd => Man Utd etc.
113
- values = values.map {|value| value.tr(space,' ') }
114
- end
115
-
116
- ## note: requires block - enforce? how? why? why not?
117
- block.call( values )
118
- end
119
- end # method parse_lines
120
-
121
-
122
- end # class ParserTable
123
- end # class CsvReader
1
+
2
+ class CsvReader
3
+
4
+ class ParserTable
5
+
6
+ ###################################
7
+ ## add simple logger with debug flag/switch
8
+ #
9
+ # use Parser.debug = true # to turn on
10
+ #
11
+ # todo/fix: use logutils instead of std logger - why? why not?
12
+
13
+ def self.build_logger()
14
+ l = Logger.new( STDOUT )
15
+ l.level = :info ## set to :info on start; note: is 0 (debug) by default
16
+ l
17
+ end
18
+ def self.logger() @@logger ||= build_logger; end
19
+ def logger() self.class.logger; end
20
+
21
+
22
+
23
+
24
+ attr_reader :config ## todo/fix: change config to proper dialect class/struct - why? why not?
25
+
26
+ ##
27
+ ## todo/check:
28
+ ## null values - include NA - why? why not?
29
+ ## make null values case sensitive or add an option for case sensitive
30
+ ## or better allow a proc as option for checking too!!!
31
+ def initialize( space: nil )
32
+ @config = {} ## todo/fix: change config to proper dialect class/struct - why? why not?
33
+
34
+ ## e.g. treat/convert char to space e.g. _-+• etc
35
+ ## Man_Utd => Man Utd
36
+ ## or use it for leading and trailing spaces without quotes
37
+ ## todo/check: only use for unquoted values? why? why not?
38
+ @config[:space] = space
39
+ end
40
+
41
+
42
+ #########################################
43
+ ## config convenience helpers
44
+ def space=( value ) @config[:space]=value; end
45
+
46
+
47
+
48
+
49
+
50
+ def parse( str_or_readable, **kwargs, &block )
51
+
52
+ ## note: input: required each_line (string or io/file for example)
53
+ ## note: kwargs NOT used for now (but required for "protocol/interface" by other parsers)
54
+
55
+ input = str_or_readable ## assume it's a string or io/file handle
56
+
57
+ if block_given?
58
+ parse_lines( input, &block )
59
+ else
60
+ records = []
61
+
62
+ parse_lines( input ) do |record|
63
+ records << record
64
+ end
65
+
66
+ records
67
+ end
68
+ end ## method parse
69
+
70
+
71
+
72
+ private
73
+
74
+ def parse_lines( input, &block )
75
+
76
+ space = config[:space]
77
+
78
+ ## note: each line only works with \n (windows) or \r\n (unix)
79
+ ## will NOT work with \r (old mac, any others?) only!!!!
80
+ input.each_line do |line|
81
+
82
+ logger.debug "line:" if logger.debug?
83
+ logger.debug line.pretty_inspect if logger.debug?
84
+
85
+
86
+ ## note: chomp('') if is an empty string,
87
+ ## it will remove all trailing newlines from the string.
88
+ ## use line.sub(/[\n\r]*$/, '') or similar instead - why? why not?
89
+ line = line.chomp( '' )
90
+ line = line.strip ## strip leading and trailing whitespaces (space/tab) too
91
+ logger.debug line.pretty_inspect if logger.debug?
92
+
93
+ if line.empty? ## skip blank lines
94
+ logger.debug "skip blank line" if logger.debug?
95
+ next
96
+ end
97
+
98
+ if line.start_with?( "#" ) ## skip comment lines
99
+ logger.debug "skip comment line" if logger.debug?
100
+ next
101
+ end
102
+
103
+ # note: string.split defaults to split by space (e.g. /\s+/) :-)
104
+ # for just make it "explicit" with /[ \t]+/
105
+
106
+ values = line.split( /[ \t]+/ )
107
+ logger.debug values.pretty_inspect if logger.debug?
108
+
109
+ if space
110
+ ## e.g. translate _-+ etc. if configured to space
111
+ ## Man_Utd => Man Utd etc.
112
+ values = values.map {|value| value.tr(space,' ') }
113
+ end
114
+
115
+ ## note: requires block - enforce? how? why? why not?
116
+ block.call( values )
117
+ end
118
+ end # method parse_lines
119
+
120
+
121
+ end # class ParserTable
122
+ end # class CsvReader
@@ -1,24 +1,23 @@
1
- # encoding: utf-8
2
-
3
-
4
- class CsvReader
5
-
6
- class ParserYaml
7
-
8
- def parse( data, **kwargs, &block )
9
- ## note: kwargs NOT used for now (but required for "protocol/interface" by other parsers)
10
-
11
- ## note: input: required each_line (string or io/file for example)
12
- ## assume data is a string or io/file handle
13
- csv = CsvYaml.new( data )
14
-
15
- if block_given?
16
- csv.each( &block )
17
- else
18
- csv.to_a
19
- end
20
- end ## method parse
21
-
22
-
23
- end # class ParserYaml
24
- end # class CsvReader
1
+
2
+
3
+ class CsvReader
4
+
5
+ class ParserYaml
6
+
7
+ def parse( data, **kwargs, &block )
8
+ ## note: kwargs NOT used for now (but required for "protocol/interface" by other parsers)
9
+
10
+ ## note: input: required each_line (string or io/file for example)
11
+ ## assume data is a string or io/file handle
12
+ csv = CsvYaml.new( data )
13
+
14
+ if block_given?
15
+ csv.each( &block )
16
+ else
17
+ csv.to_a
18
+ end
19
+ end ## method parse
20
+
21
+
22
+ end # class ParserYaml
23
+ end # class CsvReader
@@ -1,4 +1,3 @@
1
- # encoding: utf-8
2
1
 
3
2
  class CsvReader
4
3
 
@@ -155,11 +154,11 @@ class CsvReader
155
154
 
156
155
  ## check array / pipeline of converters is empty (size=0 e.g. is [])
157
156
  if @converters.empty?
158
- @parser.parse( @io, kwargs, &block )
157
+ @parser.parse( @io, **kwargs, &block )
159
158
  else
160
159
  ## add "post"-processing with converters pipeline
161
160
  ## that is, convert all strings to integer, float, date, ... if wanted
162
- @parser.parse( @io, kwargs ) do |raw_record|
161
+ @parser.parse( @io, **kwargs ) do |raw_record|
163
162
  record = []
164
163
  raw_record.each_with_index do | value, i |
165
164
  record << @converters.convert( value, i )
@@ -1,4 +1,3 @@
1
- # encoding: utf-8
2
1
 
3
2
  class CsvHashReader
4
3
 
@@ -169,7 +168,7 @@ def_delegators :@io,
169
168
  kwargs[:width] = @kwargs[:width] if @parser.is_a?( ParserFixed )
170
169
 
171
170
 
172
- @parser.parse( @io, kwargs ) do |raw_values| # sep: sep
171
+ @parser.parse( @io, **kwargs ) do |raw_values| # sep: sep
173
172
  if @names.nil? ## check for (first) headers row
174
173
  if @header_converters.empty?
175
174
  @names = raw_values ## store header row / a.k.a. field/column names
@@ -1,32 +1,30 @@
1
- # encoding: utf-8
2
-
3
-
4
- class CsvReader ## note: uses a class for now - change to module - why? why not?
5
-
6
- module Version
7
- MAJOR = 1 ## todo: namespace inside version or something - why? why not??
8
- MINOR = 2
9
- PATCH = 4
10
-
11
- ## self.to_s - why? why not?
12
- end
13
-
14
- VERSION = [Version::MAJOR,
15
- Version::MINOR,
16
- Version::PATCH].join('.')
17
-
18
- def self.version ## keep (as an alternative to VERSION) - why? why not?
19
- VERSION
20
- end
21
-
22
-
23
-
24
- def self.banner
25
- "csvreader/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
26
- end
27
-
28
- def self.root
29
- File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
30
- end
31
-
32
- end # class CsvReader
1
+
2
+ class CsvReader ## note: uses a class for now - change to module - why? why not?
3
+
4
+ module Version
5
+ MAJOR = 1 ## todo: namespace inside version or something - why? why not??
6
+ MINOR = 2
7
+ PATCH = 5
8
+
9
+ ## self.to_s - why? why not?
10
+ end
11
+
12
+ VERSION = [Version::MAJOR,
13
+ Version::MINOR,
14
+ Version::PATCH].join('.')
15
+
16
+ def self.version ## keep (as an alternative to VERSION) - why? why not?
17
+ VERSION
18
+ end
19
+
20
+
21
+
22
+ def self.banner
23
+ "csvreader/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}] in (#{root})"
24
+ end
25
+
26
+ def self.root
27
+ File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
28
+ end
29
+
30
+ end # class CsvReader
data/lib/csvreader.rb CHANGED
@@ -1,4 +1,3 @@
1
- # encoding: utf-8
2
1
 
3
2
 
4
3
  ## our own code (without "top-level" shortcuts e.g. "modular version")
@@ -1,66 +1,66 @@
1
- # encoding: utf-8
2
-
3
- ###
4
- # to run use
5
- # ruby -I ./lib -I ./test test/test_parser_formats.rb
6
-
7
-
8
- require 'helper'
9
-
10
- class TestParserFormats < MiniTest::Test
11
-
12
-
13
- def parser
14
- CsvReader::Parser
15
- end
16
-
17
-
18
- def test_parse_whitespace
19
- records = [["a", "b", "c"],
20
- ["1", "2", "3"]]
21
-
22
- ## don't care about newlines (\r\n) ??? - fix? why? why not?
23
- assert_equal records, parser.default.parse( "a,b,c\n1,2,3" )
24
- assert_equal records, parser.default.parse( "a,b,c\n1,2,3\n" )
25
- assert_equal records, parser.default.parse( " a, b ,c \n\n1,2,3\n" )
26
- assert_equal records, parser.default.parse( " a, b ,c \n \n1,2,3\n" )
27
-
28
- assert_equal [["a", "b", "c"],
29
- [""],
30
- ["1", "2", "3"]], parser.default.parse( %Q{a,b,c\n""\n1,2,3\n} )
31
- assert_equal [["", ""],
32
- [""],
33
- ["", "", ""]], parser.default.parse( %Q{,\n""\n"","",""\n} )
34
-
35
-
36
- ## strict rfc4180 - no trim leading or trailing spaces or blank lines
37
- assert_equal records, parser.strict.parse( "a,b,c\n1,2,3" )
38
- assert_equal [["a", "b", "c"],
39
- [""],
40
- ["1", "2", "3"]], parser.strict.parse( "a,b,c\n\n1,2,3" )
41
- assert_equal [[" a", " b ", "c "],
42
- [""],
43
- ["1", "2", "3"]], parser.strict.parse( " a, b ,c \n\n1,2,3" )
44
- assert_equal [[" a", " b ", "c "],
45
- [" "],
46
- ["",""],
47
- ["1", "2", "3"]], parser.strict.parse( " a, b ,c \n \n,\n1,2,3" )
48
- end
49
-
50
-
51
- def test_parse_empties
52
- assert_equal [], parser.default.parse( "\n \n \n" )
53
-
54
- ## strict rfc4180 - no trim leading or trailing spaces or blank lines
55
- assert_equal [[""],
56
- [" "],
57
- [" "]], parser.strict.parse( "\n \n \n" )
58
- assert_equal [[""],
59
- [" "],
60
- [" "]], parser.strict.parse( "\n \n " )
61
-
62
- assert_equal [[""]], parser.strict.parse( "\n" )
63
- assert_equal [], parser.strict.parse( "" )
64
- end
65
-
66
- end # class TestParserFormats
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_parser_formats.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+ class TestParserFormats < MiniTest::Test
11
+
12
+
13
+ def parser
14
+ CsvReader::Parser
15
+ end
16
+
17
+
18
+ def test_parse_whitespace
19
+ records = [["a", "b", "c"],
20
+ ["1", "2", "3"]]
21
+
22
+ ## don't care about newlines (\r\n) ??? - fix? why? why not?
23
+ assert_equal records, parser.default.parse( "a,b,c\n1,2,3" )
24
+ assert_equal records, parser.default.parse( "a,b,c\n1,2,3\n" )
25
+ assert_equal records, parser.default.parse( " a, b ,c \n\n1,2,3\n" )
26
+ assert_equal records, parser.default.parse( " a, b ,c \n \n1,2,3\n" )
27
+
28
+ assert_equal [["a", "b", "c"],
29
+ [""],
30
+ ["1", "2", "3"]], parser.default.parse( %Q{a,b,c\n""\n1,2,3\n} )
31
+ assert_equal [["", ""],
32
+ [""],
33
+ ["", "", ""]], parser.default.parse( %Q{,\n""\n"","",""\n} )
34
+
35
+
36
+ ## strict rfc4180 - no trim leading or trailing spaces or blank lines
37
+ assert_equal records, parser.strict.parse( "a,b,c\n1,2,3" )
38
+ assert_equal [["a", "b", "c"],
39
+ [""],
40
+ ["1", "2", "3"]], parser.strict.parse( "a,b,c\n\n1,2,3" )
41
+ assert_equal [[" a", " b ", "c "],
42
+ [""],
43
+ ["1", "2", "3"]], parser.strict.parse( " a, b ,c \n\n1,2,3" )
44
+ assert_equal [[" a", " b ", "c "],
45
+ [" "],
46
+ ["",""],
47
+ ["1", "2", "3"]], parser.strict.parse( " a, b ,c \n \n,\n1,2,3" )
48
+ end
49
+
50
+
51
+ def test_parse_empties
52
+ assert_equal [], parser.default.parse( "\n \n \n" )
53
+
54
+ ## strict rfc4180 - no trim leading or trailing spaces or blank lines
55
+ assert_equal [[""],
56
+ [" "],
57
+ [" "]], parser.strict.parse( "\n \n \n" )
58
+ assert_equal [[""],
59
+ [" "],
60
+ [" "]], parser.strict.parse( "\n \n " )
61
+
62
+ assert_equal [[""]], parser.strict.parse( "\n" )
63
+ assert_equal [], parser.strict.parse( "" )
64
+ end
65
+
66
+ end # class TestParserFormats