csvreader 1.2.4 → 1.2.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/{HISTORY.md → CHANGELOG.md} +3 -3
- data/Manifest.txt +1 -2
- data/README.md +682 -682
- data/Rakefile +33 -32
- data/datasets/cars11.csv +10 -10
- data/datasets/cities11.csv +12 -12
- data/datasets/customers11.csv +13 -13
- data/datasets/iris.attrib.csv +25 -25
- data/datasets/iris11.csv +163 -163
- data/datasets/lcc.attrib.csv +14 -14
- data/datasets/shakespeare.csv +9 -9
- data/lib/csvreader/base.rb +6 -2
- data/lib/csvreader/buffer.rb +0 -1
- data/lib/csvreader/builder.rb +0 -1
- data/lib/csvreader/converter.rb +0 -1
- data/lib/csvreader/parser.rb +32 -33
- data/lib/csvreader/parser_fixed.rb +105 -106
- data/lib/csvreader/parser_json.rb +23 -24
- data/lib/csvreader/parser_std.rb +582 -583
- data/lib/csvreader/parser_strict.rb +290 -291
- data/lib/csvreader/parser_tab.rb +22 -23
- data/lib/csvreader/parser_table.rb +122 -123
- data/lib/csvreader/parser_yaml.rb +23 -24
- data/lib/csvreader/reader.rb +2 -3
- data/lib/csvreader/reader_hash.rb +1 -2
- data/lib/csvreader/version.rb +30 -32
- data/lib/csvreader.rb +0 -1
- data/test/test_parser_formats.rb +66 -66
- data/test/test_parser_java.rb +208 -208
- metadata +18 -15
- data/LICENSE.md +0 -116
@@ -1,123 +1,122 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
l =
|
16
|
-
l
|
17
|
-
|
18
|
-
end
|
19
|
-
def
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
##
|
28
|
-
##
|
29
|
-
##
|
30
|
-
##
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
##
|
36
|
-
##
|
37
|
-
##
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
## note:
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
##
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
logger.debug
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
##
|
88
|
-
##
|
89
|
-
|
90
|
-
line = line.
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
#
|
105
|
-
|
106
|
-
|
107
|
-
values
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
##
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
end # class
|
123
|
-
end # class CsvReader
|
1
|
+
|
2
|
+
class CsvReader

##
# Parser for simple whitespace-separated "table" text.
#
# Every non-blank, non-comment line is split on runs of spaces/tabs
# into an array of string values (one record per line).
# Lines starting with "#" (after stripping) are treated as comments.
class ParserTable

  ###################################
  ## add simple logger with debug flag/switch
  #
  #  set ParserTable.logger.level = :debug   # to turn on debug output
  #
  #  todo/fix: use logutils instead of std logger - why? why not?

  ## Build the default logger (writes to STDOUT).
  def self.build_logger
    l = Logger.new( STDOUT )
    l.level = :info    ## set to :info on start; note: is 0 (debug) by default
    l
  end

  ## Lazily built logger shared by all instances of this class.
  ##   note: kept in a class-level instance variable (not a @@class variable)
  ##         to avoid sharing state across the inheritance tree by accident
  def self.logger() @logger ||= build_logger; end
  def logger()  self.class.logger; end



  attr_reader :config   ## todo/fix: change config to proper dialect class/struct - why? why not?

  ##
  ##  todo/check:
  ##    null values - include NA - why? why not?
  ##    make null values case sensitive or add an option for case sensitive
  ##    or better allow a proc as option for checking too!!!
  ##
  ## space - optional string of characters; every occurrence of one of these
  ##         characters inside a value gets replaced by a space
  def initialize( space: nil )
    @config = {}   ## todo/fix: change config to proper dialect class/struct - why? why not?

    ## e.g. treat/convert char to space e.g. _-+• etc
    ##   Man_Utd => Man Utd
    ##  or use it for leading and trailing spaces without quotes
    ##  todo/check: only use for unquoted values? why? why not?
    @config[:space] = space
  end


  #########################################
  ## config convenience helpers
  def space=( value )  @config[:space]=value; end



  ##
  ## Parse the input into records (arrays of strings).
  ##
  ## str_or_readable - anything responding to each_line (string or io/file for example)
  ## kwargs          - NOT used for now (but required for "protocol/interface" by other parsers)
  ##
  ## With a block every record gets passed to the block;
  ## without a block an array of all records gets returned.
  def parse( str_or_readable, **kwargs, &block )
    input = str_or_readable   ## assume it's a string or io/file handle

    if block_given?
      parse_lines( input, &block )
    else
      records = []
      parse_lines( input ) { |record| records << record }
      records
    end
  end ## method parse



private

  ## Walk over all lines, skip blanks and comments, and call the block
  ## with the values of every remaining line.
  def parse_lines( input, &block )
    space = config[:space]

    ## note: String#tr treats "-" as a range operator and a leading "^" as negation;
    ##         escape once up front so that e.g. "_-+" is a plain set of three chars
    ##         (unescaped it raises ArgumentError - invalid range)
    tr_from = space ? escape_for_tr( space ) : nil

    ## note: each_line only splits on \n (unix) or \r\n (windows)
    ##   will NOT work with \r (old mac, any others?) only!!!!
    input.each_line do |line|

      logger.debug  "line:"             if logger.debug?
      logger.debug line.pretty_inspect  if logger.debug?

      ## strip leading and trailing whitespaces (space/tab) -
      ##   includes all trailing newlines (\n and \r\n) too
      line = line.strip
      logger.debug line.pretty_inspect  if logger.debug?

      if line.empty?             ## skip blank lines
        logger.debug "skip blank line"    if logger.debug?
        next
      end

      if line.start_with?( "#" )  ## skip comment lines
        logger.debug "skip comment line"   if logger.debug?
        next
      end

      # note: string.split defaults to split by space (e.g. /\s+/) :-)
      #          for now make it "explicit" with /[ \t]+/
      values = line.split( /[ \t]+/ )
      logger.debug values.pretty_inspect  if logger.debug?

      if tr_from
        ## e.g. translate _-+ etc. if configured to space
        ##        Man_Utd => Man Utd etc.
        values = values.map { |value| value.tr( tr_from, ' ' ) }
      end

      ## note: requires block - enforce? how? why? why not?
      block.call( values )
    end
  end # method parse_lines

  ## Escape the characters with special meaning in a String#tr spec
  ## (backslash, caret and dash) so every character is matched literally.
  def escape_for_tr( chars )
    chars.to_s.gsub( /[\\\^\-]/ ) { |ch| "\\#{ch}" }
  end

end # class ParserTable
end # class CsvReader
|
@@ -1,24 +1,23 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
##
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
end # class
|
24
|
-
end # class CsvReader
|
1
|
+
|
2
|
+
|
3
|
+
class CsvReader

##
# Thin adapter that exposes CsvYaml through the
# parse( data, **kwargs, &block ) interface shared by the other parsers.
class ParserYaml

  ##
  ## data   - handed as-is to CsvYaml.new
  ##            (assumed to be a string or io/file handle, that is,
  ##             something with each_line - todo: confirm against CsvYaml)
  ## kwargs - NOT used for now (but required for "protocol/interface" by other parsers)
  ##
  ## With a block the records get passed on via CsvYaml#each;
  ## without a block the result of CsvYaml#to_a gets returned.
  def parse( data, **kwargs, &block )
    rows = CsvYaml.new( data )

    return rows.each( &block )   if block_given?

    rows.to_a
  end ## method parse

end # class ParserYaml
end # class CsvReader
|
data/lib/csvreader/reader.rb
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
# encoding: utf-8
|
2
1
|
|
3
2
|
class CsvReader
|
4
3
|
|
@@ -155,11 +154,11 @@ class CsvReader
|
|
155
154
|
|
156
155
|
## check array / pipeline of converters is empty (size=0 e.g. is [])
|
157
156
|
if @converters.empty?
|
158
|
-
@parser.parse( @io, kwargs, &block )
|
157
|
+
@parser.parse( @io, **kwargs, &block )
|
159
158
|
else
|
160
159
|
## add "post"-processing with converters pipeline
|
161
160
|
## that is, convert all strings to integer, float, date, ... if wanted
|
162
|
-
@parser.parse( @io, kwargs ) do |raw_record|
|
161
|
+
@parser.parse( @io, **kwargs ) do |raw_record|
|
163
162
|
record = []
|
164
163
|
raw_record.each_with_index do | value, i |
|
165
164
|
record << @converters.convert( value, i )
|
@@ -1,4 +1,3 @@
|
|
1
|
-
# encoding: utf-8
|
2
1
|
|
3
2
|
class CsvHashReader
|
4
3
|
|
@@ -169,7 +168,7 @@ def_delegators :@io,
|
|
169
168
|
kwargs[:width] = @kwargs[:width] if @parser.is_a?( ParserFixed )
|
170
169
|
|
171
170
|
|
172
|
-
@parser.parse( @io, kwargs ) do |raw_values| # sep: sep
|
171
|
+
@parser.parse( @io, **kwargs ) do |raw_values| # sep: sep
|
173
172
|
if @names.nil? ## check for (first) headers row
|
174
173
|
if @header_converters.empty?
|
175
174
|
@names = raw_values ## store header row / a.k.a. field/column names
|
data/lib/csvreader/version.rb
CHANGED
@@ -1,32 +1,30 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
end # class CsvReader
|
1
|
+
|
2
|
+
class CsvReader    ## note: uses a class for now - change to module - why? why not?

  ## Numeric parts of the gem version (major.minor.patch).
  module Version
    MAJOR = 1    ## todo: namespace inside version or something - why? why not??
    MINOR = 2
    PATCH = 5

    ## self.to_s - why? why not?
  end

  ## Version as a dotted string, e.g. "1.2.5".
  VERSION = [
    Version::MAJOR,
    Version::MINOR,
    Version::PATCH,
  ].join('.')

  class << self
    ## keep (as an alternative to VERSION) - why? why not?
    def version
      VERSION
    end

    ## One-line banner with gem version, ruby version/platform and root directory.
    def banner
      "csvreader/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}] in (#{root})"
    end

    ## Absolute path three directory levels up from this file
    ##   (that is, the gem root for lib/csvreader/version.rb).
    def root
      lib_dir = File.dirname( File.dirname( __FILE__ ) )
      File.expand_path( File.dirname( lib_dir ) )
    end
  end

end # class CsvReader
|
data/lib/csvreader.rb
CHANGED
data/test/test_parser_formats.rb
CHANGED
@@ -1,66 +1,66 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
###
|
4
|
-
# to run use
|
5
|
-
# ruby -I ./lib -I ./test test/test_parser_formats.rb
|
6
|
-
|
7
|
-
|
8
|
-
require 'helper'
|
9
|
-
|
10
|
-
class TestParserFormats < MiniTest::Test
|
11
|
-
|
12
|
-
|
13
|
-
def parser
|
14
|
-
CsvReader::Parser
|
15
|
-
end
|
16
|
-
|
17
|
-
|
18
|
-
def test_parse_whitespace
|
19
|
-
records = [["a", "b", "c"],
|
20
|
-
["1", "2", "3"]]
|
21
|
-
|
22
|
-
## don't care about newlines (\r\n) ??? - fix? why? why not?
|
23
|
-
assert_equal records, parser.default.parse( "a,b,c\n1,2,3" )
|
24
|
-
assert_equal records, parser.default.parse( "a,b,c\n1,2,3\n" )
|
25
|
-
assert_equal records, parser.default.parse( " a, b ,c \n\n1,2,3\n" )
|
26
|
-
assert_equal records, parser.default.parse( " a, b ,c \n \n1,2,3\n" )
|
27
|
-
|
28
|
-
assert_equal [["a", "b", "c"],
|
29
|
-
[""],
|
30
|
-
["1", "2", "3"]], parser.default.parse( %Q{a,b,c\n""\n1,2,3\n} )
|
31
|
-
assert_equal [["", ""],
|
32
|
-
[""],
|
33
|
-
["", "", ""]], parser.default.parse( %Q{,\n""\n"","",""\n} )
|
34
|
-
|
35
|
-
|
36
|
-
## strict rfc4180 - no trim leading or trailing spaces or blank lines
|
37
|
-
assert_equal records, parser.strict.parse( "a,b,c\n1,2,3" )
|
38
|
-
assert_equal [["a", "b", "c"],
|
39
|
-
[""],
|
40
|
-
["1", "2", "3"]], parser.strict.parse( "a,b,c\n\n1,2,3" )
|
41
|
-
assert_equal [[" a", " b ", "c "],
|
42
|
-
[""],
|
43
|
-
["1", "2", "3"]], parser.strict.parse( " a, b ,c \n\n1,2,3" )
|
44
|
-
assert_equal [[" a", " b ", "c "],
|
45
|
-
[" "],
|
46
|
-
["",""],
|
47
|
-
["1", "2", "3"]], parser.strict.parse( " a, b ,c \n \n,\n1,2,3" )
|
48
|
-
end
|
49
|
-
|
50
|
-
|
51
|
-
def test_parse_empties
|
52
|
-
assert_equal [], parser.default.parse( "\n \n \n" )
|
53
|
-
|
54
|
-
## strict rfc4180 - no trim leading or trailing spaces or blank lines
|
55
|
-
assert_equal [[""],
|
56
|
-
[" "],
|
57
|
-
[" "]], parser.strict.parse( "\n \n \n" )
|
58
|
-
assert_equal [[""],
|
59
|
-
[" "],
|
60
|
-
[" "]], parser.strict.parse( "\n \n " )
|
61
|
-
|
62
|
-
assert_equal [[""]], parser.strict.parse( "\n" )
|
63
|
-
assert_equal [], parser.strict.parse( "" )
|
64
|
-
end
|
65
|
-
|
66
|
-
end # class TestParserFormats
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_parser_formats.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
##
# Checks how the default and the strict (rfc4180) parser
# handle leading/trailing spaces and blank lines.
#
# note: uses Minitest::Test - the legacy MiniTest alias is no longer
#       defined by newer minitest versions (unless MT_COMPAT is set)
class TestParserFormats < Minitest::Test


## shortcut to the parser "registry" under test
def parser
  CsvReader::Parser
end


def test_parse_whitespace
  records = [["a", "b", "c"],
             ["1", "2", "3"]]

  ## don't care about newlines (\r\n) ??? - fix? why? why not?
  ##   default parser: trims spaces around values and skips blank lines
  assert_equal records, parser.default.parse( "a,b,c\n1,2,3" )
  assert_equal records, parser.default.parse( "a,b,c\n1,2,3\n" )
  assert_equal records, parser.default.parse( " a, b ,c \n\n1,2,3\n" )
  assert_equal records, parser.default.parse( " a, b ,c \n \n1,2,3\n" )

  ## quoted empty values are kept (a line with "" is NOT a blank line)
  assert_equal [["a", "b", "c"],
                [""],
                ["1", "2", "3"]], parser.default.parse( %Q{a,b,c\n""\n1,2,3\n} )
  assert_equal [["", ""],
                [""],
                ["", "", ""]], parser.default.parse( %Q{,\n""\n"","",""\n} )


  ## strict rfc4180 - no trim leading or trailing spaces or blank lines
  assert_equal records, parser.strict.parse( "a,b,c\n1,2,3" )
  assert_equal [["a", "b", "c"],
                [""],
                ["1", "2", "3"]], parser.strict.parse( "a,b,c\n\n1,2,3" )
  assert_equal [[" a", " b ", "c "],
                [""],
                ["1", "2", "3"]], parser.strict.parse( " a, b ,c \n\n1,2,3" )
  assert_equal [[" a", " b ", "c "],
                [" "],
                ["",""],
                ["1", "2", "3"]], parser.strict.parse( " a, b ,c \n \n,\n1,2,3" )
end


def test_parse_empties
  ## default parser: input with blank lines only results in no records
  assert_equal [], parser.default.parse( "\n \n \n" )

  ## strict rfc4180 - no trim leading or trailing spaces or blank lines
  assert_equal [[""],
                [" "],
                [" "]], parser.strict.parse( "\n \n \n" )
  assert_equal [[""],
                [" "],
                [" "]], parser.strict.parse( "\n \n " )

  assert_equal [[""]], parser.strict.parse( "\n" )
  assert_equal [], parser.strict.parse( "" )
end

end # class TestParserFormats
|