csvreader 1.2.4 → 1.2.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/{HISTORY.md → CHANGELOG.md} +3 -3
- data/Manifest.txt +1 -2
- data/README.md +682 -682
- data/Rakefile +33 -32
- data/datasets/cars11.csv +10 -10
- data/datasets/cities11.csv +12 -12
- data/datasets/customers11.csv +13 -13
- data/datasets/iris.attrib.csv +25 -25
- data/datasets/iris11.csv +163 -163
- data/datasets/lcc.attrib.csv +14 -14
- data/datasets/shakespeare.csv +9 -9
- data/lib/csvreader/base.rb +6 -2
- data/lib/csvreader/buffer.rb +0 -1
- data/lib/csvreader/builder.rb +0 -1
- data/lib/csvreader/converter.rb +0 -1
- data/lib/csvreader/parser.rb +32 -33
- data/lib/csvreader/parser_fixed.rb +105 -106
- data/lib/csvreader/parser_json.rb +23 -24
- data/lib/csvreader/parser_std.rb +582 -583
- data/lib/csvreader/parser_strict.rb +290 -291
- data/lib/csvreader/parser_tab.rb +22 -23
- data/lib/csvreader/parser_table.rb +122 -123
- data/lib/csvreader/parser_yaml.rb +23 -24
- data/lib/csvreader/reader.rb +2 -3
- data/lib/csvreader/reader_hash.rb +1 -2
- data/lib/csvreader/version.rb +30 -32
- data/lib/csvreader.rb +0 -1
- data/test/test_parser_formats.rb +66 -66
- data/test/test_parser_java.rb +208 -208
- metadata +18 -15
- data/LICENSE.md +0 -116
data/lib/csvreader/parser.rb
CHANGED
@@ -1,33 +1,32 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
##
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
#
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
#
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
end # class
|
33
|
-
end # class CsvReader
|
1
|
+
|
2
|
+
class CsvReader
|
3
|
+
|
4
|
+
class Parser
|
5
|
+
## "forward" reference,
|
6
|
+
## see base.rb for more
|
7
|
+
end
|
8
|
+
|
9
|
+
|
10
|
+
####################################
|
11
|
+
# define errors / exceptions
|
12
|
+
# for all parsers for (re)use
|
13
|
+
|
14
|
+
class Error < StandardError
|
15
|
+
end
|
16
|
+
|
17
|
+
####
|
18
|
+
# todo/check:
|
19
|
+
# use "common" error class - why? why not?
|
20
|
+
|
21
|
+
class ParseError < Error
|
22
|
+
attr_reader :message
|
23
|
+
|
24
|
+
def initialize( message )
|
25
|
+
@message = message
|
26
|
+
end
|
27
|
+
|
28
|
+
def to_s
|
29
|
+
"*** csv parse error: #{@message}"
|
30
|
+
end
|
31
|
+
end # class ParseError
|
32
|
+
end # class CsvReader
|
@@ -1,106 +1,105 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
l =
|
16
|
-
l
|
17
|
-
|
18
|
-
end
|
19
|
-
def
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
##
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
##
|
53
|
-
##
|
54
|
-
|
55
|
-
line
|
56
|
-
logger.debug
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
## skip
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
##
|
74
|
-
##
|
75
|
-
##
|
76
|
-
##
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
value =
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
end # class
|
106
|
-
end # class CsvReader
|
1
|
+
|
2
|
+
class CsvReader
|
3
|
+
|
4
|
+
class ParserFixed
|
5
|
+
|
6
|
+
###################################
|
7
|
+
## add simple logger with debug flag/switch
|
8
|
+
#
|
9
|
+
# use Parser.debug = true # to turn on
|
10
|
+
#
|
11
|
+
# todo/fix: use logutils instead of std logger - why? why not?
|
12
|
+
|
13
|
+
def self.build_logger()
|
14
|
+
l = Logger.new( STDOUT )
|
15
|
+
l.level = :info ## set to :info on start; note: is 0 (debug) by default
|
16
|
+
l
|
17
|
+
end
|
18
|
+
def self.logger() @@logger ||= build_logger; end
|
19
|
+
def logger() self.class.logger; end
|
20
|
+
|
21
|
+
|
22
|
+
def parse( data, width:, &block )
|
23
|
+
|
24
|
+
## note: input must respond to each_line (a string or io/file, for example)
|
25
|
+
|
26
|
+
input = data ## assume it's a string or io/file handle
|
27
|
+
|
28
|
+
if block_given?
|
29
|
+
parse_lines( input, width: width, &block )
|
30
|
+
else
|
31
|
+
records = []
|
32
|
+
|
33
|
+
parse_lines( input, width: width ) do |record|
|
34
|
+
records << record
|
35
|
+
end
|
36
|
+
|
37
|
+
records
|
38
|
+
end
|
39
|
+
end ## method parse
|
40
|
+
|
41
|
+
|
42
|
+
|
43
|
+
private
|
44
|
+
|
45
|
+
def parse_lines( input, width:, &block )
|
46
|
+
|
47
|
+
## note: each_line only works with \n (unix) or \r\n (windows)
|
48
|
+
## will NOT work with \r (old mac, any others?) only!!!!
|
49
|
+
input.each_line do |line|
|
50
|
+
|
51
|
+
## note: chomp('') - if the argument is an empty string,
|
52
|
+
## it will remove all trailing newlines from the string.
|
53
|
+
## use line.sub(/[\n\r]*$/, '') or similar instead - why? why not?
|
54
|
+
line = line.chomp( '' )
|
55
|
+
logger.debug "line:" if logger.debug?
|
56
|
+
logger.debug line.pretty_inspect if logger.debug?
|
57
|
+
|
58
|
+
|
59
|
+
## skip empty lines and comments
|
60
|
+
if line =~ /^[ \t]*$/ ## skip blank lines (with whitespace only)
|
61
|
+
logger.debug "skip blank line" if logger.debug?
|
62
|
+
next
|
63
|
+
end
|
64
|
+
|
65
|
+
if line =~ /^[ \t]*#/ # start_with?( "#" ) -- skip comment lines (note: allow leading whitespaces)
|
66
|
+
logger.debug "skip comment line" if logger.debug?
|
67
|
+
next
|
68
|
+
end
|
69
|
+
|
70
|
+
|
71
|
+
if width.is_a?( String )
|
72
|
+
## assume it's String#unpack format e.g.
|
73
|
+
## "209231-231992395 MoreData".unpack('aa5A1A9a4Z*')
|
74
|
+
## returns an array as follows :
|
75
|
+
## ["2", "09231", "-", "231992395", " ", "MoreData"]
|
76
|
+
## see String#unpack
|
77
|
+
|
78
|
+
values = line.unpack( width )
|
79
|
+
else ## assume array with integers
|
80
|
+
values = []
|
81
|
+
offset = 0 # start position / offset
|
82
|
+
width.each_with_index do |w,i|
|
83
|
+
logger.debug "[#{i}] start: #{offset}, width: #{w}" if logger.debug?
|
84
|
+
|
85
|
+
if w < 0 ## convention - if width negative, skip column
|
86
|
+
# note: minus (-) and minus (-) equal plus (+)
|
87
|
+
## e.g. 2 - -2 = 4
|
88
|
+
offset -= w
|
89
|
+
else
|
90
|
+
value = line[offset, w]
|
91
|
+
value = value.strip if value ## note: if not nil strip; only use rstrip (for trailing only) - why? why not?
|
92
|
+
values << value
|
93
|
+
offset += w
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
## note: requires block - enforce? how? why? why not?
|
99
|
+
block.call( values )
|
100
|
+
end
|
101
|
+
end # method parse_lines
|
102
|
+
|
103
|
+
|
104
|
+
end # class ParserFixed
|
105
|
+
end # class CsvReader
|
@@ -1,24 +1,23 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
##
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
end # class
|
24
|
-
end # class CsvReader
|
1
|
+
|
2
|
+
|
3
|
+
class CsvReader
|
4
|
+
|
5
|
+
class ParserJson
|
6
|
+
|
7
|
+
def parse( data, **kwargs, &block )
|
8
|
+
## note: kwargs NOT used for now (but required for "protocol/interface" by other parsers)
|
9
|
+
|
10
|
+
## note: input: required each_line (string or io/file for example)
|
11
|
+
## assume data is a string or io/file handle
|
12
|
+
csv = CsvJson.new( data )
|
13
|
+
|
14
|
+
if block_given?
|
15
|
+
csv.each( &block )
|
16
|
+
else
|
17
|
+
csv.to_a
|
18
|
+
end
|
19
|
+
end ## method parse
|
20
|
+
|
21
|
+
|
22
|
+
end # class ParserJson
|
23
|
+
end # class CsvReader
|