csvreader 1.2.4 → 1.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/{HISTORY.md → CHANGELOG.md} +3 -3
- data/Manifest.txt +1 -2
- data/README.md +682 -682
- data/Rakefile +33 -32
- data/datasets/cars11.csv +10 -10
- data/datasets/cities11.csv +12 -12
- data/datasets/customers11.csv +13 -13
- data/datasets/iris.attrib.csv +25 -25
- data/datasets/iris11.csv +163 -163
- data/datasets/lcc.attrib.csv +14 -14
- data/datasets/shakespeare.csv +9 -9
- data/lib/csvreader/base.rb +6 -2
- data/lib/csvreader/buffer.rb +0 -1
- data/lib/csvreader/builder.rb +0 -1
- data/lib/csvreader/converter.rb +0 -1
- data/lib/csvreader/parser.rb +32 -33
- data/lib/csvreader/parser_fixed.rb +105 -106
- data/lib/csvreader/parser_json.rb +23 -24
- data/lib/csvreader/parser_std.rb +582 -583
- data/lib/csvreader/parser_strict.rb +290 -291
- data/lib/csvreader/parser_tab.rb +22 -23
- data/lib/csvreader/parser_table.rb +122 -123
- data/lib/csvreader/parser_yaml.rb +23 -24
- data/lib/csvreader/reader.rb +2 -3
- data/lib/csvreader/reader_hash.rb +1 -2
- data/lib/csvreader/version.rb +30 -32
- data/lib/csvreader.rb +0 -1
- data/test/test_parser_formats.rb +66 -66
- data/test/test_parser_java.rb +208 -208
- metadata +18 -15
- data/LICENSE.md +0 -116
data/lib/csvreader/parser.rb
CHANGED
@@ -1,33 +1,32 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
##
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
#
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
#
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
end # class
|
33
|
-
end # class CsvReader
|
1
|
+
|
2
|
+
class CsvReader
|
3
|
+
|
4
|
+
class Parser
|
5
|
+
## "forward" reference,
|
6
|
+
## see base.rb for more
|
7
|
+
end
|
8
|
+
|
9
|
+
|
10
|
+
####################################
|
11
|
+
# define errors / exceptions
|
12
|
+
# for all parsers for (re)use
|
13
|
+
|
14
|
+
class Error < StandardError
|
15
|
+
end
|
16
|
+
|
17
|
+
####
|
18
|
+
# todo/check:
|
19
|
+
# use "common" error class - why? why not?
|
20
|
+
|
21
|
+
class ParseError < Error
|
22
|
+
attr_reader :message
|
23
|
+
|
24
|
+
def initialize( message )
|
25
|
+
@message = message
|
26
|
+
end
|
27
|
+
|
28
|
+
def to_s
|
29
|
+
"*** csv parse error: #{@message}"
|
30
|
+
end
|
31
|
+
end # class ParseError
|
32
|
+
end # class CsvReader
|
@@ -1,106 +1,105 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
l =
|
16
|
-
l
|
17
|
-
|
18
|
-
end
|
19
|
-
def
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
##
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
##
|
53
|
-
##
|
54
|
-
|
55
|
-
line
|
56
|
-
logger.debug
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
## skip
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
##
|
74
|
-
##
|
75
|
-
##
|
76
|
-
##
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
value =
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
end # class
|
106
|
-
end # class CsvReader
|
1
|
+
|
2
|
+
class CsvReader
|
3
|
+
|
4
|
+
class ParserFixed
|
5
|
+
|
6
|
+
###################################
|
7
|
+
## add simple logger with debug flag/switch
|
8
|
+
#
|
9
|
+
# use Parser.debug = true # to turn on
|
10
|
+
#
|
11
|
+
# todo/fix: use logutils instead of std logger - why? why not?
|
12
|
+
|
13
|
+
def self.build_logger()
|
14
|
+
l = Logger.new( STDOUT )
|
15
|
+
l.level = :info ## set to :info on start; note: is 0 (debug) by default
|
16
|
+
l
|
17
|
+
end
|
18
|
+
def self.logger() @@logger ||= build_logger; end
|
19
|
+
def logger() self.class.logger; end
|
20
|
+
|
21
|
+
|
22
|
+
def parse( data, width:, &block )
|
23
|
+
|
24
|
+
## note: input must respond to each_line (string or io/file for example)
|
25
|
+
|
26
|
+
input = data ## assume it's a string or io/file handle
|
27
|
+
|
28
|
+
if block_given?
|
29
|
+
parse_lines( input, width: width, &block )
|
30
|
+
else
|
31
|
+
records = []
|
32
|
+
|
33
|
+
parse_lines( input, width: width ) do |record|
|
34
|
+
records << record
|
35
|
+
end
|
36
|
+
|
37
|
+
records
|
38
|
+
end
|
39
|
+
end ## method parse
|
40
|
+
|
41
|
+
|
42
|
+
|
43
|
+
private
|
44
|
+
|
45
|
+
def parse_lines( input, width:, &block )
|
46
|
+
|
47
|
+
## note: each line only works with \n (unix) or \r\n (windows)
|
48
|
+
## will NOT work with \r (old mac, any others?) only!!!!
|
49
|
+
input.each_line do |line|
|
50
|
+
|
51
|
+
## note: chomp('') - if the separator is an empty string,
|
52
|
+
## it will remove all trailing newlines from the string.
|
53
|
+
## use line.sub(/[\n\r]*$/, '') or similar instead - why? why not?
|
54
|
+
line = line.chomp( '' )
|
55
|
+
logger.debug "line:" if logger.debug?
|
56
|
+
logger.debug line.pretty_inspect if logger.debug?
|
57
|
+
|
58
|
+
|
59
|
+
## skip empty lines and comments
|
60
|
+
if line =~ /^[ \t]*$/ ## skip blank lines (with whitespace only)
|
61
|
+
logger.debug "skip blank line" if logger.debug?
|
62
|
+
next
|
63
|
+
end
|
64
|
+
|
65
|
+
if line =~ /^[ \t]*#/ # start_with?( "#" ) -- skip comment lines (note: allow leading whitespaces)
|
66
|
+
logger.debug "skip comment line" if logger.debug?
|
67
|
+
next
|
68
|
+
end
|
69
|
+
|
70
|
+
|
71
|
+
if width.is_a?( String )
|
72
|
+
## assume it's String#unpack format e.g.
|
73
|
+
## "209231-231992395 MoreData".unpack('aa5A1A9a4Z*')
|
74
|
+
## returns an array as follows :
|
75
|
+
## ["2", "09231", "-", "231992395", " ", "MoreData"]
|
76
|
+
## see String#unpack
|
77
|
+
|
78
|
+
values = line.unpack( width )
|
79
|
+
else ## assume array with integers
|
80
|
+
values = []
|
81
|
+
offset = 0 # start position / offset
|
82
|
+
width.each_with_index do |w,i|
|
83
|
+
logger.debug "[#{i}] start: #{offset}, width: #{w}" if logger.debug?
|
84
|
+
|
85
|
+
if w < 0 ## convention - if width negative, skip column
|
86
|
+
# note: minus (-) and minus (-) equal plus (+)
|
87
|
+
## e.g. 2 - -2 = 4
|
88
|
+
offset -= w
|
89
|
+
else
|
90
|
+
value = line[offset, w]
|
91
|
+
value = value.strip if value ## note: if not nil strip; only use rstrip (for trailing only) - why? why not?
|
92
|
+
values << value
|
93
|
+
offset += w
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
## note: requires block - enforce? how? why? why not?
|
99
|
+
block.call( values )
|
100
|
+
end
|
101
|
+
end # method parse_lines
|
102
|
+
|
103
|
+
|
104
|
+
end # class ParserFixed
|
105
|
+
end # class CsvReader
|
@@ -1,24 +1,23 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
##
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
end # class
|
24
|
-
end # class CsvReader
|
1
|
+
|
2
|
+
|
3
|
+
class CsvReader
|
4
|
+
|
5
|
+
class ParserJson
|
6
|
+
|
7
|
+
def parse( data, **kwargs, &block )
|
8
|
+
## note: kwargs NOT used for now (but required for "protocol/interface" by other parsers)
|
9
|
+
|
10
|
+
## note: input: required each_line (string or io/file for example)
|
11
|
+
## assume data is a string or io/file handle
|
12
|
+
csv = CsvJson.new( data )
|
13
|
+
|
14
|
+
if block_given?
|
15
|
+
csv.each( &block )
|
16
|
+
else
|
17
|
+
csv.to_a
|
18
|
+
end
|
19
|
+
end ## method parse
|
20
|
+
|
21
|
+
|
22
|
+
end # class ParserJson
|
23
|
+
end # class CsvReader
|