csvreader 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest.txt +7 -1
- data/lib/csvreader.rb +12 -2
- data/lib/csvreader/buffer.rb +8 -3
- data/lib/csvreader/parser.rb +47 -336
- data/lib/csvreader/parser_std.rb +255 -0
- data/lib/csvreader/parser_strict.rb +269 -0
- data/lib/csvreader/parser_tab.rb +57 -0
- data/lib/csvreader/reader.rb +40 -100
- data/lib/csvreader/reader_hash.rb +88 -0
- data/lib/csvreader/version.rb +1 -1
- data/test/helper.rb +4 -0
- data/test/test_parser.rb +0 -3
- data/test/test_parser_formats.rb +8 -11
- data/test/test_parser_java.rb +219 -0
- data/test/{test_parser_rfc4180.rb → test_parser_strict.rb} +17 -20
- data/test/test_parser_tab.rb +48 -0
- data/test/test_reader.rb +15 -16
- metadata +9 -3
@@ -0,0 +1,57 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
class CsvReader

## Minimal parser for tab-separated values:
##   one record per line, fields separated by a tab ("\t").
##   Fields are taken as-is; nothing besides the line ending is removed.
class ParserTab

  ## Parse tab-separated data into records (arrays of strings).
  ##
  ##  data   - anything that responds to each_line (e.g. a string or an io/file handle)
  ##  kwargs - NOT used for now (but required for "protocol/interface" by other parsers)
  ##
  ## With a block, every record gets passed to the block;
  ## without a block, all records get collected and returned as an array.
  def parse( data, **kwargs, &block )
    input = data   ## assume it's a string or io/file handle

    if block_given?
      parse_lines( input, &block )
    else
      records = []
      parse_lines( input ) { |record| records << record }
      records
    end
  end ## method parse


private

  ## Split the input line by line and hand the values of every line to the block.
  def parse_lines( input, &block )

    ## note: each_line only works with \n (unix) or \r\n (windows)
    ##   will NOT work with \r (old mac, any others?) only!!!!
    input.each_line do |line|

      ## note: chomp('') - if the separator is an empty string,
      ##   it will remove all trailing newlines (\n or \r\n) from the string.
      line = line.chomp( '' )

      ## note: split without a limit (auto-)trims trailing empty fields !!!
      ##   use a negative limit (-1) to keep them, otherwise a line such as
      ##   "a\tb\t\t" would lose its last two (empty) columns
      values = line.split( "\t", -1 )

      ## note: requires block - always called with one from parse
      block.call( values )
    end
  end # method parse_lines

end # class ParserTab
end # class CsvReader
|
data/lib/csvreader/reader.rb
CHANGED
@@ -9,35 +9,28 @@ class CsvReader
|
|
9
9
|
end
|
10
10
|
|
11
11
|
DEFAULT = new( Parser::DEFAULT )
|
12
|
+
STRICT = new( Parser::STRICT )
|
12
13
|
RFC4180 = new( Parser::RFC4180 )
|
13
14
|
EXCEL = new( Parser::EXCEL )
|
15
|
+
TAB = new( Parser::TAB )
|
14
16
|
|
15
17
|
def self.default() DEFAULT; end ## alternative alias for DEFAULT
|
18
|
+
def self.strict() STRICT; end ## alternative alias for STRICT
|
16
19
|
def self.rfc4180() RFC4180; end ## alternative alias for RFC4180
|
17
20
|
def self.excel() EXCEL; end ## alternative alias for EXCEL
|
21
|
+
def self.tab() TAB; end ## alternative alias for TAB
|
18
22
|
|
19
23
|
|
20
24
|
#####################
|
21
25
|
## convenience helpers defaulting to default csv dialect/format reader
|
22
26
|
##
|
23
|
-
## CsvReader.
|
24
|
-
## CsvReader::DEFAULT.
|
27
|
+
## CsvReader.parse is the same as
|
28
|
+
## CsvReader::DEFAULT.parse or CsvReader.default.parse
|
25
29
|
##
|
26
30
|
|
27
|
-
def self.parse_line( data, sep: nil,
|
28
|
-
converters: nil )
|
29
|
-
DEFAULT.parse_line( data, sep: sep, converters: converters )
|
30
|
-
end
|
31
|
-
|
32
31
|
def self.parse( data, sep: nil,
|
33
|
-
converters: nil )
|
34
|
-
DEFAULT.parse( data, sep: sep, converters: converters )
|
35
|
-
end
|
36
|
-
|
37
|
-
#### fix!!! remove - replace with parse with (optional) block!!!!!
|
38
|
-
def self.parse_lines( data, sep: nil,
|
39
|
-
converters: nil, &block )
|
40
|
-
DEFAULT.parse_lines( data, sep: sep, converters: nil, &block )
|
32
|
+
converters: nil, &block )
|
33
|
+
DEFAULT.parse( data, sep: sep, converters: converters, &block )
|
41
34
|
end
|
42
35
|
|
43
36
|
def self.read( path, sep: nil,
|
@@ -55,6 +48,20 @@ class CsvReader
|
|
55
48
|
end
|
56
49
|
|
57
50
|
|
51
|
+
############################
|
52
|
+
## note: only add parse_line convenience helper for default
|
53
|
+
## always use parse (do NOT use parse_line) - why? why not?
|
54
|
+
def self.parse_line( data, sep: nil,
|
55
|
+
converters: nil )
|
56
|
+
records = []
|
57
|
+
DEFAULT.parse( data, sep: sep, converters: converters ) do |record|
|
58
|
+
records << record
|
59
|
+
break # only parse first record
|
60
|
+
end
|
61
|
+
records.size == 0 ? nil : records.first
|
62
|
+
end
|
63
|
+
|
64
|
+
|
58
65
|
|
59
66
|
#############################
|
60
67
|
## all "high-level" reader methods
|
@@ -62,33 +69,15 @@ class CsvReader
|
|
62
69
|
## note: allow "overriding" of separator
|
63
70
|
## if sep is not nil otherwise use default dialect/format separator
|
64
71
|
|
72
|
+
def parse( data, sep: nil,
|
73
|
+
converters: nil, &block )
|
74
|
+
kwargs = {
|
75
|
+
## converters: converters ## todo: add converters
|
76
|
+
}
|
77
|
+
## note: only add separator if present/defined (not nil)
|
78
|
+
kwargs[:sep] = sep if sep && @parser.respond_to?( :'sep=' )
|
65
79
|
|
66
|
-
|
67
|
-
## todo/fix: "unify" parse and parse_lines !!!
|
68
|
-
## check for block_given? - why? why not?
|
69
|
-
|
70
|
-
def parse( data, sep: nil, limit: nil,
|
71
|
-
converters: nil )
|
72
|
-
sep = @parser.config[:sep] if sep.nil?
|
73
|
-
@parser.parse( data, sep: sep, limit: limit )
|
74
|
-
end
|
75
|
-
|
76
|
-
#### fix!!! remove - replace with parse with (optional) block!!!!!
|
77
|
-
def parse_lines( data, sep: nil,
|
78
|
-
converters: nil, &block )
|
79
|
-
sep = @parser.config[:sep] if sep.nil?
|
80
|
-
@parser.parse_lines( data, sep: sep, &block )
|
81
|
-
end
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
def parse_line( data, sep: nil,
|
86
|
-
converters: nil )
|
87
|
-
records = parse( data, sep: sep, limit: 1 )
|
88
|
-
|
89
|
-
## unwrap record if empty return nil - why? why not?
|
90
|
-
## return empty record e.g. [] - why? why not?
|
91
|
-
records.size == 0 ? nil : records.first
|
80
|
+
@parser.parse( data, kwargs, &block )
|
92
81
|
end
|
93
82
|
|
94
83
|
def read( path, sep: nil,
|
@@ -103,75 +92,26 @@ class CsvReader
|
|
103
92
|
def foreach( path, sep: nil,
|
104
93
|
converters: nil, &block )
|
105
94
|
File.open( path, 'r:bom|utf-8' ) do |file|
|
106
|
-
|
95
|
+
parse( file, sep: sep, &block )
|
107
96
|
end
|
108
97
|
end
|
109
98
|
|
110
|
-
|
111
|
-
|
112
99
|
def header( path, sep: nil ) ## use header or headers - or use both (with alias)?
|
113
100
|
# read first lines (only)
|
114
101
|
# and parse with csv to get header from csv library itself
|
115
102
|
|
116
|
-
|
103
|
+
records = []
|
117
104
|
File.open( path, 'r:bom|utf-8' ) do |file|
|
118
|
-
|
105
|
+
parse( file, sep: sep ) do |record|
|
106
|
+
records << record
|
107
|
+
break ## only parse/read first record
|
108
|
+
end
|
119
109
|
end
|
120
110
|
|
121
|
-
|
111
|
+
## unwrap record if empty return nil - why? why not?
|
112
|
+
## return empty record e.g. [] - why? why not?
|
113
|
+
## returns nil for empty (for now) - why? why not?
|
114
|
+
records.size == 0 ? nil : records.first
|
122
115
|
end # method header
|
123
116
|
|
124
117
|
end # class CsvReader
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
class CsvHashReader
|
130
|
-
|
131
|
-
|
132
|
-
def self.parse( data, sep: nil, headers: nil )
|
133
|
-
|
134
|
-
## pass in headers as array e.g. ['A', 'B', 'C']
|
135
|
-
names = headers ? headers : nil
|
136
|
-
|
137
|
-
records = []
|
138
|
-
CsvReader.parse_lines( data ) do |values| # sep: sep
|
139
|
-
if names.nil?
|
140
|
-
names = values ## store header row / a.k.a. field/column names
|
141
|
-
else
|
142
|
-
record = names.zip( values ).to_h ## todo/fix: check for more values than names/headers!!!
|
143
|
-
records << record
|
144
|
-
end
|
145
|
-
end
|
146
|
-
records
|
147
|
-
end
|
148
|
-
|
149
|
-
|
150
|
-
def self.read( path, sep: nil, headers: nil )
|
151
|
-
txt = File.open( path, 'r:bom|utf-8' ).read
|
152
|
-
parse( txt, sep: sep, headers: headers )
|
153
|
-
end
|
154
|
-
|
155
|
-
|
156
|
-
def self.foreach( path, sep: nil, headers: nil, &block )
|
157
|
-
|
158
|
-
## pass in headers as array e.g. ['A', 'B', 'C']
|
159
|
-
names = headers ? headers : nil
|
160
|
-
|
161
|
-
CsvReader.foreach( path ) do |values| # sep: sep
|
162
|
-
if names.nil?
|
163
|
-
names = values ## store header row / a.k.a. field/column names
|
164
|
-
else
|
165
|
-
record = names.zip( values ).to_h ## todo/fix: check for more values than names/headers!!!
|
166
|
-
block.call( record )
|
167
|
-
end
|
168
|
-
end
|
169
|
-
end
|
170
|
-
|
171
|
-
|
172
|
-
def self.header( path, sep: nil ) ## add header too? why? why not?
|
173
|
-
## same as "classic" header method - delegate/reuse :-)
|
174
|
-
CsvReader.header( path, sep: sep )
|
175
|
-
end
|
176
|
-
|
177
|
-
end # class CsvHashReader
|
@@ -0,0 +1,88 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
## Reader that returns every record as a hash (column name => value).
##  The column names come from the headers argument or,
##  if no headers are passed in, from the first record of the data.
class CsvHashReader

  def initialize( parser )
    @parser = parser
  end

  DEFAULT = new( CsvReader::Parser::DEFAULT )
  STRICT  = new( CsvReader::Parser::STRICT )
  RFC4180 = new( CsvReader::Parser::RFC4180 )

  def self.default()    DEFAULT; end    ## alternative alias for DEFAULT
  def self.strict()     STRICT;  end    ## alternative alias for STRICT
  def self.rfc4180()    RFC4180; end    ## alternative alias for RFC4180


  #############################
  ## convenience helpers - all delegate to the DEFAULT reader

  def self.parse( data, sep: nil, headers: nil, &block )
    DEFAULT.parse( data, sep: sep, headers: headers, &block )
  end

  def self.read( path, sep: nil, headers: nil )
    DEFAULT.read( path, sep: sep, headers: headers )
  end

  def self.foreach( path, sep: nil, headers: nil, &block )
    DEFAULT.foreach( path, sep: sep, headers: headers, &block )
  end


  #############################
  ## all "high-level" reader methods
  ##

  ## Parse data into hash records.
  ##   With a block, every record gets passed to the block;
  ##   without a block, all records get collected and returned as an array.
  def parse( data, sep: nil, headers: nil, &block )
    if block_given?
      parse_lines( data, sep: sep, headers: headers, &block )
    else
      records = []
      parse_lines( data, sep: sep, headers: headers ) do |record|
        records << record
      end
      records
    end
  end


  ## Read the complete file and return all hash records as an array.
  def read( path, sep: nil, headers: nil )
    ## note: use the block form of File.open, so the file handle
    ##         gets closed again (even if the read raises an error)
    txt = File.open( path, 'r:bom|utf-8' ) { |file| file.read }
    parse( txt, sep: sep, headers: headers )
  end


  ## Open the file and pass every hash record to the block.
  def foreach( path, sep: nil, headers: nil, &block )
    File.open( path, 'r:bom|utf-8' ) do |file|
      parse_lines( file, sep: sep, headers: headers, &block )
    end
  end


private

  ####################
  ## parse_lines helper method to keep in one (central) place only (for easy editing/changing)
  ##   - builds key/value pairs

  def parse_lines( data, sep: nil, headers: nil, &block )
    ## pass in headers as array e.g. ['A', 'B', 'C']
    names = headers ? headers : nil

    kwargs = {
      ##  converters: converters  ## todo: add converters
    }
    ## note: only add separator if present/defined (not nil)
    kwargs[:sep] = sep    if sep && @parser.respond_to?( :'sep=' )

    ## note: pass options along as keyword arguments (**kwargs);
    ##   a positional hash no longer gets converted to keywords in ruby 3+
    @parser.parse( data, **kwargs ) do |values|
      if names.nil?
        names = values   ## store header row / a.k.a. field/column names
      else
        record = names.zip( values ).to_h    ## todo/fix: check for more values than names/headers!!!
        block.call( record )
      end
    end
  end

end # class CsvHashReader
|
data/lib/csvreader/version.rb
CHANGED
data/test/helper.rb
CHANGED
data/test/test_parser.rb
CHANGED
data/test/test_parser_formats.rb
CHANGED
@@ -9,9 +9,6 @@ require 'helper'
|
|
9
9
|
|
10
10
|
class TestParserFormats < MiniTest::Test
|
11
11
|
|
12
|
-
def setup
|
13
|
-
CsvReader::Parser.logger.level = :debug ## turn on "global" logging - move to helper - why? why not?
|
14
|
-
end
|
15
12
|
|
16
13
|
def parser
|
17
14
|
CsvReader::Parser
|
@@ -37,17 +34,17 @@ def test_parse_whitespace
|
|
37
34
|
|
38
35
|
|
39
36
|
## strict rfc4180 - no trim leading or trailing spaces or blank lines
|
40
|
-
assert_equal records, parser.
|
37
|
+
assert_equal records, parser.strict.parse( "a,b,c\n1,2,3" )
|
41
38
|
assert_equal [["a", "b", "c"],
|
42
39
|
[""],
|
43
|
-
["1", "2", "3"]], parser.
|
40
|
+
["1", "2", "3"]], parser.strict.parse( "a,b,c\n\n1,2,3" )
|
44
41
|
assert_equal [[" a", " b ", "c "],
|
45
42
|
[""],
|
46
|
-
["1", "2", "3"]], parser.
|
43
|
+
["1", "2", "3"]], parser.strict.parse( " a, b ,c \n\n1,2,3" )
|
47
44
|
assert_equal [[" a", " b ", "c "],
|
48
45
|
[" "],
|
49
46
|
["",""],
|
50
|
-
["1", "2", "3"]], parser.
|
47
|
+
["1", "2", "3"]], parser.strict.parse( " a, b ,c \n \n,\n1,2,3" )
|
51
48
|
end
|
52
49
|
|
53
50
|
|
@@ -57,13 +54,13 @@ def test_parse_empties
|
|
57
54
|
## strict rfc4180 - no trim leading or trailing spaces or blank lines
|
58
55
|
assert_equal [[""],
|
59
56
|
[" "],
|
60
|
-
[" "]], parser.
|
57
|
+
[" "]], parser.strict.parse( "\n \n \n" )
|
61
58
|
assert_equal [[""],
|
62
59
|
[" "],
|
63
|
-
[" "]], parser.
|
60
|
+
[" "]], parser.strict.parse( "\n \n " )
|
64
61
|
|
65
|
-
assert_equal [[""]], parser.
|
66
|
-
assert_equal [], parser.
|
62
|
+
assert_equal [[""]], parser.strict.parse( "\n" )
|
63
|
+
assert_equal [], parser.strict.parse( "" )
|
67
64
|
end
|
68
65
|
|
69
66
|
end # class TestParserFormats
|
@@ -0,0 +1,219 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_parser_java.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
##########################
|
11
|
+
# try some tests from apache java (commons) csv reader
|
12
|
+
# see https://github.com/apache/commons-csv/blob/master/src/test/java/org/apache/commons/csv/LexerTest.java
|
13
|
+
|
14
|
+
|
15
|
+
## Parser tests adapted from the apache java (commons) csv lexer tests.
##
## note: some tests change settings on parser.strict and set them back afterwards;
##   the reset happens in an ensure clause so that a failing assertion
##   cannot leave the changed setting behind for later tests.
class TestParserJava < MiniTest::Test


LF = "\n" ## \n == ASCII 0x0A (hex) 10 (dec) = LF (Newline/line feed)
CR = "\r" ## \r == ASCII 0x0D (hex) 13 (dec) = CR (Carriage return)


def parser
  CsvReader::Parser
end

def test_surrounding_spaces_are_deleted
  assert_equal [["noSpaces",
                 "leadingSpaces",
                 "trailingSpaces",
                 "surroundingSpaces",
                 "",
                 "",
                 ""]], parser.default.parse( "noSpaces, leadingSpaces,trailingSpaces , surroundingSpaces , ,," )
end


def test_surrounding_tabs_are_deleted
  assert_equal [["noTabs",
                 "leadingTab",
                 "trailingTab",
                 "surroundingTabs",
                 "",
                 "",
                 ""]], parser.default.parse( "noTabs,\tleadingTab,trailingTab\t,\tsurroundingTabs\t,\t\t,," )
end

def test_ignore_empty_lines
  assert_equal [[ "first", "line", "" ],
                [ "second", "line" ],
                [ "third line" ],
                [ "last", "line" ]],
               parser.default.parse( "first,line,\n" + "\n" + "\n" +
                                     "second,line\n" + "\n" + "\n" +
                                     "third line \n" + "\n" + "\n" +
                                     "last, line \n" + "\n" + "\n" + "\n" )
end


def test_comments
  assert_equal [["first", "line", "" ],
                ["second", "line", "tokenWith#no-comment" ],
                ["third", "line", "#no-comment" ]],
               parser.default.parse( "first,line,\n" +
                                     "second,line,tokenWith#no-comment\n" +
                                     "# comment line \n" +
                                     "third,line,#no-comment\n" +
                                     "# penultimate comment\n" +
                                     "# Final comment\n" )
end


def test_comments_and_empty_lines
  parser.strict.comment = '#'

  assert_equal [[ "1", "2", "3", "" ],              ## 1
                [ "" ],                             ## 1b
                [ "" ],                             ## 1c
                [ "a", "b x", "c#no-comment" ],     ## 2
                [ "" ],                             ## 4
                [ "" ],                             ## 4b
                [ "d", "e", "#no-comment" ],        ## 5
                [ "" ],                             ## 5b
                [ "" ],                             ## 5c
                [ "" ],                             ## 6b
                [ "" ]                              ## 6c
               ],
               parser.strict.parse(
                 "1,2,3,\n" +                 ## 1
                 "\n" +                       ## 1b
                 "\n" +                       ## 1c
                 "a,b x,c#no-comment\n" +     ## 2
                 "#foo\n" +                   ## 3
                 "\n" +                       ## 4
                 "\n" +                       ## 4b
                 "d,e,#no-comment\n" +        ## 5
                 "\n" +                       ## 5b
                 "\n" +                       ## 5c
                 "# penultimate comment\n" +  ## 6
                 "\n" +                       ## 6b
                 "\n" +                       ## 6c
                 "# Final comment\n"          ## 7
               )
ensure
  parser.strict.comment = false   ## reset to defaults (even if the assertion fails)
end


def test_backslash_with_escaping
  ## simple token with escaping enabled
  assert_equal [[ "a", ",", "b\\" ],
                [ ",", "\nc", "d\r" ],
                [ "e" ]], parser.default.parse( "a,\\,,b\\\\\n" +
                                                "\\,,\\\nc,d\\\r\n" +
                                                "e" )


  parser.strict.escape = "\\"
  assert_equal [[ "a", ",", "b\\" ],
                [ ",", "\nc", "d\r" ],
                [ "e" ]], parser.strict.parse( "a,\\,,b\\\\\n" +
                                               "\\,,\\\nc,d\\\r\n" +
                                               "e" )
ensure
  parser.strict.escape = false    ## reset (even if an assertion fails)
end


def test_backslash_without_escaping
  ## simple token with escaping not enabled
  assert_equal [[ "a",
                  "\\",     ## an unquoted single backslash is not an escape char
                  "",
                  "b\\"     ## an unquoted single backslash is not an escape char
                ],
                [ "\\", "", "" ]], parser.strict.parse( "a,\\,,b\\\n" +
                                                        "\\,," )

end


def test_next_token4
  ## encapsulator tokenizer (single line)
  assert_equal [[ "a", "foo", "b" ],
                [ "a", " foo", "b" ],
                [ "a", "foo ", "b" ],
                [ "a", " foo ", "b" ]],
               parser.default.parse( "a,\"foo\",b\n" +
                                     "a, \" foo\",b\n" +
                                     "a,\"foo \" ,b\n" +
                                     "a, \" foo \" ,b" )
end


def test_next_token5
  ## encapsulator tokenizer (multi line, delimiter in string)
  assert_equal [[ "a", "foo\n", "b" ],
                [ "foo\n baar ,,," ],
                [ "\n\t \n" ]],
               parser.default.parse( "a,\"foo\n\",b\n" +
                                     "\"foo\n baar ,,,\"\n" +
                                     "\"\n\t \n\"" )
end


def test_separator_is_tab
  parser.strict.sep = "\t"
  assert_equal [["one",
                 "two",
                 "",
                 "four ",
                 " five",
                 " six" ]], parser.strict.parse( "one\ttwo\t\tfour \t five\t six" )
ensure
  parser.strict.sep = ","   ## reset back to comma (even if the assertion fails)
end


def test_escaped_cr
  assert_equal [[ "character" + CR + "Escaped" ]],
               parser.default.parse( "character\\" + CR + "Escaped" )
end


def test_cr
  assert_equal [[ "character" ],
                [ "NotEscaped" ]],
               parser.default.parse( "character" + CR + "NotEscaped" )
end


def test_escaped_lf
  assert_equal [[ "character" + LF + "Escaped" ]],
               parser.default.parse( "character\\" + LF + "Escaped" )
end

def test_lf
  assert_equal [[ "character" ],
                [ "NotEscaped" ]],
               parser.default.parse( "character" + LF + "NotEscaped" )
end


def test_escaped_mysql_null_value
  ## MySQL uses \N to symbolize null values. We have to restore this

  ## note: "unknown escape sequences e.g. \N get passed "through" as-is (unescaped)"
  ##   only supports \n \r (sep e.g \, or \t) (quote e.g. \") for now - any others?
  assert_equal [[ "character\\NEscaped" ]],
               parser.default.parse( "character\\NEscaped" )
end

end # class TestParserJava
|