censive 0.18 → 0.20
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/censive.gemspec +1 -1
- data/diagram/NFA to Regex.pdf +0 -0
- data/diagram/censive@ce9d51d.png +0 -0
- data/diagram/csv-ragel.dot +24 -0
- data/diagram/csv.dot +57 -0
- data/diagram/csv.png +0 -0
- data/diagram/csv.rl +45 -0
- data/diagram/csv.svg +270 -0
- data/diagram/diagram.dot +26 -0
- data/diagram/diagram.rl +50 -0
- data/lib/censive.rb +127 -97
- data/lib/censive.rb-20230208182732 +266 -0
- data/lib/censive.rb-20230208195221 +276 -0
- data/lib/censive.rb-20230209050227 +282 -0
- data/lib/flay.rb +227 -0
- data/lib/test-censive.rb +12 -0
- data/lib/test-csv.rb +12 -0
- metadata +17 -2
data/lib/censive.rb
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
# censive - A quick and lightweight CSV handling library for Ruby
|
5
5
|
#
|
6
6
|
# Author: Steve Shreeve (steve.shreeve@gmail.com)
|
7
|
-
# Date: Feb
|
7
|
+
# Date: Feb 10, 2023
|
8
8
|
#
|
9
9
|
# https://crystal-lang.org/api/1.7.2/CSV.html (Crystal's CSV library)
|
10
10
|
# https://github.com/ruby/strscan/blob/master/ext/strscan/strscan.c
|
@@ -14,14 +14,22 @@
|
|
14
14
|
# GOALS:
|
15
15
|
# 1. Faster than Ruby's default CSV library
|
16
16
|
# 2. Lightweight code with streamlined and optimized logic
|
17
|
-
# 3. Support most non-compliant CSV variations (
|
17
|
+
# 3. Support most non-compliant CSV variations (@excel, @relax, etc)
|
18
|
+
# 4. Support most commonly used CSV options (@sep, @quote, @strip, @drop, etc)
|
18
19
|
#
|
19
|
-
# TODO:
|
20
|
+
# TODO:
|
21
|
+
# 1. Support IO streaming
|
22
|
+
# 2. Review all encodings, we may be losing speed when mixing encodings
|
23
|
+
# 3. Huge speedup possible if our @unquoted regex reads beyond @cr?@lf's
|
24
|
+
# 4. Will using String#freeze give us a speed up?
|
25
|
+
# 5. Implement support for scan_until(string) <= right now only regex is valid
|
20
26
|
# ============================================================================
|
21
27
|
|
22
28
|
require "strscan"
|
23
29
|
|
24
30
|
class Censive < StringScanner
|
31
|
+
attr :encoding
|
32
|
+
|
25
33
|
def self.parse(...)
|
26
34
|
new(...).parse
|
27
35
|
end
|
@@ -34,86 +42,84 @@ class Censive < StringScanner
|
|
34
42
|
end
|
35
43
|
end
|
36
44
|
|
37
|
-
def initialize(str=
|
38
|
-
drop: false , # drop trailing empty
|
39
|
-
encoding:
|
45
|
+
def initialize(str=nil,
|
46
|
+
drop: false , # drop trailing empty columns?
|
47
|
+
encoding: nil , # character encoding
|
40
48
|
excel: false , # literals ="01" formulas =A1 + B2 http://bit.ly/3Y7jIvc
|
41
49
|
mode: :compact, # export mode: compact or full
|
42
|
-
out:
|
50
|
+
out: nil , # output stream, needs to respond to <<
|
43
51
|
quote: '"' , # quote character
|
44
52
|
relax: false , # relax quote parsing so ,"Fo"o, => ,"Fo""o",
|
45
53
|
rowsep: "\n" , # row separator for export
|
46
54
|
sep: "," , # column separator character
|
47
|
-
strip: false , # strip
|
48
|
-
**opts
|
55
|
+
strip: false , # strip columns when reading
|
56
|
+
**opts # grab bag
|
49
57
|
)
|
50
|
-
#
|
58
|
+
# initialize data source
|
59
|
+
if str && str.size < 100 && File.readable?(str)
|
60
|
+
str = File.open(str, encoding ? "r:#{encoding}" : "r").read
|
61
|
+
else
|
62
|
+
str ||= ""
|
63
|
+
str = str.encode(encoding) if encoding
|
64
|
+
end
|
65
|
+
super(str)
|
66
|
+
reset
|
67
|
+
|
68
|
+
# config options
|
69
|
+
@cheat = true
|
51
70
|
@drop = drop
|
52
|
-
@encoding = encoding
|
71
|
+
@encoding = str.encoding
|
53
72
|
@excel = excel
|
54
73
|
@mode = mode
|
55
|
-
@out = out
|
56
|
-
@quote = quote
|
74
|
+
@out = out || $stdout
|
57
75
|
@relax = relax
|
76
|
+
@strip = strip
|
77
|
+
|
78
|
+
# config strings
|
79
|
+
@quote = quote
|
58
80
|
@rowsep = rowsep
|
59
81
|
@sep = sep
|
60
|
-
@strip = strip
|
61
82
|
|
62
|
-
#
|
63
|
-
@cr
|
64
|
-
@lf
|
65
|
-
@es
|
66
|
-
@eq
|
67
|
-
|
68
|
-
|
69
|
-
@
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
end
|
75
|
-
|
76
|
-
|
83
|
+
# static strings
|
84
|
+
@cr = "\r"
|
85
|
+
@lf = "\n"
|
86
|
+
@es = ""
|
87
|
+
@eq = "="
|
88
|
+
|
89
|
+
# combinations
|
90
|
+
@esc = (@quote * 2)
|
91
|
+
@seq = [@sep, @eq].join # used for parsing in excel mode
|
92
|
+
|
93
|
+
# regexes
|
94
|
+
@eoc = /(?=#{"\\" + @sep}|#{@cr}|#{@lf}|\z)/o # end of cell
|
95
|
+
@eol = /#{@cr}#{@lf}?|#{@lf}/o # end of line
|
96
|
+
@escapes = /(#{@quote})|#{"\\"+@sep}|#{@cr}|#{@lf}/o
|
97
|
+
@quotable = /#{"\\"+@sep}|#{@cr}|#{@lf}/o
|
98
|
+
@quotes = /#{@quote}/o
|
99
|
+
@seps = /#{@sep}+/o
|
100
|
+
@quoted = @excel ? /(?:=)?#{@quote}/o : @quote
|
101
|
+
@unquoted = /[^#{@sep}#{@cr}#{@lf}][^#{@quote}#{@cr}#{@lf}]*/o
|
102
|
+
@leadzero = /\A0\d*\z/
|
77
103
|
end
|
78
104
|
|
79
105
|
def reset(str=nil)
|
80
|
-
self.string = str if str
|
81
|
-
super()
|
82
106
|
@rows = nil
|
83
107
|
@cols = @cells = 0
|
84
|
-
end
|
85
108
|
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
excel = true if @excel && scan(@eq)
|
90
|
-
|
91
|
-
if scan(@quote) # consume quoted cell
|
92
|
-
token = ""
|
93
|
-
while true
|
94
|
-
token << (scan_until(/#{@quote}/o) or bomb "unclosed quote")[0..-2]
|
95
|
-
token << @quote and next if scan(@quote)
|
96
|
-
break if scan(@eoc)
|
97
|
-
@relax or bomb "invalid character after quote"
|
98
|
-
token << @quote + (scan_until(/#{@quote}/o) or bomb "bad inline quote")
|
99
|
-
end
|
100
|
-
elsif scan(@sep) then return @es
|
101
|
-
elsif scan(@eol) then return nil
|
102
|
-
else # consume unquoted cell
|
103
|
-
token = scan_until(@eoc) or bomb "unexpected character"
|
104
|
-
token.prepend(@eq) if excel
|
105
|
-
end
|
106
|
-
scan(@sep)
|
107
|
-
@strip ? token.strip : token
|
108
|
-
end
|
109
|
-
|
110
|
-
def bomb(msg)
|
111
|
-
abort "\n#{File.basename($0)}: #{msg} at character #{pos} near '#{string[pos-4,7]}'"
|
109
|
+
self.string = str if str
|
110
|
+
@encoding = string.encoding
|
111
|
+
super()
|
112
112
|
end
|
113
113
|
|
114
114
|
# ==[ Parser ]==
|
115
115
|
|
116
116
|
def parse
|
117
|
+
|
118
|
+
# TODO: crazy optimization if NO QUOTES in rest
|
119
|
+
# unless rest.include?(@quote)
|
120
|
+
# @rows = rest...
|
121
|
+
# end
|
122
|
+
|
117
123
|
@rows = []
|
118
124
|
while row = next_row
|
119
125
|
@rows << row
|
@@ -125,18 +131,71 @@ class Censive < StringScanner
|
|
125
131
|
end
|
126
132
|
|
127
133
|
def next_row
|
134
|
+
if @cheat and line = scan_until(@eol)
|
135
|
+
row = line.chomp!.split(@sep, -1)
|
136
|
+
row.each do |col|
|
137
|
+
next if (saw = col.count(@quote)).zero?
|
138
|
+
next if (saw == 2) && col.delete_prefix!(@quote) && col.delete_suffix!(@quote)
|
139
|
+
@cheat = false
|
140
|
+
break
|
141
|
+
end if line.include?(@quote)
|
142
|
+
@cheat and return @strip ? row.each(&:strip!) : row
|
143
|
+
unscan
|
144
|
+
end
|
145
|
+
|
128
146
|
token = next_token or return
|
129
|
-
row = [
|
130
|
-
row
|
147
|
+
row = []
|
148
|
+
row.push(*token)
|
149
|
+
row.push(*token) while token = next_token
|
131
150
|
row
|
132
151
|
end
|
133
152
|
|
153
|
+
def next_token
|
154
|
+
if scan(@quoted) # quoted cell
|
155
|
+
token = ""
|
156
|
+
while true
|
157
|
+
token << (scan_until(@quotes) or bomb "unclosed quote")[0..-2]
|
158
|
+
token << @quote and next if scan(@quote)
|
159
|
+
scan(@eoc) and break
|
160
|
+
@relax or bomb "invalid character after quote"
|
161
|
+
token << @quote + (scan_until(@quotes) or bomb "bad inline quote")
|
162
|
+
end
|
163
|
+
scan(@sep)
|
164
|
+
@strip ? token.strip : token
|
165
|
+
elsif match = scan(@unquoted) # unquoted cell(s)
|
166
|
+
if check(@quote) && !match.chomp!(@sep) # if we see a stray quote
|
167
|
+
unless @excel && match.chomp!(@seq) # unless an excel literal, fix it
|
168
|
+
match << (scan_until(@eoc) or bomb "stray quote")
|
169
|
+
scan(@sep)
|
170
|
+
end
|
171
|
+
end
|
172
|
+
tokens = match.split(@sep, -1)
|
173
|
+
@strip ? tokens.map!(&:strip) : tokens
|
174
|
+
elsif scan(@sep)
|
175
|
+
match = scan(@seps)
|
176
|
+
match ? match.split(@sep, -1) : @es
|
177
|
+
else
|
178
|
+
scan(@eol)
|
179
|
+
nil
|
180
|
+
end
|
181
|
+
end
|
182
|
+
|
183
|
+
def each
|
184
|
+
@rows ||= parse
|
185
|
+
@rows.each {|row| yield row }
|
186
|
+
end
|
187
|
+
|
188
|
+
def export(**opts)
|
189
|
+
out = opts.empty? ? self : self.class.writer(**opts)
|
190
|
+
each {|row| out << row }
|
191
|
+
end
|
192
|
+
|
134
193
|
# ==[ Helpers ]==
|
135
194
|
|
136
195
|
# returns 2 (must be quoted and escaped), 1 (must be quoted), 0 (neither)
|
137
196
|
def grok(str)
|
138
|
-
if idx = str.index(
|
139
|
-
$1 ? 2 : str.index(
|
197
|
+
if idx = str.index(@escapes)
|
198
|
+
$1 ? 2 : str.index(@quotes, idx) ? 2 : 1
|
140
199
|
else
|
141
200
|
0
|
142
201
|
end
|
@@ -156,11 +215,11 @@ class Censive < StringScanner
|
|
156
215
|
row
|
157
216
|
when 1
|
158
217
|
row.map do |col|
|
159
|
-
col.match?(
|
218
|
+
col.match?(@quotable) ? "#{q}#{col}#{q}" : col
|
160
219
|
end
|
161
220
|
else
|
162
221
|
row.map do |col|
|
163
|
-
@excel && col =~
|
222
|
+
@excel && col =~ @leadzero ? "=#{q}#{col}#{q}" :
|
164
223
|
case grok(col)
|
165
224
|
when 0 then col
|
166
225
|
when 1 then "#{q}#{col}#{q}"
|
@@ -171,7 +230,7 @@ class Censive < StringScanner
|
|
171
230
|
when :full
|
172
231
|
if @excel
|
173
232
|
row.map do |col|
|
174
|
-
col =~
|
233
|
+
col =~ @leadzero ? "=#{q}#{col}#{q}" : "#{q}#{col.gsub(q, @esc)}#{q}"
|
175
234
|
end
|
176
235
|
else
|
177
236
|
row.map {|col| "#{q}#{col.gsub(q, @esc)}#{q}" }
|
@@ -181,16 +240,6 @@ class Censive < StringScanner
|
|
181
240
|
@out << out + @rowsep
|
182
241
|
end
|
183
242
|
|
184
|
-
def each
|
185
|
-
@rows ||= parse
|
186
|
-
@rows.each {|row| yield row }
|
187
|
-
end
|
188
|
-
|
189
|
-
def export(**opts)
|
190
|
-
out = opts.empty? ? self : self.class.writer(**opts)
|
191
|
-
each {|row| out << row }
|
192
|
-
end
|
193
|
-
|
194
243
|
def stats
|
195
244
|
wide = string.size.to_s.size
|
196
245
|
puts "%#{wide}d rows" % @rows.size
|
@@ -198,27 +247,8 @@ class Censive < StringScanner
|
|
198
247
|
puts "%#{wide}d cells" % @cells
|
199
248
|
puts "%#{wide}d bytes" % string.size
|
200
249
|
end
|
201
|
-
end
|
202
250
|
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
csv = Censive.new(raw, excel: true, relax: true)
|
207
|
-
csv.export # (sep: ",", excel: true)
|
251
|
+
def bomb(msg)
|
252
|
+
abort "\n#{File.basename($0)}: #{msg} at character #{pos} near '#{string[pos-4,7]}'"
|
253
|
+
end
|
208
254
|
end
|
209
|
-
|
210
|
-
__END__
|
211
|
-
Name,Age,Shoe
|
212
|
-
Alice,27,5
|
213
|
-
Bob,33,10 1/2
|
214
|
-
Charlie or "Chuck",=B2 + B3,9
|
215
|
-
"Doug E Fresh",="007",10
|
216
|
-
Subtotal,=sum(B2:B5),="01234"
|
217
|
-
|
218
|
-
# first line works in "relax" mode, bottom line is compliant
|
219
|
-
123,"CHO, JOELLE "JOJO"",456
|
220
|
-
123,"CHO, JOELLE ""JOJO""",456
|
221
|
-
|
222
|
-
# Excel mode checking
|
223
|
-
=,=x,x=,="x",="","","=",123,0123,="123",="0123"
|
224
|
-
,=x,x=,x,,,,,,=,,123,="0123",123,,="0123" # <= a little off
|
@@ -0,0 +1,266 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# ============================================================================
|
4
|
+
# censive - A quick and lightweight CSV handling library for Ruby
|
5
|
+
#
|
6
|
+
# Author: Steve Shreeve (steve.shreeve@gmail.com)
|
7
|
+
# Date: Feb 8, 2023
|
8
|
+
#
|
9
|
+
# https://crystal-lang.org/api/1.7.2/CSV.html (Crystal's CSV library)
|
10
|
+
# https://github.com/ruby/strscan/blob/master/ext/strscan/strscan.c
|
11
|
+
#
|
12
|
+
# Thanks to Sutou Kouhei (kou) for his excellent advice on using scan
|
13
|
+
# ============================================================================
|
14
|
+
# GOALS:
|
15
|
+
# 1. Faster than Ruby's default CSV library
|
16
|
+
# 2. Lightweight code with streamlined and optimized logic
|
17
|
+
# 3. Support most non-compliant CSV variations (eg - @excel, @relax, @strip)
|
18
|
+
#
|
19
|
+
# TODO:
|
20
|
+
# 1. Support IO streaming
|
21
|
+
# 2. Review all encodings, we may be losing speed when mixing encodings
|
22
|
+
# 3. Huge speedup possible if our @unquoted regex reads beyond @cr?@lf's
|
23
|
+
# 4. Will using String#freeze give us a speed up?
|
24
|
+
# 5. Implement support for scan_until(string) <= right now only regex is valid
|
25
|
+
# ============================================================================
|
26
|
+
|
27
|
+
require "strscan"
|
28
|
+
|
29
|
+
# Censive - a quick and lightweight CSV reader/writer built on StringScanner.
#
# Reading: Censive.parse(str_or_path, **opts) returns an array of rows, each
# row being an array of strings. Writing: Censive.writer(target, **opts)
# returns an instance whose #<< formats one row and appends it to the target.
#
# The separator and quote are expected to be single characters.
class Censive < StringScanner
  # Encoding of the string currently being scanned.
  attr_reader :encoding

  # Build an instance from the given arguments and parse it in one step.
  # Accepts exactly the arguments of #initialize; returns the array of rows.
  def self.parse(...)
    new(...).parse
  end

  # Create a writer.
  #
  # obj  - String : a file path; the file is opened for writing, a Censive
  #                 bound to it is yielded, and the block's value is returned.
  #        nil    : write to $stdout.
  #        other  : any sink responding to << (IO, StringIO, Array, ...).
  # opts - forwarded to #initialize (an explicit out: in opts wins over obj).
  #
  # For nil / sink targets the new instance is yielded when a block is given
  # and is always returned. Aborts on a target that cannot accept <<.
  # Raises ArgumentError when a path is given without a block (checked before
  # the file is opened, so nothing is truncated by mistake).
  def self.writer(obj = nil, **opts)
    case obj
    when String
      raise ArgumentError, "a block is required when writing to a file path" unless block_given?

      File.open(obj, "w") { |io| yield new(out: io, **opts) }
    else
      unless obj.nil? || obj.respond_to?(:<<)
        abort "#{File.basename($0)}: invalid #{obj.class} object in writer"
      end

      csv = new(out: obj, **opts)
      yield csv if block_given?
      csv
    end
  end

  # str - CSV text, or (when shorter than 100 characters and naming a readable
  #       file) a path whose contents are read. nil means "no input".
  def initialize(str = nil,
    drop:     false,    # drop trailing empty fields?
    encoding: nil,      # character encoding
    excel:    false,    # literals ="01" formulas =A1 + B2 http://bit.ly/3Y7jIvc
    mode:     :compact, # export mode: compact or full
    out:      nil,      # output stream, needs to respond to <<
    quote:    '"',      # quote character
    relax:    false,    # relax quote parsing so ,"Fo"o, => ,"Fo""o",
    rowsep:   "\n",     # row separator for export
    sep:      ",",      # column separator character
    strip:    false,    # strip fields when reading
    **opts              # grab bag
  )
    # initialize data source
    if str && str.size < 100 && File.readable?(str)
      # block form so the file handle is closed as soon as it has been read
      str = File.open(str, encoding ? "r:#{encoding}" : "r", &:read)
    else
      str ||= ""
      str = str.encode(encoding) if encoding
    end
    super(str)
    reset

    # config options
    @drop     = drop
    @encoding = str.encoding
    @excel    = excel
    @mode     = mode
    @out      = out || $stdout
    @relax    = relax
    @strip    = strip

    # config strings
    @quote    = quote
    @rowsep   = rowsep
    @sep      = sep

    # static strings
    @cr       = "\r"
    @lf       = "\n"
    @es       = ""
    @eq       = "="

    # combinations
    @esc      = @quote * 2
    @seq      = "#{@sep}#{@eq}" # used for parsing in excel mode

    # regexes - built per instance (no /o flag, which would freeze the first
    # instance's separator/quote for the whole process) and with the
    # configurable characters escaped so that "|", ".", "\t", etc. are safe
    sep_re    = Regexp.escape(@sep)
    quote_re  = Regexp.escape(@quote)
    @eoc      = /(?=#{sep_re}|\r|\n|\z)/ # end of cell
    @eol      = /\r\n?|\n/               # end of line
    @escapes  = /(#{quote_re})|#{sep_re}|\r|\n/
    @quotable = /#{sep_re}|\r|\n/
    @quotes   = /#{quote_re}/
    @seps     = /(?:#{sep_re})+/
    @quoted   = @excel ? /=?#{quote_re}/ : @quote
    @unquoted = /[^#{sep_re}\r\n][^#{quote_re}\r\n]*/
    @leadzero = /\A0\d*\z/
  end

  # Rewind the scanner and forget any parsed rows; when str is given it
  # replaces the text being scanned.
  def reset(str = nil)
    @rows = nil
    @cols = @cells = 0

    self.string = str if str
    @encoding = string.encoding
    super()
  end

  # ==[ Parser ]==

  # Parse from the current scan position to the end of the input.
  # Returns the array of rows and records column/cell counts for #stats.
  def parse
    @rows = []
    @cols = @cells = 0 # a re-parse must not add to the previous counts
    while (row = next_row)
      @rows << row
      count = row.size
      @cols = count if count > @cols
      @cells += count
    end
    @rows
  end

  # Read one row. Returns an array of cells ([] for a blank line) or nil at
  # the end of the input.
  def next_row
    return if eos?

    start = pos
    row = []
    while (token = next_token)
      row.push(*token) # token is a single cell or an array of cells
    end

    # a blank line yields an empty row instead of ending the parse; the
    # position check guarantees forward progress so parse cannot spin
    row unless pos == start
  end

  # Read the next cell(s) of the current row. Returns a String (one cell), an
  # Array of Strings (several unquoted cells consumed at once), or nil once
  # the end of the row has been consumed.
  def next_token
    if scan(@quoted) # quoted cell
      token = +""
      loop do
        chunk = scan_until(@quotes) || bomb("unclosed quote")
        token << chunk.delete_suffix(@quote)
        if scan(@quote) # doubled quote => literal quote
          token << @quote
          next
        end
        break if scan(@eoc)

        bomb("invalid character after quote") unless @relax
        token << @quote << (scan_until(@quotes) || bomb("bad inline quote"))
      end
      scan(@sep)
      @strip ? token.strip : token
    elsif (match = scan(@unquoted)) # unquoted cell(s)
      if check(@quote) && !match.chomp!(@sep) # if we see a stray quote
        unless @excel && match.chomp!(@seq)   # unless an excel literal, fix it
          match << (scan_until(@eoc) || bomb("stray quote"))
          scan(@sep)
        end
      end
      tokens = match.split(@sep, -1)
      @strip ? tokens.map!(&:strip) : tokens
    elsif scan(@sep) # empty cell(s)
      match = scan(@seps)
      match ? match.split(@sep, -1) : @es
    else
      scan(@eol)
      nil
    end
  end

  # Yield each parsed row (parsing on first use). Returns an Enumerator when
  # called without a block.
  def each
    return to_enum(:each) unless block_given?

    @rows ||= parse
    @rows.each { |row| yield row }
  end

  # Write every row. With no options the rows go through this instance's own
  # settings; otherwise a fresh writer is built from opts alone.
  def export(**opts)
    out = opts.empty? ? self : self.class.writer(**opts)
    each { |row| out << row }
  end

  # ==[ Helpers ]==

  # returns 2 (must be quoted and escaped), 1 (must be quoted), 0 (neither)
  def grok(str)
    idx = str.index(@escapes)
    return 0 unless idx
    return 2 if Regexp.last_match(1) # first special character is a quote

    str.index(@quotes, idx) ? 2 : 1
  end

  # output a row
  def <<(row)
    # drop trailing empty columns, working on a copy so the caller's row (and
    # the parsed rows) stay intact; &. copes with a row that becomes empty
    if @drop && row.last&.empty?
      row = row.dup
      row.pop while row.last&.empty?
    end

    s, q = @sep, @quote
    cells =
      case @mode
      when :compact
        case @excel ? 2 : grok(row.join)
        when 0
          row
        when 1
          row.map { |col| col.match?(@quotable) ? "#{q}#{col}#{q}" : col }
        else
          row.map do |col|
            if @excel && col.match?(@leadzero)
              "=#{q}#{col}#{q}" # excel literal keeps leading zeros
            else
              case grok(col)
              when 0 then col
              when 1 then "#{q}#{col}#{q}"
              else        "#{q}#{col.gsub(q, @esc)}#{q}"
              end
            end
          end
        end
      when :full
        if @excel
          row.map do |col|
            col.match?(@leadzero) ? "=#{q}#{col}#{q}" : "#{q}#{col.gsub(q, @esc)}#{q}"
          end
        else
          row.map { |col| "#{q}#{col.gsub(q, @esc)}#{q}" }
        end
      else
        raise ArgumentError, "unknown export mode: #{@mode.inspect}"
      end

    @out << (cells.join(s) + @rowsep)
  end

  # Print row/column/cell/byte counts to standard output, parsing first if
  # that has not happened yet.
  def stats
    rows  = (@rows ||= parse)
    bytes = string.bytesize
    wide  = bytes.to_s.size
    puts "%#{wide}d rows"    % rows.size
    puts "%#{wide}d columns" % @cols
    puts "%#{wide}d cells"   % @cells
    puts "%#{wide}d bytes"   % bytes
  end

  # Abort with a message showing where parsing failed. Uses the character
  # position (not the byte offset) and clamps the context window at the start
  # of the string so it never wraps around to the end.
  def bomb(msg)
    at   = charpos
    from = [at - 4, 0].max
    abort "\n#{File.basename($0)}: #{msg} at character #{at} near '#{string[from, 7]}'"
  end
end
|
239
|
+
|
240
|
+
# Demo driver: runs only when this file is executed directly, not when it is
# required as a library.
if __FILE__ == $0
  # read the embedded sample data that follows __END__, up to and including
  # the first blank line, then drop one trailing newline
  raw = DATA.gets("\n\n").chomp
  # raw = File.read(ARGV.first || "lc-2023.csv")
  csv = Censive.new(raw, excel: true, relax: true)
  # no options given, so rows are written through csv itself (its @out
  # defaults to $stdout)
  csv.export # (excel: true) # sep: "|")
end
|
246
|
+
|
247
|
+
__END__
|
248
|
+
"Don",="007",10,"Ed"
|
249
|
+
Name,Age,,,Shoe,,,
|
250
|
+
"Alice",27,5
|
251
|
+
Bob,33,10 1/2
|
252
|
+
Charlie or "Chuck",=B2 + B3,9
|
253
|
+
Subtotal,=sum(B2:B5),="01234"
|
254
|
+
|
255
|
+
A,B,C,D
|
256
|
+
A,B,"C",D
|
257
|
+
A,B,C",D
|
258
|
+
A,B,"C",D
|
259
|
+
|
260
|
+
# first line works in "relax" mode, bottom line is compliant
|
261
|
+
123,"CHO, JOELLE "JOJO"",456
|
262
|
+
123,"CHO, JOELLE ""JOJO""",456
|
263
|
+
|
264
|
+
# Excel mode checking
|
265
|
+
=,=x,x=,="x",="","","=",123,0123,="123",="0123"
|
266
|
+
,=x,x=,x,,,,,,=,,123,="0123",123,,="0123" # <= a little off
|