censive 0.19 → 0.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/censive.gemspec +1 -1
- data/diagram/NFA to Regex.pdf +0 -0
- data/diagram/censive@ce9d51d.png +0 -0
- data/diagram/csv-ragel.dot +24 -0
- data/diagram/csv.dot +57 -0
- data/diagram/csv.png +0 -0
- data/diagram/csv.rl +45 -0
- data/diagram/csv.svg +270 -0
- data/diagram/diagram.dot +26 -0
- data/diagram/diagram.rl +50 -0
- data/lib/censive.rb +124 -91
- data/lib/censive.rb-20230208182732 +266 -0
- data/lib/censive.rb-20230208195221 +276 -0
- data/lib/censive.rb-20230209050227 +282 -0
- data/lib/flay.rb +227 -0
- data/lib/test-censive.rb +12 -0
- data/lib/test-csv.rb +12 -0
- metadata +17 -2
data/lib/censive.rb
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
# censive - A quick and lightweight CSV handling library for Ruby
|
5
5
|
#
|
6
6
|
# Author: Steve Shreeve (steve.shreeve@gmail.com)
|
7
|
-
# Date: Feb
|
7
|
+
# Date: Feb 10, 2023
|
8
8
|
#
|
9
9
|
# https://crystal-lang.org/api/1.7.2/CSV.html (Crystal's CSV library)
|
10
10
|
# https://github.com/ruby/strscan/blob/master/ext/strscan/strscan.c
|
@@ -14,14 +14,22 @@
|
|
14
14
|
# GOALS:
|
15
15
|
# 1. Faster than Ruby's default CSV library
|
16
16
|
# 2. Lightweight code with streamlined and optimized logic
|
17
|
-
# 3. Support most non-compliant CSV variations (
|
17
|
+
# 3. Support most non-compliant CSV variations (@excel, @relax, etc)
|
18
|
+
# 4. Support most commonly used CSV options (@sep, @quote, @strip, @drop, etc)
|
18
19
|
#
|
19
|
-
# TODO:
|
20
|
+
# TODO:
|
21
|
+
# 1. Support IO streaming
|
22
|
+
# 2. Review all encodings, we may be losing speed when mixing encodings
|
23
|
+
# 3. Huge speedup possible if our @unquoted regex reads beyond @cr?@lf's
|
24
|
+
# 4. Will using String#freeze give us a speed up?
|
25
|
+
# 5. Implement support for scan_until(string) <= right now only regex is valid
|
20
26
|
# ============================================================================
|
21
27
|
|
22
28
|
require "strscan"
|
23
29
|
|
24
30
|
class Censive < StringScanner
|
31
|
+
attr :encoding
|
32
|
+
|
25
33
|
# Convenience constructor: forwards every argument to `new` and returns the
# result of calling `parse` on the freshly built instance.
def self.parse(...)
  parser = new(...)
  parser.parse
end
|
@@ -34,83 +42,84 @@ class Censive < StringScanner
|
|
34
42
|
end
|
35
43
|
end
|
36
44
|
|
37
|
-
def initialize(str=
|
38
|
-
drop: false , # drop trailing empty
|
39
|
-
encoding:
|
45
|
+
def initialize(str=nil,
|
46
|
+
drop: false , # drop trailing empty columns?
|
47
|
+
encoding: nil , # character encoding
|
40
48
|
excel: false , # literals ="01" formulas =A1 + B2 http://bit.ly/3Y7jIvc
|
41
49
|
mode: :compact, # export mode: compact or full
|
42
|
-
out:
|
50
|
+
out: nil , # output stream, needs to respond to <<
|
43
51
|
quote: '"' , # quote character
|
44
52
|
relax: false , # relax quote parsing so ,"Fo"o, => ,"Fo""o",
|
45
53
|
rowsep: "\n" , # row separator for export
|
46
54
|
sep: "," , # column separator character
|
47
|
-
strip: false , # strip
|
48
|
-
**opts
|
55
|
+
strip: false , # strip columns when reading
|
56
|
+
**opts # grab bag
|
49
57
|
)
|
50
|
-
# data source
|
51
|
-
str
|
58
|
+
# initialize data source
|
59
|
+
if str && str.size < 100 && File.readable?(str)
|
60
|
+
str = File.open(str, encoding ? "r:#{encoding}" : "r").read
|
61
|
+
else
|
62
|
+
str ||= ""
|
63
|
+
str = str.encode(encoding) if encoding
|
64
|
+
end
|
52
65
|
super(str)
|
53
66
|
reset
|
54
67
|
|
55
|
-
# options
|
68
|
+
# config options
|
69
|
+
@cheat = true
|
56
70
|
@drop = drop
|
71
|
+
@encoding = str.encoding
|
57
72
|
@excel = excel
|
58
73
|
@mode = mode
|
59
|
-
@out = out
|
60
|
-
@quote = quote
|
74
|
+
@out = out || $stdout
|
61
75
|
@relax = relax
|
76
|
+
@strip = strip
|
77
|
+
|
78
|
+
# config strings
|
79
|
+
@quote = quote
|
62
80
|
@rowsep = rowsep
|
63
81
|
@sep = sep
|
64
|
-
@strip = strip
|
65
82
|
|
66
|
-
#
|
67
|
-
@cr
|
68
|
-
@lf
|
69
|
-
@es
|
70
|
-
@eq
|
71
|
-
|
72
|
-
|
73
|
-
@
|
83
|
+
# static strings
|
84
|
+
@cr = "\r"
|
85
|
+
@lf = "\n"
|
86
|
+
@es = ""
|
87
|
+
@eq = "="
|
88
|
+
|
89
|
+
# combinations
|
90
|
+
@esc = (@quote * 2)
|
91
|
+
@seq = [@sep, @eq].join # used for parsing in excel mode
|
92
|
+
|
93
|
+
# regexes
|
94
|
+
@eoc = /(?=#{"\\" + @sep}|#{@cr}|#{@lf}|\z)/o # end of cell
|
95
|
+
@eol = /#{@cr}#{@lf}?|#{@lf}/o # end of line
|
96
|
+
@escapes = /(#{@quote})|#{"\\"+@sep}|#{@cr}|#{@lf}/o
|
97
|
+
@quotable = /#{"\\"+@sep}|#{@cr}|#{@lf}/o
|
98
|
+
@quotes = /#{@quote}/o
|
99
|
+
@seps = /#{@sep}+/o
|
100
|
+
@quoted = @excel ? /(?:=)?#{@quote}/o : @quote
|
101
|
+
@unquoted = /[^#{@sep}#{@cr}#{@lf}][^#{@quote}#{@cr}#{@lf}]*/o
|
102
|
+
@leadzero = /\A0\d*\z/
|
74
103
|
end
|
75
104
|
|
76
105
|
def reset(str=nil)
|
77
|
-
self.string = str if str
|
78
|
-
super()
|
79
106
|
@rows = nil
|
80
107
|
@cols = @cells = 0
|
81
|
-
end
|
82
|
-
|
83
|
-
# ==[ Lexer ]==
|
84
|
-
|
85
|
-
def next_token
|
86
|
-
excel = true if @excel && scan(@eq)
|
87
|
-
|
88
|
-
if scan(@quote) # consume quoted cell
|
89
|
-
token = ""
|
90
|
-
while true
|
91
|
-
token << (scan_until(/#{@quote}/o) or bomb "unclosed quote")[0..-2]
|
92
|
-
token << @quote and next if scan(@quote)
|
93
|
-
break if scan(@eoc)
|
94
|
-
@relax or bomb "invalid character after quote"
|
95
|
-
token << @quote + (scan_until(/#{@quote}/o) or bomb "bad inline quote")
|
96
|
-
end
|
97
|
-
elsif scan(@sep) then return @es
|
98
|
-
elsif scan(@eol) then return nil
|
99
|
-
else # consume unquoted cell
|
100
|
-
token = scan_until(@eoc) or bomb "unexpected character"
|
101
|
-
token.prepend(@eq) if excel
|
102
|
-
end
|
103
|
-
scan(@sep)
|
104
|
-
@strip ? token.strip : token
|
105
|
-
end
|
106
108
|
|
107
|
-
|
108
|
-
|
109
|
+
self.string = str if str
|
110
|
+
@encoding = string.encoding
|
111
|
+
super()
|
109
112
|
end
|
110
113
|
|
111
114
|
# ==[ Parser ]==
|
112
115
|
|
113
116
|
def parse
|
117
|
+
|
118
|
+
# TODO: crazy optimization if NO QUOTES in rest
|
119
|
+
# unless rest.include?(@quote)
|
120
|
+
# @rows = rest...
|
121
|
+
# end
|
122
|
+
|
114
123
|
@rows = []
|
115
124
|
while row = next_row
|
116
125
|
@rows << row
|
@@ -122,18 +131,71 @@ class Censive < StringScanner
|
|
122
131
|
end
|
123
132
|
|
124
133
|
def next_row
|
134
|
+
if @cheat and line = scan_until(@eol)
|
135
|
+
row = line.chomp!.split(@sep, -1)
|
136
|
+
row.each do |col|
|
137
|
+
next if (saw = col.count(@quote)).zero?
|
138
|
+
next if (saw == 2) && col.delete_prefix!(@quote) && col.delete_suffix!(@quote)
|
139
|
+
@cheat = false
|
140
|
+
break
|
141
|
+
end if line.include?(@quote)
|
142
|
+
@cheat and return @strip ? row.each(&:strip!) : row
|
143
|
+
unscan
|
144
|
+
end
|
145
|
+
|
125
146
|
token = next_token or return
|
126
|
-
row = [
|
127
|
-
row
|
147
|
+
row = []
|
148
|
+
row.push(*token)
|
149
|
+
row.push(*token) while token = next_token
|
128
150
|
row
|
129
151
|
end
|
130
152
|
|
153
|
+
# Consume the next cell (or run of cells) at the scan pointer.
#
# Returns a String for a single quoted cell or a lone empty cell, an Array of
# Strings when several unquoted/empty cells were consumed in one scan, or nil
# once an end-of-line (or end of input) has been reached.
def next_token
  if scan(@quoted) # quoted cell
    token = ""
    loop do
      # take everything up to and including the next quote, then drop that quote
      chunk = scan_until(@quotes) || bomb("unclosed quote")
      token << chunk[0..-2]

      # a doubled quote is an escaped literal quote; keep reading the cell
      if scan(@quote)
        token << @quote
        next
      end

      # a quote followed by end-of-cell closes the cell
      break if scan(@eoc)

      # otherwise the quote was inline; only tolerated in relax mode
      bomb "invalid character after quote" unless @relax
      token << @quote + (scan_until(@quotes) || bomb("bad inline quote"))
    end
    scan(@sep)
    @strip ? token.strip : token
  elsif match = scan(@unquoted) # unquoted cell(s)
    # stopped right before a quote that does not start a fresh cell: stray quote
    if check(@quote) && !match.chomp!(@sep)
      # an excel literal (sep followed by =) is left for the quoted branch
      unless @excel && match.chomp!(@seq)
        match << (scan_until(@eoc) || bomb("stray quote"))
        scan(@sep)
      end
    end
    tokens = match.split(@sep, -1)
    @strip ? tokens.map!(&:strip) : tokens
  elsif scan(@sep)
    # one or more empty cells in a row
    extra = scan(@seps)
    extra ? extra.split(@sep, -1) : @es
  else
    # end of row (or end of input): swallow the line ending, signal with nil
    scan(@eol)
    nil
  end
end
|
182
|
+
|
183
|
+
# Yield each parsed row, parsing first if no rows are cached in @rows.
#
# Without a block an Enumerator over the rows is returned instead of raising
# LocalJumpError, matching the usual Ruby `each` contract.
def each
  return enum_for(:each) unless block_given?

  @rows ||= parse
  @rows.each { |row| yield row }
end
|
187
|
+
|
188
|
+
# Write every row to an output target.
#
# With no options the rows are appended to this object (via its own `<<`);
# otherwise a separate writer is built from the options through
# `self.class.writer` and the rows are appended to that instead.
def export(**opts)
  target =
    if opts.empty?
      self
    else
      self.class.writer(**opts)
    end

  each do |row|
    target << row
  end
end
|
192
|
+
|
131
193
|
# ==[ Helpers ]==
|
132
194
|
|
133
195
|
# returns 2 (must be quoted and escaped), 1 (must be quoted), 0 (neither)
|
134
196
|
def grok(str)
|
135
|
-
if idx = str.index(
|
136
|
-
$1 ? 2 : str.index(
|
197
|
+
if idx = str.index(@escapes)
|
198
|
+
$1 ? 2 : str.index(@quotes, idx) ? 2 : 1
|
137
199
|
else
|
138
200
|
0
|
139
201
|
end
|
@@ -153,11 +215,11 @@ class Censive < StringScanner
|
|
153
215
|
row
|
154
216
|
when 1
|
155
217
|
row.map do |col|
|
156
|
-
col.match?(
|
218
|
+
col.match?(@quotable) ? "#{q}#{col}#{q}" : col
|
157
219
|
end
|
158
220
|
else
|
159
221
|
row.map do |col|
|
160
|
-
@excel && col =~
|
222
|
+
@excel && col =~ @leadzero ? "=#{q}#{col}#{q}" :
|
161
223
|
case grok(col)
|
162
224
|
when 0 then col
|
163
225
|
when 1 then "#{q}#{col}#{q}"
|
@@ -168,7 +230,7 @@ class Censive < StringScanner
|
|
168
230
|
when :full
|
169
231
|
if @excel
|
170
232
|
row.map do |col|
|
171
|
-
col =~
|
233
|
+
col =~ @leadzero ? "=#{q}#{col}#{q}" : "#{q}#{col.gsub(q, @esc)}#{q}"
|
172
234
|
end
|
173
235
|
else
|
174
236
|
row.map {|col| "#{q}#{col.gsub(q, @esc)}#{q}" }
|
@@ -178,16 +240,6 @@ class Censive < StringScanner
|
|
178
240
|
@out << out + @rowsep
|
179
241
|
end
|
180
242
|
|
181
|
-
def each
|
182
|
-
@rows ||= parse
|
183
|
-
@rows.each {|row| yield row }
|
184
|
-
end
|
185
|
-
|
186
|
-
def export(**opts)
|
187
|
-
out = opts.empty? ? self : self.class.writer(**opts)
|
188
|
-
each {|row| out << row }
|
189
|
-
end
|
190
|
-
|
191
243
|
def stats
|
192
244
|
wide = string.size.to_s.size
|
193
245
|
puts "%#{wide}d rows" % @rows.size
|
@@ -195,27 +247,8 @@ class Censive < StringScanner
|
|
195
247
|
puts "%#{wide}d cells" % @cells
|
196
248
|
puts "%#{wide}d bytes" % string.size
|
197
249
|
end
|
198
|
-
end
|
199
250
|
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
csv = Censive.new(raw, excel: true, relax: true)
|
204
|
-
csv.export # (sep: ",", excel: true)
|
251
|
+
# Abort the program with a parse error that shows where scanning stopped.
#
# msg - short description of the problem.
#
# The position is reported in characters (charpos) so that it lines up with
# the character-indexed context excerpt, and the excerpt start is clamped at 0
# so an error within the first four characters does not wrap around to the end
# of the input.
def bomb(msg)
  at   = charpos
  near = string[[at - 4, 0].max, 7]
  abort "\n#{File.basename($0)}: #{msg} at character #{at} near '#{near}'"
end
|
205
254
|
end
|
206
|
-
|
207
|
-
__END__
|
208
|
-
Name,Age,Shoe
|
209
|
-
Alice,27,5
|
210
|
-
Bob,33,10 1/2
|
211
|
-
Charlie or "Chuck",=B2 + B3,9
|
212
|
-
"Doug E Fresh",="007",10
|
213
|
-
Subtotal,=sum(B2:B5),="01234"
|
214
|
-
|
215
|
-
# first line works in "relax" mode, bottom line is compliant
|
216
|
-
123,"CHO, JOELLE "JOJO"",456
|
217
|
-
123,"CHO, JOELLE ""JOJO""",456
|
218
|
-
|
219
|
-
# Excel mode checking
|
220
|
-
=,=x,x=,="x",="","","=",123,0123,="123",="0123"
|
221
|
-
,=x,x=,x,,,,,,=,,123,="0123",123,,="0123" # <= a little off
|
@@ -0,0 +1,266 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# ============================================================================
|
4
|
+
# censive - A quick and lightweight CSV handling library for Ruby
|
5
|
+
#
|
6
|
+
# Author: Steve Shreeve (steve.shreeve@gmail.com)
|
7
|
+
# Date: Feb 8, 2023
|
8
|
+
#
|
9
|
+
# https://crystal-lang.org/api/1.7.2/CSV.html (Crystal's CSV library)
|
10
|
+
# https://github.com/ruby/strscan/blob/master/ext/strscan/strscan.c
|
11
|
+
#
|
12
|
+
# Thanks to Sutou Kouhei (kou) for his excellent advice on using scan
|
13
|
+
# ============================================================================
|
14
|
+
# GOALS:
|
15
|
+
# 1. Faster than Ruby's default CSV library
|
16
|
+
# 2. Lightweight code with streamlined and optimized logic
|
17
|
+
# 3. Support most non-compliant CSV variations (eg - @excel, @relax, @strip)
|
18
|
+
#
|
19
|
+
# TODO:
|
20
|
+
# 1. Support IO streaming
|
21
|
+
# 2. Review all encodings, we may be losing speed when mixing encodings
|
22
|
+
# 3. Huge speedup possible if our @unquoted regex reads beyond @cr?@lf's
|
23
|
+
# 4. Will using String#freeze give us a speed up?
|
24
|
+
# 5. Implement support for scan_until(string) <= right now only regex is valid
|
25
|
+
# ============================================================================
|
26
|
+
|
27
|
+
require "strscan"
|
28
|
+
|
29
|
+
# Censive: a small CSV reader/writer built on StringScanner.
#
# Reading: Censive.parse(str_or_path, **options) returns an Array of rows,
# each row an Array of Strings. Writing: rows appended with `<<` are formatted
# and sent to the `out:` sink.
class Censive < StringScanner
  # Encoding of the string currently being scanned.
  attr_reader :encoding

  # Build a parser from the given arguments and return its parsed rows.
  def self.parse(...)
    new(...).parse
  end

  # Build a writer.
  #
  # obj  - a String (path of a file to create; a block is required and receives
  #        the writer), nil (write to $stdout), or any sink responding to `<<`
  #        (IO, StringIO, Array, ...).
  # opts - forwarded to Censive.new.
  #
  # For a path the block's value is returned; otherwise the writer itself is
  # returned, after yielding it when a block is given.
  def self.writer(obj = nil, **opts, &code)
    case obj
    when String
      File.open(obj, "w") { |io| yield new(out: io, **opts) }
    else
      unless obj.nil? || obj.respond_to?(:<<)
        abort "#{File.basename($0)}: invalid #{obj.class} object in writer"
      end
      writer = new(out: obj, **opts)
      yield writer if block_given?
      writer
    end
  end

  # str      - CSV text, or the path of a readable file (only considered when
  #            shorter than 100 characters); nil means empty input.
  # drop     - drop trailing empty fields when writing?
  # encoding - character encoding to read the file as / transcode the text to.
  # excel    - literals ="01" formulas =A1 + B2 http://bit.ly/3Y7jIvc
  # mode     - export mode: :compact or :full
  # out      - output stream, needs to respond to <<; defaults to $stdout
  # quote    - quote character
  # relax    - relax quote parsing so ,"Fo"o, => ,"Fo""o",
  # rowsep   - row separator for export
  # sep      - column separator character
  # strip    - strip fields when reading
  # opts     - grab bag (accepted and ignored)
  def initialize(str = nil,
                 drop: false,
                 encoding: nil,
                 excel: false,
                 mode: :compact,
                 out: nil,
                 quote: '"',
                 relax: false,
                 rowsep: "\n",
                 sep: ",",
                 strip: false,
                 **opts)
    # initialize data source; File.file? and the NUL check keep short CSV text
    # that names a directory or contains "\0" from raising in the path probe
    if str && str.size < 100 && !str.include?("\0") && File.file?(str) && File.readable?(str)
      # block form closes the handle instead of leaking it
      str = File.open(str, encoding ? "r:#{encoding}" : "r", &:read)
    else
      str ||= ""
      str = str.encode(encoding) if encoding
    end
    super(str)
    reset

    # config options
    @drop     = drop
    @encoding = str.encoding
    @excel    = excel
    @mode     = mode
    @out      = out || $stdout
    @relax    = relax
    @strip    = strip

    # config strings
    @quote    = quote
    @rowsep   = rowsep
    @sep      = sep

    # static strings
    @cr       = "\r"
    @lf       = "\n"
    @es       = ""
    @eq       = "="

    # combinations
    @esc      = (@quote * 2)
    @seq      = [@sep, @eq].join # used for parsing in excel mode

    # regexes: built per instance (no /o, which would freeze the first
    # instance's sep/quote for every later instance) and regex-escaped
    q = Regexp.escape(@quote)
    s = Regexp.escape(@sep)
    @eoc      = /(?=#{s}|\r|\n|\z)/ # end of cell
    @eol      = /\r\n?|\n/          # end of line
    @escapes  = /(#{q})|#{s}|\r|\n/
    @quotable = /#{s}|\r|\n/
    @quotes   = /#{q}/
    @seps     = /(?:#{s})+/
    @quoted   = @excel ? /=?#{q}/ : @quote
    @unquoted = /[^#{s}\r\n][^#{q}\r\n]*/
    @leadzero = /\A0\d*\z/
  end

  # Forget cached rows and counters, optionally switch to a new input string,
  # and rewind the scan pointer.
  def reset(str = nil)
    @rows = nil
    @cols = @cells = 0

    self.string = str if str
    @encoding = string.encoding
    super()
  end

  # ==[ Parser ]==

  # Parse rows from the current scan position, caching them in @rows and
  # tracking the widest row (@cols) and total cell count (@cells).
  # Returns the Array of rows.
  def parse
    @rows = []
    while (row = next_row)
      @rows << row
      width = row.size
      @cols = width if width > @cols
      @cells += width
    end
    @rows
  end

  # Read one row as an Array of Strings, or nil when next_token has nothing to
  # give. NOTE: next_token also returns nil on an empty line, so an empty line
  # ends #parse just like end of input does.
  def next_row
    token = next_token or return
    row = []
    row.push(*token)
    row.push(*token) while (token = next_token)
    row
  end

  # Consume the next cell (or run of cells) at the scan pointer.
  #
  # Returns a String for a quoted cell or a lone empty cell, an Array of
  # Strings when several unquoted/empty cells were consumed in one scan, or
  # nil at end of line / end of input.
  def next_token
    if scan(@quoted) # quoted cell
      token = ""
      loop do
        # text up to and including the next quote, minus that quote
        chunk = scan_until(@quotes) || bomb("unclosed quote")
        token << chunk[0..-2]

        # doubled quote: an escaped literal quote, keep reading
        if scan(@quote)
          token << @quote
          next
        end

        break if scan(@eoc) # closing quote followed by end of cell

        # inline quote: only tolerated in relax mode
        bomb "invalid character after quote" unless @relax
        token << @quote + (scan_until(@quotes) || bomb("bad inline quote"))
      end
      scan(@sep)
      @strip ? token.strip : token
    elsif (match = scan(@unquoted)) # unquoted cell(s)
      if check(@quote) && !match.chomp!(@sep) # if we see a stray quote
        unless @excel && match.chomp!(@seq)   # unless an excel literal, fix it
          match << (scan_until(@eoc) || bomb("stray quote"))
          scan(@sep)
        end
      end
      tokens = match.split(@sep, -1)
      @strip ? tokens.map!(&:strip) : tokens
    elsif scan(@sep)
      # one or more empty cells in a row
      extra = scan(@seps)
      extra ? extra.split(@sep, -1) : @es
    else
      scan(@eol)
      nil
    end
  end

  # Yield each parsed row (parsing on first use). Returns an Enumerator when
  # called without a block.
  def each
    return enum_for(:each) unless block_given?

    @rows ||= parse
    @rows.each { |row| yield row }
  end

  # Write every row to this object's sink, or, when options are given, to a
  # separate writer built from those options.
  def export(**opts)
    out = opts.empty? ? self : self.class.writer(**opts)
    each { |row| out << row }
  end

  # ==[ Helpers ]==

  # returns 2 (must be quoted and escaped), 1 (must be quoted), 0 (neither)
  def grok(str)
    idx = str.index(@escapes) or return 0
    return 2 if Regexp.last_match(1) # the first special character is a quote

    str.index(@quotes, idx) ? 2 : 1
  end

  # output a row
  #
  # row - Array of cells. Formats it according to @mode/@excel and appends the
  # line plus @rowsep to @out. Returns whatever `@out << line` returns.
  # Raises ArgumentError for an export mode other than :compact or :full.
  def <<(row)
    # drop trailing empty columns, on a copy so the caller's row is untouched;
    # &. stops cleanly once every cell has been dropped
    if @drop
      row = row.dup
      row.pop while row.last&.empty?
    end

    s, q = @sep, @quote
    cells =
      case @mode
      when :compact
        # excel mode always inspects cell by cell; otherwise grok the whole
        # row once to pick the cheapest path
        case @excel ? 2 : grok(row.join)
        when 0
          row
        when 1
          row.map { |col| col.match?(@quotable) ? "#{q}#{col}#{q}" : col }
        else
          row.map do |col|
            if @excel && col.match?(@leadzero)
              "=#{q}#{col}#{q}"
            else
              case grok(col)
              when 0 then col
              when 1 then "#{q}#{col}#{q}"
              else        "#{q}#{col.gsub(q, @esc)}#{q}"
              end
            end
          end
        end
      when :full
        row.map do |col|
          if @excel && col.match?(@leadzero)
            "=#{q}#{col}#{q}"
          else
            "#{q}#{col.gsub(q, @esc)}#{q}"
          end
        end
      else
        raise ArgumentError, "unknown export mode: #{@mode.inspect}"
      end

    @out << (cells.join(s) + @rowsep)
  end

  # Print row/column/cell/byte counts to standard output, parsing first if
  # nothing has been parsed yet.
  def stats
    @rows ||= parse
    bytes = string.bytesize # the line is labelled "bytes", so count bytes
    wide  = bytes.to_s.size
    puts "%#{wide}d rows"    % @rows.size
    puts "%#{wide}d columns" % @cols
    puts "%#{wide}d cells"   % @cells
    puts "%#{wide}d bytes"   % bytes
  end

  # Abort with a parse error showing where scanning stopped. The position is
  # in characters (charpos) to match the character-indexed excerpt, and the
  # excerpt start is clamped at 0 so early errors do not wrap to the end.
  def bomb(msg)
    at   = charpos
    near = string[[at - 4, 0].max, 7]
    abort "\n#{File.basename($0)}: #{msg} at character #{at} near '#{near}'"
  end
end
|
239
|
+
|
240
|
+
# Demo entry point, run only when this file is executed directly: reads the
# first blank-line-delimited paragraph of the embedded DATA section, parses it
# with excel and relax modes on, and exports it.
if $0 == __FILE__
  # sample = File.read(ARGV.first || "lc-2023.csv")
  sample = DATA.gets("\n\n").chomp
  Censive.new(sample, excel: true, relax: true).export # (excel: true) # sep: "|")
end
|
246
|
+
|
247
|
+
__END__
|
248
|
+
"Don",="007",10,"Ed"
|
249
|
+
Name,Age,,,Shoe,,,
|
250
|
+
"Alice",27,5
|
251
|
+
Bob,33,10 1/2
|
252
|
+
Charlie or "Chuck",=B2 + B3,9
|
253
|
+
Subtotal,=sum(B2:B5),="01234"
|
254
|
+
|
255
|
+
A,B,C,D
|
256
|
+
A,B,"C",D
|
257
|
+
A,B,C",D
|
258
|
+
A,B,"C",D
|
259
|
+
|
260
|
+
# first line works in "relax" mode, bottom line is compliant
|
261
|
+
123,"CHO, JOELLE "JOJO"",456
|
262
|
+
123,"CHO, JOELLE ""JOJO""",456
|
263
|
+
|
264
|
+
# Excel mode checking
|
265
|
+
=,=x,x=,="x",="","","=",123,0123,="123",="0123"
|
266
|
+
,=x,x=,x,,,,,,=,,123,="0123",123,,="0123" # <= a little off
|