censive 0.19 → 0.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/censive.gemspec +1 -1
- data/diagram/NFA to Regex.pdf +0 -0
- data/diagram/censive@ce9d51d.png +0 -0
- data/diagram/csv-ragel.dot +24 -0
- data/diagram/csv.dot +57 -0
- data/diagram/csv.png +0 -0
- data/diagram/csv.rl +45 -0
- data/diagram/csv.svg +270 -0
- data/diagram/diagram.dot +26 -0
- data/diagram/diagram.rl +50 -0
- data/lib/censive.rb +124 -91
- data/lib/censive.rb-20230208182732 +266 -0
- data/lib/censive.rb-20230208195221 +276 -0
- data/lib/censive.rb-20230209050227 +282 -0
- data/lib/flay.rb +227 -0
- data/lib/test-censive.rb +12 -0
- data/lib/test-csv.rb +12 -0
- metadata +17 -2
@@ -0,0 +1,276 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# ============================================================================
|
4
|
+
# censive - A quick and lightweight CSV handling library for Ruby
|
5
|
+
#
|
6
|
+
# Author: Steve Shreeve (steve.shreeve@gmail.com)
|
7
|
+
# Date: Feb 8, 2023
|
8
|
+
#
|
9
|
+
# https://crystal-lang.org/api/1.7.2/CSV.html (Crystal's CSV library)
|
10
|
+
# https://github.com/ruby/strscan/blob/master/ext/strscan/strscan.c
|
11
|
+
#
|
12
|
+
# Thanks to Sutou Kouhei (kou) for his excellent advice on using scan
|
13
|
+
# ============================================================================
|
14
|
+
# GOALS:
|
15
|
+
# 1. Faster than Ruby's default CSV library
|
16
|
+
# 2. Lightweight code with streamlined and optimized logic
|
17
|
+
# 3. Support most non-compliant CSV variations (eg - @excel, @relax, @strip)
|
18
|
+
#
|
19
|
+
# TODO:
|
20
|
+
# 1. Support IO streaming
|
21
|
+
# 2. Review all encodings, we may be losing speed when mixing encodings
|
22
|
+
# 3. Huge speedup possible if our @unquoted regex reads beyond @cr?@lf's
|
23
|
+
# 4. Will using String#freeze give us a speed up?
|
24
|
+
# 5. Implement support for scan_until(string) <= right now only regex is valid
|
25
|
+
# ============================================================================
|
26
|
+
|
27
|
+
require "strscan"
|
28
|
+
|
29
|
+
# Censive is a small CSV reader/writer layered on top of StringScanner.
#
# Reading:  Censive.parse(text_or_path, **opts) returns an Array of rows, each
#           row being an Array of String cells.
# Writing:  Censive.writer(...) or Censive.new(out: io) gives an object whose
#           #<< method formats one row and appends it to the output.
#
# NOTE: when no token can be read at the start of a row (end of input, or a
# blank line) #next_row returns nil, which is what makes #parse stop.
class Censive < StringScanner
  attr_reader :encoding

  # One-shot parse; all arguments are forwarded to ::new.
  def self.parse(...)
    new(...).parse
  end

  # Builds a writer.
  #
  # obj  - a String (path: the file is opened for writing, the writer is
  #        yielded to the block, and the file is closed afterwards), an IO, or
  #        nil (the writer then falls back to $stdout).
  # opts - forwarded to ::new.
  #
  # Aborts the program for any other kind of object.
  def self.writer(obj = nil, **opts, &code)
    case obj
    when String then File.open(obj, "w") { |io| yield new(out: io, **opts, &code) }
    when IO, nil then new(out: obj, **opts, &code)
    else abort "#{File.basename($0)}: invalid #{obj.class} object in writer"
    end
  end

  # str - CSV text, or (when shorter than 100 characters and naming a readable
  #       regular file) a path whose contents are loaded.
  def initialize(str = nil,
    drop:     false   , # drop trailing empty fields?
    encoding: nil     , # character encoding
    excel:    false   , # literals ="01" formulas =A1 + B2 http://bit.ly/3Y7jIvc
    mode:     :compact, # export mode: compact or full
    out:      nil     , # output stream, needs to respond to <<
    quote:    '"'     , # quote character
    relax:    false   , # relax quote parsing so ,"Fo"o, => ,"Fo""o",
    rowsep:   "\n"    , # row separator for export
    sep:      ","     , # column separator character
    strip:    false   , # strip fields when reading
    **opts              # grab bag
  )
    # initialize data source
    if str && str.size < 100 && File.file?(str) && File.readable?(str)
      # block form closes the file handle as soon as the contents are read
      str = File.open(str, encoding ? "r:#{encoding}" : "r", &:read)
    else
      str ||= ""
      str = str.encode(encoding) if encoding
    end
    super(str)
    reset # also records @encoding from the scanned string

    # config options
    @drop   = drop
    @excel  = excel
    @mode   = mode
    @out    = out || $stdout
    @relax  = relax
    @strip  = strip

    # config strings
    @quote  = quote
    @rowsep = rowsep
    @sep    = sep

    # static strings
    @cr = "\r"
    @lf = "\n"
    @es = ""
    @eq = "="

    # combinations
    @esc = (@quote * 2)
    @seq = [@sep, @eq].join # used for parsing in excel mode

    # regexes -- built per instance (no /o flag, which would freeze the first
    # instance's separator and quote into every later instance) and with the
    # configurable strings escaped so regex metacharacters are taken literally
    sep_re   = Regexp.escape(@sep)
    quote_re = Regexp.escape(@quote)

    @eoc      = /(?=#{sep_re}|\r|\n|\z)/ # end of cell
    @eol      = /\r\n?|\n/               # end of line
    @escapes  = /(#{quote_re})|#{sep_re}|\r|\n/
    @quotable = /#{sep_re}|\r|\n/
    @quotes   = /#{quote_re}/
    @seps     = /(?:#{sep_re})+/
    @quoted   = @excel ? /=?#{quote_re}/ : @quote
    @unquoted = /[^#{sep_re}\r\n][^#{quote_re}]*/
    @leadzero = /\A0\d*\z/
  end

  # Rewinds the scanner (optionally onto a new string) and clears all parse
  # state: cached rows, counters and the queue of held tokens.
  def reset(str = nil)
    @rows = nil
    @hold = []
    @cols = @cells = 0

    #!# TODO: reset all encodings?
    self.string = str if str
    @encoding = string.encoding
    super()
  end

  # ==[ Parser ]==

  # Reads rows from the current scan position until #next_row returns nil.
  # Returns the Array of rows and updates the column/cell counters.
  def parse
    @rows = []
    @hold = []
    while (row = next_row)
      @rows << row
      count = row.size
      @cols = count if count > @cols
      @cells += count
    end
    @rows
  end

  # Returns the next row as an Array of cells, or nil when no token is
  # available at the start of the row.
  def next_row
    token = next_token
    return unless token

    row = []
    while token
      row.push(*token) # a token is either one String or an Array of cells
      token = next_token
    end
    row
  end

  # Returns the next token of the current row: a String (one cell), an Array
  # of Strings (several cells), or nil at the end of a row.
  def next_token
    # a previous unquoted scan may have queued cells (nil marks a row end)
    return @hold.shift unless @hold.empty?

    if scan(@quoted) # quoted cell
      token = +""
      loop do
        chunk = scan_until(@quotes) || bomb("unclosed quote")
        token << chunk.delete_suffix(@quote)
        if scan(@quote) # doubled quote => literal quote, keep reading
          token << @quote
          next
        end
        break if scan(@eoc)
        @relax || bomb("invalid character after quote")
        # relaxed mode: keep the stray quote and read up to the next one
        token << @quote + (scan_until(@quotes) || bomb("bad inline quote"))
      end
      scan(@sep)
      @strip ? token.strip : token
    elsif (match = scan(@unquoted)) # unquoted cell(s), possibly many lines
      if check(@quote) && !match.chomp!(@sep) && !match.end_with?(@cr, @lf)
        unless @excel && match.chomp!(@seq) # unless an excel literal, fix it
          match << (scan_until(@eoc) || bomb("stray quote"))
          scan(@sep)
        end
      end
      # queue every line's cells, separated by nil row-end markers
      match.split(@eol, -1).each_with_index do |line, i|
        if line.empty?
          @hold.push(nil)
        else
          @hold.push(nil) if i > 0
          cells = line.split(@sep, -1)
          @hold.push(@strip ? cells.map!(&:strip) : cells)
        end
      end
      @hold.shift
    elsif scan(@sep) # one or more empty cells
      match = scan(@seps)
      match ? match.split(@sep, -1) : @es
    else
      scan(@eol)
      nil
    end
  end

  # Yields each parsed row (parsing first if needed). Without a block an
  # Enumerator is returned instead.
  def each
    return enum_for(:each) unless block_given?

    @rows ||= parse
    @rows.each { |row| yield row }
  end

  # Writes every row either through this object (no options) or through a
  # fresh writer built from the given options.
  def export(**opts)
    out = opts.empty? ? self : self.class.writer(**opts)
    each { |row| out << row }
  end

  # ==[ Helpers ]==

  # returns 2 (must be quoted and escaped), 1 (must be quoted), 0 (neither)
  def grok(str)
    idx = str.index(@escapes)
    return 0 unless idx
    return 2 if Regexp.last_match(1) # the first special character is a quote

    str.index(@quotes, idx) ? 2 : 1
  end

  # output a row
  #
  # Cells are converted with #to_s; the caller's array is never modified.
  # Returns the result of appending to the output object.
  def <<(row)
    cells = row.map(&:to_s)

    # drop trailing empty columns (safe even when every column is empty)
    cells.pop while @drop && !cells.empty? && cells.last.empty?

    q = @quote
    fields =
      case @mode
      when :compact
        case @excel ? 2 : grok(cells.join)
        when 0 then cells
        when 1 then cells.map { |col| col.match?(@quotable) ? "#{q}#{col}#{q}" : col }
        else
          cells.map do |col|
            if @excel && col.match?(@leadzero)
              "=#{q}#{col}#{q}"
            else
              case grok(col)
              when 0 then col
              when 1 then "#{q}#{col}#{q}"
              else        "#{q}#{col.gsub(q, @esc)}#{q}"
              end
            end
          end
        end
      when :full
        cells.map do |col|
          @excel && col.match?(@leadzero) ? "=#{q}#{col}#{q}" : "#{q}#{col.gsub(q, @esc)}#{q}"
        end
      else
        raise ArgumentError, "invalid export mode: #{@mode.inspect}"
      end

    @out << fields.join(@sep) + @rowsep
  end

  # Prints row/column/cell/byte counts, parsing first if that has not
  # happened yet.
  def stats
    @rows ||= parse
    bytes = string.bytesize
    wide  = bytes.to_s.size
    puts "%#{wide}d rows"    % @rows.size
    puts "%#{wide}d columns" % @cols
    puts "%#{wide}d cells"   % @cells
    puts "%#{wide}d bytes"   % bytes
  end

  # Aborts the program with a message that shows where parsing failed.
  def bomb(msg)
    at   = charpos           # character (not byte) offset of the scanner
    from = [at - 4, 0].max   # never let the context window wrap around
    abort "\n#{File.basename($0)}: #{msg} at character #{at} near '#{string[from, 7]}'"
  end
end
|
249
|
+
|
250
|
+
# Demo driver: only runs when this file is executed directly. It reads the
# first blank-line-delimited section that follows __END__ and re-exports it
# to standard output in excel + relaxed mode.
if $PROGRAM_NAME == __FILE__
  sample = DATA.gets("\n\n").chomp
  # sample = File.read(ARGV.first || "lc-2023.csv")
  Censive.new(sample, excel: true, relax: true).export # (excel: true) # sep: "|")
end
|
256
|
+
|
257
|
+
__END__
|
258
|
+
"Don",="007",10,11,"Ed",20
|
259
|
+
Name,Age,,,Shoe,,,
|
260
|
+
"Alice",27,5
|
261
|
+
Bob,33,10 1/2
|
262
|
+
Charlie or "Chuck",=B2 + B3,9
|
263
|
+
Subtotal,=sum(B2:B5),="01234"
|
264
|
+
|
265
|
+
A,B,C,D
|
266
|
+
A,B,"C",D
|
267
|
+
A,B,C",D
|
268
|
+
A,B,"C",D
|
269
|
+
|
270
|
+
# first line works in "relax" mode, bottom line is compliant
|
271
|
+
123,"CHO, JOELLE "JOJO"",456
|
272
|
+
123,"CHO, JOELLE ""JOJO""",456
|
273
|
+
|
274
|
+
# Excel mode checking
|
275
|
+
=,=x,x=,="x",="","","=",123,0123,="123",="0123"
|
276
|
+
,=x,x=,x,,,,,,=,,123,="0123",123,,="0123" # <= a little off
|
@@ -0,0 +1,282 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# ============================================================================
|
4
|
+
# censive - A quick and lightweight CSV handling library for Ruby
|
5
|
+
#
|
6
|
+
# Author: Steve Shreeve (steve.shreeve@gmail.com)
|
7
|
+
# Date: Feb 9, 2023
|
8
|
+
#
|
9
|
+
# https://crystal-lang.org/api/1.7.2/CSV.html (Crystal's CSV library)
|
10
|
+
# https://github.com/ruby/strscan/blob/master/ext/strscan/strscan.c
|
11
|
+
#
|
12
|
+
# Thanks to Sutou Kouhei (kou) for his excellent advice on using scan
|
13
|
+
# ============================================================================
|
14
|
+
# GOALS:
|
15
|
+
# 1. Faster than Ruby's default CSV library
|
16
|
+
# 2. Lightweight code with streamlined and optimized logic
|
17
|
+
# 3. Support most non-compliant CSV variations (@excel, @relax, etc)
|
18
|
+
# 4. Support most commonly used CSV options (@sep, @quote, @strip, @drop, etc)
|
19
|
+
#
|
20
|
+
# TODO:
|
21
|
+
# 1. Support IO streaming
|
22
|
+
# 2. Review all encodings, we may be losing speed when mixing encodings
|
23
|
+
# 3. Huge speedup possible if our @unquoted regex reads beyond @cr?@lf's
|
24
|
+
# 4. Will using String#freeze give us a speed up?
|
25
|
+
# 5. Implement support for scan_until(string) <= right now only regex is valid
|
26
|
+
# ============================================================================
|
27
|
+
|
28
|
+
require "strscan"
|
29
|
+
|
30
|
+
# Censive is a small CSV reader/writer layered on top of StringScanner.
#
# Reading:  Censive.parse(text_or_path, **opts) returns an Array of rows, each
#           row being an Array of String cells.
# Writing:  Censive.writer(...) or Censive.new(out: io) gives an object whose
#           #<< method formats one row and appends it to the output.
#
# NOTE: on the token path, when no token can be read at the start of a row
# (end of input, or a blank line) #next_row returns nil, which is what makes
# #parse stop.
class Censive < StringScanner
  attr_reader :encoding

  # One-shot parse; all arguments are forwarded to ::new.
  def self.parse(...)
    new(...).parse
  end

  # Builds a writer.
  #
  # obj  - a String (path: the file is opened for writing, the writer is
  #        yielded to the block, and the file is closed afterwards), an IO, or
  #        nil (the writer then falls back to $stdout).
  # opts - forwarded to ::new.
  #
  # Aborts the program for any other kind of object.
  def self.writer(obj = nil, **opts, &code)
    case obj
    when String then File.open(obj, "w") { |io| yield new(out: io, **opts, &code) }
    when IO, nil then new(out: obj, **opts, &code)
    else abort "#{File.basename($0)}: invalid #{obj.class} object in writer"
    end
  end

  # str - CSV text, or (when shorter than 100 characters and naming a readable
  #       regular file) a path whose contents are loaded.
  def initialize(str = nil,
    drop:     false   , # drop trailing empty columns?
    encoding: nil     , # character encoding
    excel:    false   , # literals ="01" formulas =A1 + B2 http://bit.ly/3Y7jIvc
    mode:     :compact, # export mode: compact or full
    out:      nil     , # output stream, needs to respond to <<
    quote:    '"'     , # quote character
    relax:    false   , # relax quote parsing so ,"Fo"o, => ,"Fo""o",
    rowsep:   "\n"    , # row separator for export
    sep:      ","     , # column separator character
    strip:    false   , # strip columns when reading
    **opts              # grab bag
  )
    # initialize data source
    if str && str.size < 100 && File.file?(str) && File.readable?(str)
      # block form closes the file handle as soon as the contents are read
      str = File.open(str, encoding ? "r:#{encoding}" : "r", &:read)
    else
      str ||= ""
      str = str.encode(encoding) if encoding
    end
    super(str)
    reset # also records @encoding and arms the @cheat fast path

    # config options
    @drop   = drop
    @excel  = excel
    @mode   = mode
    @out    = out || $stdout
    @relax  = relax
    @strip  = strip

    # config strings
    @quote  = quote
    @rowsep = rowsep
    @sep    = sep

    # static strings
    @cr = "\r"
    @lf = "\n"
    @es = ""
    @eq = "="

    # combinations
    @esc = (@quote * 2)
    @seq = [@sep, @eq].join # used for parsing in excel mode

    # regexes -- built per instance (no /o flag, which would freeze the first
    # instance's separator and quote into every later instance) and with the
    # configurable strings escaped so regex metacharacters are taken literally
    sep_re   = Regexp.escape(@sep)
    quote_re = Regexp.escape(@quote)

    @eoc      = /(?=#{sep_re}|\r|\n|\z)/ # end of cell
    @eol      = /\r\n?|\n/               # end of line
    @escapes  = /(#{quote_re})|#{sep_re}|\r|\n/
    @quotable = /#{sep_re}|\r|\n/
    @quotes   = /#{quote_re}/
    @seps     = /(?:#{sep_re})+/
    @quoted   = @excel ? /=?#{quote_re}/ : @quote
    @unquoted = /[^#{sep_re}\r\n][^#{quote_re}\r\n]*/
    @leadzero = /\A0\d*\z/
  end

  # Rewinds the scanner (optionally onto a new string) and clears all parse
  # state: cached rows, counters, and the line-at-a-time fast path flag.
  def reset(str = nil)
    @rows  = nil
    @cheat = true
    @cols  = @cells = 0

    #!# TODO: reset all encodings?
    self.string = str if str
    @encoding = string.encoding
    super()
  end

  # ==[ Parser ]==

  # Reads rows from the current scan position until #next_row returns nil.
  # Returns the Array of rows and updates the column/cell counters.
  def parse
    @rows = []
    while (row = next_row)
      @rows << row
      count = row.size
      @cols = count if count > @cols
      @cells += count
    end
    @rows
  end

  # Returns the next row as an Array of cells, or nil when nothing can be read.
  #
  # Fast path: while @cheat is set, a whole line is taken at once and split on
  # the separator. That is accepted only if every cell either has no quote or
  # is wrapped in exactly one pair of quotes. Otherwise the fast path is
  # switched off, the scan is undone, and the token parser takes over.
  def next_row
    if @cheat && (line = scan_until(@eol))
      line.chomp!
      row = line.split(@sep, -1)
      if line.include?(@quote)
        row.each do |col|
          saw = col.count(@quote)
          next if saw.zero?
          next if saw == 2 && col.delete_prefix!(@quote) && col.delete_suffix!(@quote)

          @cheat = false
          break
        end
      end
      return(@strip ? row.each(&:strip!) : row) if @cheat

      unscan # re-read this line with the token parser
    end

    token = next_token
    return unless token

    row = []
    while token
      row.push(*token) # a token is either one String or an Array of cells
      token = next_token
    end
    row
  end

  # Returns the next token of the current row: a String (one cell), an Array
  # of Strings (several cells), or nil at the end of a row.
  def next_token
    if scan(@quoted) # quoted cell
      token = +""
      loop do
        chunk = scan_until(@quotes) || bomb("unclosed quote")
        token << chunk.delete_suffix(@quote)
        if scan(@quote) # doubled quote => literal quote, keep reading
          token << @quote
          next
        end
        break if scan(@eoc)
        @relax || bomb("invalid character after quote")
        # relaxed mode: keep the stray quote and read up to the next one
        token << @quote + (scan_until(@quotes) || bomb("bad inline quote"))
      end
      scan(@sep)
      @strip ? token.strip : token
    elsif (match = scan(@unquoted)) # unquoted cell(s)
      if check(@quote) && !match.chomp!(@sep) # if we see a stray quote
        unless @excel && match.chomp!(@seq) # unless an excel literal, fix it
          match << (scan_until(@eoc) || bomb("stray quote"))
          scan(@sep)
        end
      end
      tokens = match.split(@sep, -1)
      @strip ? tokens.map!(&:strip) : tokens
    elsif scan(@sep) # one or more empty cells
      match = scan(@seps)
      match ? match.split(@sep, -1) : @es
    else
      scan(@eol)
      nil
    end
  end

  # Yields each parsed row (parsing first if needed). Without a block an
  # Enumerator is returned instead.
  def each
    return enum_for(:each) unless block_given?

    @rows ||= parse
    @rows.each { |row| yield row }
  end

  # Writes every row either through this object (no options) or through a
  # fresh writer built from the given options.
  def export(**opts)
    out = opts.empty? ? self : self.class.writer(**opts)
    each { |row| out << row }
  end

  # ==[ Helpers ]==

  # returns 2 (must be quoted and escaped), 1 (must be quoted), 0 (neither)
  def grok(str)
    idx = str.index(@escapes)
    return 0 unless idx
    return 2 if Regexp.last_match(1) # the first special character is a quote

    str.index(@quotes, idx) ? 2 : 1
  end

  # output a row
  #
  # Cells are converted with #to_s; the caller's array is never modified.
  # Returns the result of appending to the output object.
  def <<(row)
    cells = row.map(&:to_s)

    # drop trailing empty columns (safe even when every column is empty)
    cells.pop while @drop && !cells.empty? && cells.last.empty?

    q = @quote
    fields =
      case @mode
      when :compact
        case @excel ? 2 : grok(cells.join)
        when 0 then cells
        when 1 then cells.map { |col| col.match?(@quotable) ? "#{q}#{col}#{q}" : col }
        else
          cells.map do |col|
            if @excel && col.match?(@leadzero)
              "=#{q}#{col}#{q}"
            else
              case grok(col)
              when 0 then col
              when 1 then "#{q}#{col}#{q}"
              else        "#{q}#{col.gsub(q, @esc)}#{q}"
              end
            end
          end
        end
      when :full
        cells.map do |col|
          @excel && col.match?(@leadzero) ? "=#{q}#{col}#{q}" : "#{q}#{col.gsub(q, @esc)}#{q}"
        end
      else
        raise ArgumentError, "invalid export mode: #{@mode.inspect}"
      end

    @out << fields.join(@sep) + @rowsep
  end

  # Prints row/column/cell/byte counts, parsing first if that has not
  # happened yet.
  def stats
    @rows ||= parse
    bytes = string.bytesize
    wide  = bytes.to_s.size
    puts "%#{wide}d rows"    % @rows.size
    puts "%#{wide}d columns" % @cols
    puts "%#{wide}d cells"   % @cells
    puts "%#{wide}d bytes"   % bytes
  end

  # Aborts the program with a message that shows where parsing failed.
  def bomb(msg)
    at   = charpos           # character (not byte) offset of the scanner
    from = [at - 4, 0].max   # never let the context window wrap around
    abort "\n#{File.basename($0)}: #{msg} at character #{at} near '#{string[from, 7]}'"
  end
end
|
253
|
+
|
254
|
+
# Demo driver: only runs when this file is executed directly. It loads the
# cp932-encoded file KEN_ALL.CSV from the current directory and re-exports it
# to standard output in excel + relaxed mode.
if __FILE__ == $0
  # raw = DATA.gets("\n\n").chomp
  # raw = File.read(ARGV.first || "lc-2023.csv")
  # File.read closes the handle itself; File.open(...).read left it open
  raw = File.read("KEN_ALL.CSV", encoding: "cp932")

  csv = Censive.new(raw, excel: true, relax: true)
  csv.export # (excel: true) # sep: "|")
end
|
262
|
+
|
263
|
+
__END__
|
264
|
+
"Don",="007",10,"Ed"
|
265
|
+
Name,Age,,,Shoe,,,
|
266
|
+
"Alice",27,5
|
267
|
+
Bob,33,10 1/2
|
268
|
+
Charlie or "Chuck",=B2 + B3,9
|
269
|
+
Subtotal,=sum(B2:B5),="01234"
|
270
|
+
|
271
|
+
A,B,C,D
|
272
|
+
A,B,"C",D
|
273
|
+
A,B,C",D
|
274
|
+
A,B,"C",D
|
275
|
+
|
276
|
+
# first line works in "relax" mode, bottom line is compliant
|
277
|
+
123,"CHO, JOELLE "JOJO"",456
|
278
|
+
123,"CHO, JOELLE ""JOJO""",456
|
279
|
+
|
280
|
+
# Excel mode checking
|
281
|
+
=,=x,x=,="x",="","","=",123,0123,="123",="0123"
|
282
|
+
,=x,x=,x,,,,,,=,,123,="0123",123,,="0123" # <= a little off
|