censive 0.10 → 0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +9 -0
- data/censive.gemspec +3 -2
- data/lib/censive.rb +78 -120
- metadata +18 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 3343af27996e08f7c83ae7327e60fc8c225095c457be3141a471f105d596e165
|
|
4
|
+
data.tar.gz: 4ccdfe64a1314628ca9070c5a50678c29933ba87c11b160ec25494a21772090b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6b5e4d4e3f3f0ec4898fe91ef0f4033c068c68f79fe1acf0f41e45a9d96380751fed6a852920e2a9aa1ae2e477750d972a2838ba5fc7212ea940af534560d183
|
|
7
|
+
data.tar.gz: f58137689118ff3546d9d973ffdd5f9a7f686aa61e4d813e2adf7557b6d0e6303b8f9d9d21bc084ba82f73c50119c522049a066757ab7cead0d5d69dc6037d63
|
data/README.md
CHANGED
data/censive.gemspec
CHANGED
|
@@ -2,13 +2,14 @@
|
|
|
2
2
|
|
|
3
3
|
Gem::Specification.new do |s|
|
|
4
4
|
s.name = "censive"
|
|
5
|
-
s.version = "0.10"
|
|
5
|
+
s.version = "0.12"
|
|
6
6
|
s.author = "Steve Shreeve"
|
|
7
7
|
s.email = "steve.shreeve@gmail.com"
|
|
8
|
-
s.summary =
|
|
8
|
+
s.summary = "A quick and lightweight CSV handling library for Ruby"
|
|
9
9
|
s.description = "A quick and lightweight CSV handling library for Ruby"
|
|
10
10
|
s.homepage = "https://github.com/shreeve/censive"
|
|
11
11
|
s.license = "MIT"
|
|
12
12
|
s.files = `git ls-files`.split("\n") - %w[.gitignore]
|
|
13
13
|
s.executables = `(cd bin 2>&1) > /dev/null && git ls-files .`.split("\n")
|
|
14
|
+
s.add_dependency "strscan", ">= 3.0.6"
|
|
14
15
|
end
|
data/lib/censive.rb
CHANGED
|
@@ -1,37 +1,36 @@
|
|
|
1
1
|
#!/usr/bin/env ruby
|
|
2
2
|
|
|
3
|
-
#
|
|
3
|
+
# ============================================================================
|
|
4
4
|
# censive - A quick and lightweight CSV handling library for Ruby
|
|
5
5
|
#
|
|
6
6
|
# Author: Steve Shreeve (steve.shreeve@gmail.com)
|
|
7
|
-
# Date:
|
|
8
|
-
#
|
|
9
|
-
# Thanks to https://crystal-lang.org/api/1.7.2/CSV.html (Crystal's CSV library)
|
|
10
|
-
# and, also https://github.com/ruby/strscan/blob/master/ext/strscan/strscan.c
|
|
11
|
-
# ==============================================================================
|
|
12
|
-
# The goals are:
|
|
7
|
+
# Date: Feb 3, 2023
|
|
13
8
|
#
|
|
9
|
+
# https://crystal-lang.org/api/1.7.2/CSV.html (Crystal's CSV library)
|
|
10
|
+
# https://github.com/ruby/strscan/blob/master/ext/strscan/strscan.c
|
|
11
|
+
# https://github.com/ruby/strscan/issues/53 for details
|
|
12
|
+
# https://github.com/ruby/strscan/pull/54 for code
|
|
13
|
+
# ============================================================================
|
|
14
|
+
# GOALS:
|
|
14
15
|
# 1. Faster than Ruby's default CSV library
|
|
15
|
-
# 2. Lightweight code base with streamlined
|
|
16
|
-
#
|
|
17
|
-
# To consider:
|
|
18
|
-
#
|
|
19
|
-
# 1. Option to support IO streaming
|
|
20
|
-
# 2. Option to strip whitespace
|
|
21
|
-
# 3. Option to support headers in readers and writers
|
|
22
|
-
# 4. Confirm file encodings such as UTF-8, UTF-16, etc.
|
|
16
|
+
# 2. Lightweight code base with streamlined logic
|
|
17
|
+
# 3. Support for most non-compliant CSV variations (eg - @relax, @excel)
|
|
23
18
|
#
|
|
24
|
-
#
|
|
25
|
-
#
|
|
19
|
+
# TODO:
|
|
20
|
+
# 1. Support IO streaming
|
|
21
|
+
# 2. Add option to strip whitespace
|
|
22
|
+
# 3. Support CSV headers in first row
|
|
23
|
+
# ============================================================================
|
|
26
24
|
|
|
25
|
+
require 'bundler/setup'
|
|
27
26
|
require 'strscan'
|
|
28
27
|
|
|
29
28
|
class Censive < StringScanner
|
|
30
29
|
|
|
31
|
-
def self.writer(obj
|
|
30
|
+
def self.writer(obj=nil, **opts, &code)
|
|
32
31
|
case obj
|
|
33
32
|
when String then File.open(path, 'w') {|file| yield new(out: obj, **opts, &code) }
|
|
34
|
-
when IO
|
|
33
|
+
when IO,nil then new(out: obj, **opts, &code)
|
|
35
34
|
else abort "#{File.basename($0)}: invalid #{obj.class} object in writer"
|
|
36
35
|
end
|
|
37
36
|
end
|
|
@@ -39,7 +38,7 @@ class Censive < StringScanner
|
|
|
39
38
|
def initialize(str=nil,
|
|
40
39
|
drop: false , # drop trailing empty fields?
|
|
41
40
|
eol: "\n" , # line endings for exports
|
|
42
|
-
excel: false , # literals
|
|
41
|
+
excel: false , # literals ="01" formulas =A1 + B2 http://bit.ly/3Y7jIvc
|
|
43
42
|
mode: :compact, # export mode: compact or full
|
|
44
43
|
out: nil , # output stream, needs to respond to <<
|
|
45
44
|
quote: '"' , # quote character
|
|
@@ -51,81 +50,67 @@ class Censive < StringScanner
|
|
|
51
50
|
reset
|
|
52
51
|
|
|
53
52
|
@drop = drop
|
|
54
|
-
@eol = eol
|
|
53
|
+
@eol = eol
|
|
55
54
|
@excel = excel
|
|
56
55
|
@mode = mode
|
|
57
|
-
@out = out
|
|
58
|
-
@quote = quote
|
|
56
|
+
@out = out || $stdout
|
|
57
|
+
@quote = quote
|
|
59
58
|
@relax = relax
|
|
60
|
-
@sep = sep
|
|
59
|
+
@sep = sep
|
|
61
60
|
|
|
62
|
-
@
|
|
63
|
-
@
|
|
64
|
-
@
|
|
65
|
-
@eq = "="
|
|
66
|
-
@esc = (@quote * 2)
|
|
67
|
-
|
|
68
|
-
@tokens = [@sep,@quote,@cr,@lf,@es,nil]
|
|
61
|
+
@cr = "\r"
|
|
62
|
+
@lf = "\n"
|
|
63
|
+
@es = ""
|
|
64
|
+
@eq = "="
|
|
65
|
+
@esc = (@quote * 2)
|
|
69
66
|
end
|
|
70
67
|
|
|
71
68
|
def reset(str=nil)
|
|
72
69
|
self.string = str if str
|
|
73
70
|
super()
|
|
74
|
-
@char =
|
|
75
|
-
@flag = nil
|
|
76
|
-
|
|
71
|
+
@char = curr_char
|
|
77
72
|
@rows = nil
|
|
78
73
|
@cols = @cells = 0
|
|
79
74
|
end
|
|
80
75
|
|
|
81
76
|
# ==[ Lexer ]==
|
|
82
77
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
end
|
|
87
|
-
|
|
88
|
-
def next_token
|
|
78
|
+
# pure ruby versions for debugging
|
|
79
|
+
# def curr_char; @char = string[pos]; end
|
|
80
|
+
# def next_char; scan(/./m); @char = string[pos]; end
|
|
89
81
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
when @es then @flag = nil; [@cr,@lf,@es,nil].include?(@char) and return @es
|
|
93
|
-
when @cr then @flag = nil; next_char == @lf and next_char
|
|
94
|
-
when @lf then @flag = nil; next_char
|
|
95
|
-
else @flag = nil
|
|
96
|
-
end if @flag
|
|
82
|
+
def curr_char; @char = currchar; end
|
|
83
|
+
def next_char; @char = nextchar; end
|
|
97
84
|
|
|
98
|
-
|
|
85
|
+
def next_token
|
|
99
86
|
if @excel && @char == @eq
|
|
100
|
-
|
|
87
|
+
excel = true
|
|
101
88
|
next_char
|
|
102
89
|
end
|
|
103
90
|
|
|
104
|
-
if @
|
|
91
|
+
if @char == @quote # consume quoted cell
|
|
92
|
+
match = ""
|
|
93
|
+
while true
|
|
94
|
+
next_char
|
|
95
|
+
match << (scan_until(/(?=#{@quote})/o) or bomb "unclosed quote")
|
|
96
|
+
match << @quote and next if next_char == @quote
|
|
97
|
+
break if [@sep,@cr,@lf,@es,nil].include?(@char)
|
|
98
|
+
@relax or bomb "invalid character after quote"
|
|
99
|
+
match << @quote + scan_until(/(?=#{@quote})/o) + @quote
|
|
100
|
+
end
|
|
101
|
+
next_char if @char == @sep
|
|
102
|
+
match
|
|
103
|
+
elsif [@sep,@cr,@lf,@es,nil].include?(@char)
|
|
105
104
|
case @char
|
|
106
|
-
when @
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
match << (scan_until(/(?=#{@quote})/o) or bomb "unclosed quote")
|
|
111
|
-
case next_char
|
|
112
|
-
when @sep then @flag = @es; next_char; break
|
|
113
|
-
when @quote then match << @quote
|
|
114
|
-
when @cr,@lf,@es,nil then break
|
|
115
|
-
else @relax ? match << (@quote + @char) : bomb("invalid character after quote")
|
|
116
|
-
end
|
|
117
|
-
end
|
|
118
|
-
match
|
|
119
|
-
when @sep then @flag = @es; next_char; @es
|
|
120
|
-
when @cr then @flag = @cr; nil
|
|
121
|
-
when @lf then @flag = @lf; nil
|
|
122
|
-
when @es,nil then nil
|
|
105
|
+
when @sep then next_char; @es
|
|
106
|
+
when @cr then next_char == @lf and next_char; nil
|
|
107
|
+
when @lf then next_char; nil
|
|
108
|
+
else nil
|
|
123
109
|
end
|
|
124
110
|
else # consume unquoted cell
|
|
125
111
|
match = scan_until(/(?=#{@sep}|#{@cr}|#{@lf}|\z)/o) or bomb "unexpected character"
|
|
126
|
-
match
|
|
127
|
-
|
|
128
|
-
@char == @sep and @flag = @es and next_char
|
|
112
|
+
match.prepend(@eq) if excel
|
|
113
|
+
next_char if curr_char == @sep
|
|
129
114
|
match
|
|
130
115
|
end
|
|
131
116
|
end
|
|
@@ -156,10 +141,10 @@ class Censive < StringScanner
|
|
|
156
141
|
|
|
157
142
|
# ==[ Helpers ]==
|
|
158
143
|
|
|
159
|
-
#
|
|
144
|
+
# returns 2 (must be quoted and escaped), 1 (must be quoted), 0 (neither)
|
|
160
145
|
def grok(str)
|
|
161
|
-
if
|
|
162
|
-
$1 ? 2 : str.index(/#{@quote}/o,
|
|
146
|
+
if idx = str.index(/(#{@quote})|#{@sep}|#{@cr}|#{@lf}/o) #!# FIXME: regex injection?
|
|
147
|
+
$1 ? 2 : str.index(/#{@quote}/o, idx) ? 2 : 1
|
|
163
148
|
else
|
|
164
149
|
0
|
|
165
150
|
end
|
|
@@ -167,17 +152,14 @@ class Censive < StringScanner
|
|
|
167
152
|
|
|
168
153
|
# output a row
|
|
169
154
|
def <<(row)
|
|
170
|
-
@out or return super
|
|
171
155
|
|
|
172
156
|
# drop trailing empty columns
|
|
173
157
|
row.pop while row.last.empty? if @drop
|
|
174
158
|
|
|
175
|
-
#!# FIXME: Excel output needs to protect 0-leading numbers
|
|
176
|
-
|
|
177
159
|
s,q = @sep, @quote
|
|
178
160
|
out = case @mode
|
|
179
161
|
when :compact
|
|
180
|
-
case grok(row.join)
|
|
162
|
+
case @excel ? 2 : grok(row.join)
|
|
181
163
|
when 0
|
|
182
164
|
row
|
|
183
165
|
when 1
|
|
@@ -186,6 +168,7 @@ class Censive < StringScanner
|
|
|
186
168
|
end
|
|
187
169
|
else
|
|
188
170
|
row.map do |col|
|
|
171
|
+
@excel && col =~ /\A0\d*\z/ ? "=#{q}#{col}#{q}" :
|
|
189
172
|
case grok(col)
|
|
190
173
|
when 0 then col
|
|
191
174
|
when 1 then "#{q}#{col}#{q}"
|
|
@@ -194,10 +177,15 @@ class Censive < StringScanner
|
|
|
194
177
|
end
|
|
195
178
|
end
|
|
196
179
|
when :full
|
|
197
|
-
|
|
180
|
+
if @excel
|
|
181
|
+
row.map do |col|
|
|
182
|
+
col =~ /\A0\d*\z/ ? "=#{q}#{col}#{q}" : "#{q}#{col.gsub(q, @esc)}#{q}"
|
|
183
|
+
end
|
|
184
|
+
else
|
|
185
|
+
row.map {|col| "#{q}#{col.gsub(q, @esc)}#{q}" }
|
|
186
|
+
end
|
|
198
187
|
end.join(s)
|
|
199
188
|
|
|
200
|
-
# add line ending
|
|
201
189
|
@out << out + @eol
|
|
202
190
|
end
|
|
203
191
|
|
|
@@ -206,8 +194,8 @@ class Censive < StringScanner
|
|
|
206
194
|
@rows.each {|row| yield row }
|
|
207
195
|
end
|
|
208
196
|
|
|
209
|
-
def export(
|
|
210
|
-
out = self.class.writer(
|
|
197
|
+
def export(**opts)
|
|
198
|
+
out = opts.empty? ? self : self.class.writer(**opts)
|
|
211
199
|
each {|row| out << row }
|
|
212
200
|
end
|
|
213
201
|
|
|
@@ -220,12 +208,11 @@ class Censive < StringScanner
|
|
|
220
208
|
end
|
|
221
209
|
end
|
|
222
210
|
|
|
223
|
-
# ==[ Command line ]==
|
|
224
|
-
|
|
225
211
|
if __FILE__ == $0
|
|
226
212
|
raw = DATA.gets("\n\n").chomp
|
|
227
|
-
|
|
228
|
-
csv
|
|
213
|
+
# raw = File.read(ARGV.first || "lc-2023.csv")
|
|
214
|
+
csv = Censive.new(raw, excel: true, relax: true)
|
|
215
|
+
csv.export # (sep: ",", excel: true)
|
|
229
216
|
end
|
|
230
217
|
|
|
231
218
|
__END__
|
|
@@ -236,39 +223,10 @@ Charlie or "Chuck",=B2 + B3,9
|
|
|
236
223
|
"Doug E Fresh",="007",10
|
|
237
224
|
Subtotal,=sum(B2:B5),="01234"
|
|
238
225
|
|
|
226
|
+
# first line works in "relax" mode, bottom line is compliant
|
|
227
|
+
123,"CHO, JOELLE "JOJO"",456
|
|
228
|
+
123,"CHO, JOELLE ""JOJO""",456
|
|
239
229
|
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
data = File.read(path)
|
|
244
|
-
|
|
245
|
-
out = Censive.writer
|
|
246
|
-
|
|
247
|
-
Censive.new(data, sep: "\t", quote: "'").each do |row|
|
|
248
|
-
p row
|
|
249
|
-
end
|
|
250
|
-
|
|
251
|
-
Censive.reader(path, sep: "\t", quote: "'").each {|r| p r}
|
|
252
|
-
|
|
253
|
-
# STDOUT.sync = true
|
|
254
|
-
#
|
|
255
|
-
# data = File.read('1.csv')
|
|
256
|
-
#
|
|
257
|
-
# Censive.writer('out.csv') do |out|
|
|
258
|
-
# Censive.new(data, relax: true, excel: true).each do |row|
|
|
259
|
-
# out << row
|
|
260
|
-
# end
|
|
261
|
-
# end
|
|
262
|
-
#
|
|
263
|
-
# __END__
|
|
264
|
-
#
|
|
265
|
-
# ARGV << "z.csv" if ARGV.empty?
|
|
266
|
-
#
|
|
267
|
-
# path = ARGV.first
|
|
268
|
-
# data = File.read(path)
|
|
269
|
-
#
|
|
270
|
-
# csv = Censive.new(data)
|
|
271
|
-
#
|
|
272
|
-
# data.size > 1e6 ? csv.parse : csv.parse.each {|cols| p cols }
|
|
273
|
-
#
|
|
274
|
-
# csv.stats
|
|
230
|
+
# Excel mode checking
|
|
231
|
+
=,=x,x=,="x",="","","=",123,0123,="123",="0123"
|
|
232
|
+
,=x,x=,x,,,,,,=,,123,="0123",123,,="0123" # <= a little off
|
metadata
CHANGED
|
@@ -1,15 +1,29 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: censive
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: '0.10'
|
|
4
|
+
version: '0.12'
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Steve Shreeve
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2023-02-
|
|
12
|
-
dependencies:
|
|
11
|
+
date: 2023-02-04 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: strscan
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - ">="
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: 3.0.6
|
|
20
|
+
type: :runtime
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - ">="
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: 3.0.6
|
|
13
27
|
description: A quick and lightweight CSV handling library for Ruby
|
|
14
28
|
email: steve.shreeve@gmail.com
|
|
15
29
|
executables: []
|
|
@@ -40,7 +54,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
40
54
|
- !ruby/object:Gem::Version
|
|
41
55
|
version: '0'
|
|
42
56
|
requirements: []
|
|
43
|
-
rubygems_version: 3.4.
|
|
57
|
+
rubygems_version: 3.4.6
|
|
44
58
|
signing_key:
|
|
45
59
|
specification_version: 4
|
|
46
60
|
summary: A quick and lightweight CSV handling library for Ruby
|