censive 0.10 → 0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +9 -0
- data/censive.gemspec +3 -2
- data/lib/censive.rb +71 -119
- metadata +17 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 91f3884b92c79f8cca37066a420c872b612c37ed736f61fa7d11c0f0fd861d8e
|
|
4
|
+
data.tar.gz: 71789d006309c1d87e681a7078e718342cb36e1f8a18690c59e51595c49d2e59
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d5fe33889abe08bc6aa57bb4dec2d65ef3e8935da5aa209a0bcb6060696de07cf44189ad9be6f43ffcd2897672668bd3aff7e8b713f5070199627e40edd3704f
|
|
7
|
+
data.tar.gz: 2c79d30b4682da07800c19eb93067c5c72d471f9abc7b824f7525b3d36b8fb1108d0e733706852f750baac57360af2d5b77527a1daab871e2313d558928ce240
|
data/README.md
CHANGED
data/censive.gemspec
CHANGED
|
@@ -2,13 +2,14 @@
|
|
|
2
2
|
|
|
3
3
|
Gem::Specification.new do |s|
|
|
4
4
|
s.name = "censive"
|
|
5
|
-
s.version = "0.
|
|
5
|
+
s.version = "0.11"
|
|
6
6
|
s.author = "Steve Shreeve"
|
|
7
7
|
s.email = "steve.shreeve@gmail.com"
|
|
8
|
-
s.summary =
|
|
8
|
+
s.summary =
|
|
9
9
|
s.description = "A quick and lightweight CSV handling library for Ruby"
|
|
10
10
|
s.homepage = "https://github.com/shreeve/censive"
|
|
11
11
|
s.license = "MIT"
|
|
12
12
|
s.files = `git ls-files`.split("\n") - %w[.gitignore]
|
|
13
13
|
s.executables = `(cd bin 2>&1) > /dev/null && git ls-files .`.split("\n")
|
|
14
|
+
s.add_dependency "strscan", ">= 3.0.6"
|
|
14
15
|
end
|
data/lib/censive.rb
CHANGED
|
@@ -1,37 +1,36 @@
|
|
|
1
1
|
#!/usr/bin/env ruby
|
|
2
2
|
|
|
3
|
-
#
|
|
3
|
+
# ============================================================================
|
|
4
4
|
# censive - A quick and lightweight CSV handling library for Ruby
|
|
5
5
|
#
|
|
6
6
|
# Author: Steve Shreeve (steve.shreeve@gmail.com)
|
|
7
|
-
# Date:
|
|
8
|
-
#
|
|
9
|
-
# Thanks to https://crystal-lang.org/api/1.7.2/CSV.html (Crystal's CSV library)
|
|
10
|
-
# and, also https://github.com/ruby/strscan/blob/master/ext/strscan/strscan.c
|
|
11
|
-
# ==============================================================================
|
|
12
|
-
# The goals are:
|
|
7
|
+
# Date: Feb 3, 2023
|
|
13
8
|
#
|
|
9
|
+
# https://crystal-lang.org/api/1.7.2/CSV.html (Crystal's CSV library)
|
|
10
|
+
# https://github.com/ruby/strscan/blob/master/ext/strscan/strscan.c
|
|
11
|
+
# https://github.com/ruby/strscan/issues/53 for details
|
|
12
|
+
# https://github.com/ruby/strscan/pull/54 for code
|
|
13
|
+
# ============================================================================
|
|
14
|
+
# GOALS:
|
|
14
15
|
# 1. Faster than Ruby's default CSV library
|
|
15
|
-
# 2. Lightweight code base with streamlined
|
|
16
|
-
#
|
|
17
|
-
# To consider:
|
|
16
|
+
# 2. Lightweight code base with streamlined logic
|
|
17
|
+
# 3. Support for most non-compliant CSV variations (eg - @relax, @excel)
|
|
18
18
|
#
|
|
19
|
-
#
|
|
20
|
-
#
|
|
21
|
-
#
|
|
22
|
-
#
|
|
23
|
-
#
|
|
24
|
-
# NOTE: Only getch and scan_until advance strscan's position
|
|
25
|
-
# ==============================================================================
|
|
19
|
+
# TODO:
|
|
20
|
+
# 1. Support IO streaming
|
|
21
|
+
# 2. Add option to strip whitespace
|
|
22
|
+
# 3. Support CSV headers in first row
|
|
23
|
+
# ============================================================================
|
|
26
24
|
|
|
25
|
+
require 'bundler/setup'
|
|
27
26
|
require 'strscan'
|
|
28
27
|
|
|
29
28
|
class Censive < StringScanner
|
|
30
29
|
|
|
31
|
-
def self.writer(obj
|
|
30
|
+
def self.writer(obj=nil, **opts, &code)
|
|
32
31
|
case obj
|
|
33
32
|
when String then File.open(path, 'w') {|file| yield new(out: obj, **opts, &code) }
|
|
34
|
-
when IO
|
|
33
|
+
when IO,nil then new(out: obj, **opts, &code)
|
|
35
34
|
else abort "#{File.basename($0)}: invalid #{obj.class} object in writer"
|
|
36
35
|
end
|
|
37
36
|
end
|
|
@@ -39,7 +38,7 @@ class Censive < StringScanner
|
|
|
39
38
|
def initialize(str=nil,
|
|
40
39
|
drop: false , # drop trailing empty fields?
|
|
41
40
|
eol: "\n" , # line endings for exports
|
|
42
|
-
excel: false , # literals
|
|
41
|
+
excel: false , # literals ="01" formulas =A1 + B2 http://bit.ly/3Y7jIvc
|
|
43
42
|
mode: :compact, # export mode: compact or full
|
|
44
43
|
out: nil , # output stream, needs to respond to <<
|
|
45
44
|
quote: '"' , # quote character
|
|
@@ -51,81 +50,67 @@ class Censive < StringScanner
|
|
|
51
50
|
reset
|
|
52
51
|
|
|
53
52
|
@drop = drop
|
|
54
|
-
@eol = eol
|
|
53
|
+
@eol = eol
|
|
55
54
|
@excel = excel
|
|
56
55
|
@mode = mode
|
|
57
|
-
@out = out
|
|
58
|
-
@quote = quote
|
|
56
|
+
@out = out || $stdout
|
|
57
|
+
@quote = quote
|
|
59
58
|
@relax = relax
|
|
60
|
-
@sep = sep
|
|
61
|
-
|
|
62
|
-
@es = "" .freeze
|
|
63
|
-
@cr = "\r" .freeze
|
|
64
|
-
@lf = "\n" .freeze
|
|
65
|
-
@eq = "=" .freeze
|
|
66
|
-
@esc = (@quote * 2).freeze
|
|
59
|
+
@sep = sep
|
|
67
60
|
|
|
68
|
-
@
|
|
61
|
+
@cr = "\r"
|
|
62
|
+
@lf = "\n"
|
|
63
|
+
@es = ""
|
|
64
|
+
@eq = "="
|
|
65
|
+
@esc = (@quote * 2)
|
|
69
66
|
end
|
|
70
67
|
|
|
71
68
|
def reset(str=nil)
|
|
72
69
|
self.string = str if str
|
|
73
70
|
super()
|
|
74
|
-
@char =
|
|
75
|
-
@flag = nil
|
|
76
|
-
|
|
71
|
+
@char = curr_char
|
|
77
72
|
@rows = nil
|
|
78
73
|
@cols = @cells = 0
|
|
79
74
|
end
|
|
80
75
|
|
|
81
76
|
# ==[ Lexer ]==
|
|
82
77
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
end
|
|
87
|
-
|
|
88
|
-
def next_token
|
|
78
|
+
# pure ruby versions for debugging
|
|
79
|
+
# def curr_char; @char = string[pos]; end
|
|
80
|
+
# def next_char; scan(/./m); @char = string[pos]; end
|
|
89
81
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
when @es then @flag = nil; [@cr,@lf,@es,nil].include?(@char) and return @es
|
|
93
|
-
when @cr then @flag = nil; next_char == @lf and next_char
|
|
94
|
-
when @lf then @flag = nil; next_char
|
|
95
|
-
else @flag = nil
|
|
96
|
-
end if @flag
|
|
82
|
+
def curr_char; @char = currchar; end
|
|
83
|
+
def next_char; @char = nextchar; end
|
|
97
84
|
|
|
98
|
-
|
|
85
|
+
def next_token
|
|
99
86
|
if @excel && @char == @eq
|
|
100
|
-
|
|
87
|
+
excel = true
|
|
101
88
|
next_char
|
|
102
89
|
end
|
|
103
90
|
|
|
104
|
-
if @
|
|
91
|
+
if @char == @quote # consume quoted cell
|
|
92
|
+
match = ""
|
|
93
|
+
while true
|
|
94
|
+
next_char
|
|
95
|
+
match << (scan_until(/(?=#{@quote})/o) or bomb "unclosed quote")
|
|
96
|
+
match << @quote and next if next_char == @quote
|
|
97
|
+
break if [@sep,@cr,@lf,@es,nil].include?(@char)
|
|
98
|
+
@relax or bomb "invalid character after quote"
|
|
99
|
+
match << @quote + scan_until(/(?=#{@quote})/o) + @quote
|
|
100
|
+
end
|
|
101
|
+
next_char if @char == @sep
|
|
102
|
+
match
|
|
103
|
+
elsif [@sep,@cr,@lf,@es,nil].include?(@char)
|
|
105
104
|
case @char
|
|
106
|
-
when @
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
match << (scan_until(/(?=#{@quote})/o) or bomb "unclosed quote")
|
|
111
|
-
case next_char
|
|
112
|
-
when @sep then @flag = @es; next_char; break
|
|
113
|
-
when @quote then match << @quote
|
|
114
|
-
when @cr,@lf,@es,nil then break
|
|
115
|
-
else @relax ? match << (@quote + @char) : bomb("invalid character after quote")
|
|
116
|
-
end
|
|
117
|
-
end
|
|
118
|
-
match
|
|
119
|
-
when @sep then @flag = @es; next_char; @es
|
|
120
|
-
when @cr then @flag = @cr; nil
|
|
121
|
-
when @lf then @flag = @lf; nil
|
|
122
|
-
when @es,nil then nil
|
|
105
|
+
when @sep then next_char; @es
|
|
106
|
+
when @cr then next_char == @lf and next_char; nil
|
|
107
|
+
when @lf then next_char; nil
|
|
108
|
+
else nil
|
|
123
109
|
end
|
|
124
110
|
else # consume unquoted cell
|
|
125
111
|
match = scan_until(/(?=#{@sep}|#{@cr}|#{@lf}|\z)/o) or bomb "unexpected character"
|
|
126
|
-
match
|
|
127
|
-
|
|
128
|
-
@char == @sep and @flag = @es and next_char
|
|
112
|
+
match.prepend(@eq) if excel
|
|
113
|
+
next_char if curr_char == @sep
|
|
129
114
|
match
|
|
130
115
|
end
|
|
131
116
|
end
|
|
@@ -156,10 +141,10 @@ class Censive < StringScanner
|
|
|
156
141
|
|
|
157
142
|
# ==[ Helpers ]==
|
|
158
143
|
|
|
159
|
-
#
|
|
144
|
+
# returns 2 (must be quoted and escaped), 1 (must be quoted), 0 (neither)
|
|
160
145
|
def grok(str)
|
|
161
|
-
if
|
|
162
|
-
$1 ? 2 : str.index(/#{@quote}/o,
|
|
146
|
+
if idx = str.index(/(#{@quote})|#{@sep}|#{@cr}|#{@lf}/o) #!# FIXME: regex injection is possible
|
|
147
|
+
$1 ? 2 : str.index(/#{@quote}/o, idx) ? 2 : 1
|
|
163
148
|
else
|
|
164
149
|
0
|
|
165
150
|
end
|
|
@@ -167,17 +152,14 @@ class Censive < StringScanner
|
|
|
167
152
|
|
|
168
153
|
# output a row
|
|
169
154
|
def <<(row)
|
|
170
|
-
@out or return super
|
|
171
155
|
|
|
172
156
|
# drop trailing empty columns
|
|
173
157
|
row.pop while row.last.empty? if @drop
|
|
174
158
|
|
|
175
|
-
#!# FIXME: Excel output needs to protect 0-leading numbers
|
|
176
|
-
|
|
177
159
|
s,q = @sep, @quote
|
|
178
160
|
out = case @mode
|
|
179
161
|
when :compact
|
|
180
|
-
case grok(row.join)
|
|
162
|
+
case @excel ? 2 : grok(row.join)
|
|
181
163
|
when 0
|
|
182
164
|
row
|
|
183
165
|
when 1
|
|
@@ -186,6 +168,7 @@ class Censive < StringScanner
|
|
|
186
168
|
end
|
|
187
169
|
else
|
|
188
170
|
row.map do |col|
|
|
171
|
+
@excel && col =~ /\A0\d*\z/ ? "=#{q}#{col}#{q}" :
|
|
189
172
|
case grok(col)
|
|
190
173
|
when 0 then col
|
|
191
174
|
when 1 then "#{q}#{col}#{q}"
|
|
@@ -197,7 +180,6 @@ class Censive < StringScanner
|
|
|
197
180
|
row.map {|col| "#{q}#{col.gsub(q, @esc)}#{q}" }
|
|
198
181
|
end.join(s)
|
|
199
182
|
|
|
200
|
-
# add line ending
|
|
201
183
|
@out << out + @eol
|
|
202
184
|
end
|
|
203
185
|
|
|
@@ -206,8 +188,8 @@ class Censive < StringScanner
|
|
|
206
188
|
@rows.each {|row| yield row }
|
|
207
189
|
end
|
|
208
190
|
|
|
209
|
-
def export(
|
|
210
|
-
out = self.class.writer(
|
|
191
|
+
def export(**opts)
|
|
192
|
+
out = opts.empty? ? self : self.class.writer(**opts)
|
|
211
193
|
each {|row| out << row }
|
|
212
194
|
end
|
|
213
195
|
|
|
@@ -220,12 +202,11 @@ class Censive < StringScanner
|
|
|
220
202
|
end
|
|
221
203
|
end
|
|
222
204
|
|
|
223
|
-
# ==[ Command line ]==
|
|
224
|
-
|
|
225
205
|
if __FILE__ == $0
|
|
226
206
|
raw = DATA.gets("\n\n").chomp
|
|
227
|
-
|
|
228
|
-
csv
|
|
207
|
+
# raw = File.read(ARGV.first || "lc-2023.csv")
|
|
208
|
+
csv = Censive.new(raw, excel: true, relax: true)
|
|
209
|
+
csv.export # (sep: ",", excel: true)
|
|
229
210
|
end
|
|
230
211
|
|
|
231
212
|
__END__
|
|
@@ -236,39 +217,10 @@ Charlie or "Chuck",=B2 + B3,9
|
|
|
236
217
|
"Doug E Fresh",="007",10
|
|
237
218
|
Subtotal,=sum(B2:B5),="01234"
|
|
238
219
|
|
|
220
|
+
# first line works in "relax" mode, bottom line is compliant
|
|
221
|
+
123,"CHO, JOELLE "JOJO"",456
|
|
222
|
+
123,"CHO, JOELLE ""JOJO""",456
|
|
239
223
|
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
data = File.read(path)
|
|
244
|
-
|
|
245
|
-
out = Censive.writer
|
|
246
|
-
|
|
247
|
-
Censive.new(data, sep: "\t", quote: "'").each do |row|
|
|
248
|
-
p row
|
|
249
|
-
end
|
|
250
|
-
|
|
251
|
-
Censive.reader(path, sep: "\t", quote: "'").each {|r| p r}
|
|
252
|
-
|
|
253
|
-
# STDOUT.sync = true
|
|
254
|
-
#
|
|
255
|
-
# data = File.read('1.csv')
|
|
256
|
-
#
|
|
257
|
-
# Censive.writer('out.csv') do |out|
|
|
258
|
-
# Censive.new(data, relax: true, excel: true).each do |row|
|
|
259
|
-
# out << row
|
|
260
|
-
# end
|
|
261
|
-
# end
|
|
262
|
-
#
|
|
263
|
-
# __END__
|
|
264
|
-
#
|
|
265
|
-
# ARGV << "z.csv" if ARGV.empty?
|
|
266
|
-
#
|
|
267
|
-
# path = ARGV.first
|
|
268
|
-
# data = File.read(path)
|
|
269
|
-
#
|
|
270
|
-
# csv = Censive.new(data)
|
|
271
|
-
#
|
|
272
|
-
# data.size > 1e6 ? csv.parse : csv.parse.each {|cols| p cols }
|
|
273
|
-
#
|
|
274
|
-
# csv.stats
|
|
224
|
+
# Excel mode checking
|
|
225
|
+
=,=x,x=,="x",="","","=",123,0123,="123",="0123"
|
|
226
|
+
,=x,x=,x,,,,,,=,,123,="0123",123,,="0123" # <= a little off
|
metadata
CHANGED
|
@@ -1,15 +1,29 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: censive
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: '0.
|
|
4
|
+
version: '0.11'
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Steve Shreeve
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2023-02-
|
|
12
|
-
dependencies:
|
|
11
|
+
date: 2023-02-04 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: strscan
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - ">="
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: 3.0.6
|
|
20
|
+
type: :runtime
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - ">="
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: 3.0.6
|
|
13
27
|
description: A quick and lightweight CSV handling library for Ruby
|
|
14
28
|
email: steve.shreeve@gmail.com
|
|
15
29
|
executables: []
|