censive 0.6 → 0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/censive.gemspec +3 -3
- data/lib/censive.rb +77 -63
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6419efcdc9274ea8bcf7b8527001e33f8bdfea348dfd911686cab36984d507da
|
4
|
+
data.tar.gz: 3b59aead54517fd64d7ece3eaa6f459e301e1e48f1ae34772a7128c61fb739f2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7910c09e76a81ed27870ea52fb6c8aea0316ed213c53a026d98adc64f93349477e6acab0a93b88c6f184ce1d317634ecdca9290d50bff9b117b98bedd3ac7b86
|
7
|
+
data.tar.gz: 358ab985947d486b5f486b1f7e9c1f591e3b8e906b9eab59a4ed151e5f5d9652c211f2d2a4ee36f0543227e2ae5e33ba57f1e4c178f6f7e72e05c14d7b46895f
|
data/README.md
CHANGED
data/censive.gemspec
CHANGED
@@ -2,11 +2,11 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = "censive"
|
5
|
-
s.version = "0.
|
5
|
+
s.version = "0.8"
|
6
6
|
s.author = "Steve Shreeve"
|
7
7
|
s.email = "steve.shreeve@gmail.com"
|
8
|
-
s.summary = "A quick and lightweight
|
9
|
-
s.description = "A quick and lightweight
|
8
|
+
s.summary = "A quick and lightweight CSV handling library for Ruby"
|
9
|
+
s.description = "A quick and lightweight CSV handling library for Ruby"
|
10
10
|
s.homepage = "https://github.com/shreeve/censive"
|
11
11
|
s.license = "MIT"
|
12
12
|
s.files = `git ls-files`.split("\n") - %w[.gitignore]
|
data/lib/censive.rb
CHANGED
@@ -5,6 +5,8 @@
|
|
5
5
|
#
|
6
6
|
# Author: Steve Shreeve (steve.shreeve@gmail.com)
|
7
7
|
# Date: Jan 30, 2023
|
8
|
+
#
|
9
|
+
# Thanks: Crystal's CSV library, see https://crystal-lang.org/api/1.7.2/CSV.html
|
8
10
|
# ==============================================================================
|
9
11
|
# The goals are:
|
10
12
|
#
|
@@ -15,10 +17,7 @@
|
|
15
17
|
#
|
16
18
|
# 1. Option to support IO streaming
|
17
19
|
# 2. Option to strip whitespace
|
18
|
-
# 3.
|
19
|
-
# 4. Option to force quotes in output
|
20
|
-
# 5. Option to allow reading excel CSV (="Text" for cells)
|
21
|
-
# 6. Confirm file encodings such as UTF-8, UTF-16, etc.
|
20
|
+
# 3. Confirm file encodings such as UTF-8, UTF-16, etc.
|
22
21
|
#
|
23
22
|
# NOTE: Only getch and scan_until advance strscan's position
|
24
23
|
# ==============================================================================
|
@@ -36,81 +35,98 @@ class Censive < StringScanner
|
|
36
35
|
def initialize(str=nil,
|
37
36
|
sep: ',' , # column separator character
|
38
37
|
quote: '"' , # quote character
|
39
|
-
|
40
|
-
mode: :compact, # export mode: compact or full
|
38
|
+
|
41
39
|
drop: false , # enable to drop trailing separators
|
42
|
-
eol: "\n"
|
40
|
+
eol: "\n" , # desired line endings for exports
|
41
|
+
excel: false , # allow ,="0123" style columns
|
42
|
+
mode: :compact, # export mode: compact or full
|
43
|
+
out: nil , # output IO/file
|
44
|
+
relax: false , # relax parsing of quotes
|
45
|
+
|
46
|
+
**opts # grab bag
|
43
47
|
)
|
44
48
|
super(str || '')
|
45
49
|
reset
|
46
50
|
|
47
|
-
@sep
|
48
|
-
@quote
|
49
|
-
|
50
|
-
@
|
51
|
-
@
|
52
|
-
@
|
53
|
-
|
54
|
-
@
|
55
|
-
|
56
|
-
@
|
57
|
-
@
|
51
|
+
@sep = sep .freeze
|
52
|
+
@quote = quote.freeze
|
53
|
+
|
54
|
+
@drop = drop
|
55
|
+
@eol = eol.freeze
|
56
|
+
@mode = mode
|
57
|
+
@out = out
|
58
|
+
@relax = relax
|
59
|
+
|
60
|
+
@es = "" .freeze
|
61
|
+
@cr = "\r" .freeze
|
62
|
+
@lf = "\n" .freeze
|
63
|
+
@eq = "=" .freeze
|
64
|
+
@esc = (@quote * 2).freeze
|
65
|
+
|
66
|
+
@tokens = [@sep,@quote,@cr,@lf,@es,nil]
|
67
|
+
@tokens << @eq if excel # See http://bit.ly/3Y7jIvc
|
58
68
|
end
|
59
69
|
|
60
70
|
def reset(str=nil)
|
61
71
|
self.string = str if str
|
62
72
|
super()
|
63
|
-
@char
|
64
|
-
@flag
|
73
|
+
@char = peek(1)
|
74
|
+
@flag = nil
|
65
75
|
|
66
|
-
@rows
|
67
|
-
@cols
|
76
|
+
@rows = nil
|
77
|
+
@cols = @cells = 0
|
68
78
|
end
|
69
79
|
|
70
80
|
# ==[ Lexer ]==
|
71
81
|
|
72
82
|
def next_char
|
73
83
|
getch
|
74
|
-
@char =
|
84
|
+
@char = peek(1)
|
75
85
|
end
|
76
86
|
|
77
87
|
def next_token
|
78
88
|
case @flag
|
79
|
-
when @es then @flag = nil; [@cr,@lf,nil].include?(@char) and return @es
|
89
|
+
when @es then @flag = nil; [@cr,@lf,@es,nil].include?(@char) and return @es
|
80
90
|
when @cr then @flag = nil; next_char == @lf and next_char
|
81
91
|
when @lf then @flag = nil; next_char
|
82
92
|
end if @flag
|
83
93
|
|
84
|
-
if
|
94
|
+
if @tokens.include?(@char)
|
85
95
|
case @char
|
86
|
-
when @quote #
|
96
|
+
when @quote, @eq # consume quoted cell
|
97
|
+
@char == @eq and next_char # excel mode: allows ,="012",
|
87
98
|
match = ""
|
88
99
|
while true
|
89
100
|
getch # consume the quote (optimized by not calling next_char)
|
90
101
|
match << (scan_until(/(?=#{@quote})/o) or bomb "unclosed quote")
|
91
102
|
case next_char
|
92
|
-
when @sep
|
93
|
-
when @quote
|
94
|
-
when @cr,@lf,nil then break
|
95
|
-
else
|
103
|
+
when @sep then @flag = @es; next_char; break
|
104
|
+
when @quote then match << @quote
|
105
|
+
when @cr,@lf,@es,nil then break
|
106
|
+
else
|
107
|
+
if @relax
|
108
|
+
match << @quote + @char
|
109
|
+
else
|
110
|
+
bomb "invalid character after quote"
|
111
|
+
end
|
96
112
|
end
|
97
113
|
end
|
98
114
|
match
|
99
|
-
when @sep
|
100
|
-
when @cr
|
101
|
-
when @lf
|
102
|
-
when nil
|
115
|
+
when @sep then @flag = @es; next_char; @es
|
116
|
+
when @cr then @flag = @cr; nil
|
117
|
+
when @lf then @flag = @lf; nil
|
118
|
+
when @es,nil then nil
|
103
119
|
end
|
104
|
-
else #
|
120
|
+
else # consume unquoted cell
|
105
121
|
match = scan_until(/(?=#{@sep}|#{@cr}|#{@lf}|\z)/o) or bomb "unexpected character"
|
106
|
-
@char =
|
122
|
+
@char = peek(1)
|
107
123
|
@char == @sep and @flag = @es and next_char
|
108
124
|
match
|
109
125
|
end
|
110
126
|
end
|
111
127
|
|
112
128
|
def bomb(msg)
|
113
|
-
abort "#{File.basename($0)}: #{msg} at character #{pos} near '#{string[pos-4,7]}'"
|
129
|
+
abort "\n#{File.basename($0)}: #{msg} at character #{pos} near '#{string[pos-4,7]}'"
|
114
130
|
end
|
115
131
|
|
116
132
|
# ==[ Parser ]==
|
@@ -135,7 +151,7 @@ class Censive < StringScanner
|
|
135
151
|
|
136
152
|
# ==[ Helpers ]==
|
137
153
|
|
138
|
-
# grok returns 2
|
154
|
+
# grok returns: 2 for seps and quotes, 1 for seps only, and 0 for neither
|
139
155
|
def grok(str)
|
140
156
|
if pos = str.index(/(#{@quote})|#{@sep}/o)
|
141
157
|
$1 ? 2 : str.index(/#{@quote}/o, pos) ? 2 : 1
|
@@ -147,6 +163,9 @@ class Censive < StringScanner
|
|
147
163
|
def <<(row)
|
148
164
|
@out or return super
|
149
165
|
|
166
|
+
# drop trailing seps, if specified
|
167
|
+
row.pop while row.last.empty? if @drop
|
168
|
+
|
150
169
|
# most compact export format
|
151
170
|
s,q = @sep, @quote
|
152
171
|
out = case @mode
|
@@ -167,9 +186,6 @@ class Censive < StringScanner
|
|
167
186
|
row.map {|col| "#{q}#{col.gsub(q, @esc)}#{q}" }
|
168
187
|
end.join(s)
|
169
188
|
|
170
|
-
# drop trailing seps, if specified
|
171
|
-
out.gsub!(/#{s}+\z/,'') if @drop
|
172
|
-
|
173
189
|
# write output, using desired line endings
|
174
190
|
@out << out + @eol
|
175
191
|
end
|
@@ -188,31 +204,29 @@ class Censive < StringScanner
|
|
188
204
|
end
|
189
205
|
end
|
190
206
|
|
191
|
-
|
207
|
+
__END__
|
192
208
|
|
193
|
-
|
194
|
-
|
195
|
-
Censive.writer('out.csv', sep: ',', quote: "'") do |out|
|
196
|
-
Censive.new(data).each do |row|
|
197
|
-
out << row
|
198
|
-
end
|
199
|
-
end
|
209
|
+
# ==[ Playground... ]==
|
200
210
|
|
201
|
-
#
|
202
|
-
# ARGV << "z.csv" if ARGV.empty?
|
203
|
-
#
|
204
|
-
# case 1
|
205
|
-
# when 1
|
206
|
-
# path = ARGV.first
|
207
|
-
# data = File.read(path)
|
208
|
-
# when 2
|
209
|
-
# data = DATA.gets("\n\n").rstrip
|
210
|
-
# end
|
211
|
-
#
|
212
211
|
# STDOUT.sync = true
|
213
212
|
#
|
214
|
-
#
|
213
|
+
# data = File.read('1.csv')
|
215
214
|
#
|
216
|
-
#
|
215
|
+
# Censive.writer('out.csv') do |out|
|
216
|
+
# Censive.new(data, relax: true, excel: true).each do |row|
|
217
|
+
# out << row
|
218
|
+
# end
|
219
|
+
# end
|
217
220
|
#
|
218
|
-
#
|
221
|
+
# __END__
|
222
|
+
|
223
|
+
ARGV << "z.csv" if ARGV.empty?
|
224
|
+
|
225
|
+
path = ARGV.first
|
226
|
+
data = File.read(path)
|
227
|
+
|
228
|
+
csv = Censive.new(data)
|
229
|
+
|
230
|
+
data.size > 1e6 ? csv.parse : csv.parse.each {|cols| p cols }
|
231
|
+
|
232
|
+
csv.stats
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: censive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.
|
4
|
+
version: '0.8'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Steve Shreeve
|
@@ -10,7 +10,7 @@ bindir: bin
|
|
10
10
|
cert_chain: []
|
11
11
|
date: 2023-01-31 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
|
-
description: A quick and lightweight
|
13
|
+
description: A quick and lightweight CSV handling library for Ruby
|
14
14
|
email: steve.shreeve@gmail.com
|
15
15
|
executables: []
|
16
16
|
extensions: []
|
@@ -43,5 +43,5 @@ requirements: []
|
|
43
43
|
rubygems_version: 3.4.5
|
44
44
|
signing_key:
|
45
45
|
specification_version: 4
|
46
|
-
summary: A quick and lightweight
|
46
|
+
summary: A quick and lightweight CSV handling library for Ruby
|
47
47
|
test_files: []
|