censive 0.6 → 0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/censive.gemspec +3 -3
- data/lib/censive.rb +77 -63
- metadata +3 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 6419efcdc9274ea8bcf7b8527001e33f8bdfea348dfd911686cab36984d507da
|
|
4
|
+
data.tar.gz: 3b59aead54517fd64d7ece3eaa6f459e301e1e48f1ae34772a7128c61fb739f2
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 7910c09e76a81ed27870ea52fb6c8aea0316ed213c53a026d98adc64f93349477e6acab0a93b88c6f184ce1d317634ecdca9290d50bff9b117b98bedd3ac7b86
|
|
7
|
+
data.tar.gz: 358ab985947d486b5f486b1f7e9c1f591e3b8e906b9eab59a4ed151e5f5d9652c211f2d2a4ee36f0543227e2ae5e33ba57f1e4c178f6f7e72e05c14d7b46895f
|
data/README.md
CHANGED
data/censive.gemspec
CHANGED
|
@@ -2,11 +2,11 @@
|
|
|
2
2
|
|
|
3
3
|
Gem::Specification.new do |s|
|
|
4
4
|
s.name = "censive"
|
|
5
|
-
s.version = "0.6"
|
|
5
|
+
s.version = "0.8"
|
|
6
6
|
s.author = "Steve Shreeve"
|
|
7
7
|
s.email = "steve.shreeve@gmail.com"
|
|
8
|
-
s.summary = "A quick and lightweight
|
|
9
|
-
s.description = "A quick and lightweight
|
|
8
|
+
s.summary = "A quick and lightweight CSV handling library for Ruby"
|
|
9
|
+
s.description = "A quick and lightweight CSV handling library for Ruby"
|
|
10
10
|
s.homepage = "https://github.com/shreeve/censive"
|
|
11
11
|
s.license = "MIT"
|
|
12
12
|
s.files = `git ls-files`.split("\n") - %w[.gitignore]
|
data/lib/censive.rb
CHANGED
|
@@ -5,6 +5,8 @@
|
|
|
5
5
|
#
|
|
6
6
|
# Author: Steve Shreeve (steve.shreeve@gmail.com)
|
|
7
7
|
# Date: Jan 30, 2023
|
|
8
|
+
#
|
|
9
|
+
# Thanks: Crystal's CSV library, see https://crystal-lang.org/api/1.7.2/CSV.html
|
|
8
10
|
# ==============================================================================
|
|
9
11
|
# The goals are:
|
|
10
12
|
#
|
|
@@ -15,10 +17,7 @@
|
|
|
15
17
|
#
|
|
16
18
|
# 1. Option to support IO streaming
|
|
17
19
|
# 2. Option to strip whitespace
|
|
18
|
-
# 3.
|
|
19
|
-
# 4. Option to force quotes in output
|
|
20
|
-
# 5. Option to allow reading excel CSV (="Text" for cells)
|
|
21
|
-
# 6. Confirm file encodings such as UTF-8, UTF-16, etc.
|
|
20
|
+
# 3. Confirm file encodings such as UTF-8, UTF-16, etc.
|
|
22
21
|
#
|
|
23
22
|
# NOTE: Only getch and scan_until advance strscan's position
|
|
24
23
|
# ==============================================================================
|
|
@@ -36,81 +35,98 @@ class Censive < StringScanner
|
|
|
36
35
|
def initialize(str=nil,
|
|
37
36
|
sep: ',' , # column separator character
|
|
38
37
|
quote: '"' , # quote character
|
|
39
|
-
|
|
40
|
-
mode: :compact, # export mode: compact or full
|
|
38
|
+
|
|
41
39
|
drop: false , # enable to drop trailing separators
|
|
42
|
-
eol: "\n"
|
|
40
|
+
eol: "\n" , # desired line endings for exports
|
|
41
|
+
excel: false , # allow ,="0123" style columns
|
|
42
|
+
mode: :compact, # export mode: compact or full
|
|
43
|
+
out: nil , # output IO/file
|
|
44
|
+
relax: false , # relax parsing of quotes
|
|
45
|
+
|
|
46
|
+
**opts # grab bag
|
|
43
47
|
)
|
|
44
48
|
super(str || '')
|
|
45
49
|
reset
|
|
46
50
|
|
|
47
|
-
@sep
|
|
48
|
-
@quote
|
|
49
|
-
|
|
50
|
-
@
|
|
51
|
-
@
|
|
52
|
-
@
|
|
53
|
-
|
|
54
|
-
@
|
|
55
|
-
|
|
56
|
-
@
|
|
57
|
-
@
|
|
51
|
+
@sep = sep .freeze
|
|
52
|
+
@quote = quote.freeze
|
|
53
|
+
|
|
54
|
+
@drop = drop
|
|
55
|
+
@eol = eol.freeze
|
|
56
|
+
@mode = mode
|
|
57
|
+
@out = out
|
|
58
|
+
@relax = relax
|
|
59
|
+
|
|
60
|
+
@es = "" .freeze
|
|
61
|
+
@cr = "\r" .freeze
|
|
62
|
+
@lf = "\n" .freeze
|
|
63
|
+
@eq = "=" .freeze
|
|
64
|
+
@esc = (@quote * 2).freeze
|
|
65
|
+
|
|
66
|
+
@tokens = [@sep,@quote,@cr,@lf,@es,nil]
|
|
67
|
+
@tokens << @eq if excel # See http://bit.ly/3Y7jIvc
|
|
58
68
|
end
|
|
59
69
|
|
|
60
70
|
def reset(str=nil)
|
|
61
71
|
self.string = str if str
|
|
62
72
|
super()
|
|
63
|
-
@char
|
|
64
|
-
@flag
|
|
73
|
+
@char = peek(1)
|
|
74
|
+
@flag = nil
|
|
65
75
|
|
|
66
|
-
@rows
|
|
67
|
-
@cols
|
|
76
|
+
@rows = nil
|
|
77
|
+
@cols = @cells = 0
|
|
68
78
|
end
|
|
69
79
|
|
|
70
80
|
# ==[ Lexer ]==
|
|
71
81
|
|
|
72
82
|
def next_char
|
|
73
83
|
getch
|
|
74
|
-
@char =
|
|
84
|
+
@char = peek(1)
|
|
75
85
|
end
|
|
76
86
|
|
|
77
87
|
def next_token
|
|
78
88
|
case @flag
|
|
79
|
-
when @es then @flag = nil; [@cr,@lf,nil].include?(@char) and return @es
|
|
89
|
+
when @es then @flag = nil; [@cr,@lf,@es,nil].include?(@char) and return @es
|
|
80
90
|
when @cr then @flag = nil; next_char == @lf and next_char
|
|
81
91
|
when @lf then @flag = nil; next_char
|
|
82
92
|
end if @flag
|
|
83
93
|
|
|
84
|
-
if
|
|
94
|
+
if @tokens.include?(@char)
|
|
85
95
|
case @char
|
|
86
|
-
when @quote #
|
|
96
|
+
when @quote, @eq # consume quoted cell
|
|
97
|
+
@char == @eq and next_char # excel mode: allows ,="012",
|
|
87
98
|
match = ""
|
|
88
99
|
while true
|
|
89
100
|
getch # consume the quote (optimized by not calling next_char)
|
|
90
101
|
match << (scan_until(/(?=#{@quote})/o) or bomb "unclosed quote")
|
|
91
102
|
case next_char
|
|
92
|
-
when @sep
|
|
93
|
-
when @quote
|
|
94
|
-
when @cr,@lf,nil then break
|
|
95
|
-
else
|
|
103
|
+
when @sep then @flag = @es; next_char; break
|
|
104
|
+
when @quote then match << @quote
|
|
105
|
+
when @cr,@lf,@es,nil then break
|
|
106
|
+
else
|
|
107
|
+
if @relax
|
|
108
|
+
match << @quote + @char
|
|
109
|
+
else
|
|
110
|
+
bomb "invalid character after quote"
|
|
111
|
+
end
|
|
96
112
|
end
|
|
97
113
|
end
|
|
98
114
|
match
|
|
99
|
-
when @sep
|
|
100
|
-
when @cr
|
|
101
|
-
when @lf
|
|
102
|
-
when nil
|
|
115
|
+
when @sep then @flag = @es; next_char; @es
|
|
116
|
+
when @cr then @flag = @cr; nil
|
|
117
|
+
when @lf then @flag = @lf; nil
|
|
118
|
+
when @es,nil then nil
|
|
103
119
|
end
|
|
104
|
-
else #
|
|
120
|
+
else # consume unquoted cell
|
|
105
121
|
match = scan_until(/(?=#{@sep}|#{@cr}|#{@lf}|\z)/o) or bomb "unexpected character"
|
|
106
|
-
@char =
|
|
122
|
+
@char = peek(1)
|
|
107
123
|
@char == @sep and @flag = @es and next_char
|
|
108
124
|
match
|
|
109
125
|
end
|
|
110
126
|
end
|
|
111
127
|
|
|
112
128
|
def bomb(msg)
|
|
113
|
-
abort "#{File.basename($0)}: #{msg} at character #{pos} near '#{string[pos-4,7]}'"
|
|
129
|
+
abort "\n#{File.basename($0)}: #{msg} at character #{pos} near '#{string[pos-4,7]}'"
|
|
114
130
|
end
|
|
115
131
|
|
|
116
132
|
# ==[ Parser ]==
|
|
@@ -135,7 +151,7 @@ class Censive < StringScanner
|
|
|
135
151
|
|
|
136
152
|
# ==[ Helpers ]==
|
|
137
153
|
|
|
138
|
-
# grok returns 2
|
|
154
|
+
# grok returns: 2 for seps and quotes, 1 for seps only, and 0 for neither
|
|
139
155
|
def grok(str)
|
|
140
156
|
if pos = str.index(/(#{@quote})|#{@sep}/o)
|
|
141
157
|
$1 ? 2 : str.index(/#{@quote}/o, pos) ? 2 : 1
|
|
@@ -147,6 +163,9 @@ class Censive < StringScanner
|
|
|
147
163
|
def <<(row)
|
|
148
164
|
@out or return super
|
|
149
165
|
|
|
166
|
+
# drop trailing seps, if specified
|
|
167
|
+
row.pop while row.last.empty? if @drop
|
|
168
|
+
|
|
150
169
|
# most compact export format
|
|
151
170
|
s,q = @sep, @quote
|
|
152
171
|
out = case @mode
|
|
@@ -167,9 +186,6 @@ class Censive < StringScanner
|
|
|
167
186
|
row.map {|col| "#{q}#{col.gsub(q, @esc)}#{q}" }
|
|
168
187
|
end.join(s)
|
|
169
188
|
|
|
170
|
-
# drop trailing seps, if specified
|
|
171
|
-
out.gsub!(/#{s}+\z/,'') if @drop
|
|
172
|
-
|
|
173
189
|
# write output, using desired line endings
|
|
174
190
|
@out << out + @eol
|
|
175
191
|
end
|
|
@@ -188,31 +204,29 @@ class Censive < StringScanner
|
|
|
188
204
|
end
|
|
189
205
|
end
|
|
190
206
|
|
|
191
|
-
|
|
207
|
+
__END__
|
|
192
208
|
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
Censive.writer('out.csv', sep: ',', quote: "'") do |out|
|
|
196
|
-
Censive.new(data).each do |row|
|
|
197
|
-
out << row
|
|
198
|
-
end
|
|
199
|
-
end
|
|
209
|
+
# ==[ Playground... ]==
|
|
200
210
|
|
|
201
|
-
#
|
|
202
|
-
# ARGV << "z.csv" if ARGV.empty?
|
|
203
|
-
#
|
|
204
|
-
# case 1
|
|
205
|
-
# when 1
|
|
206
|
-
# path = ARGV.first
|
|
207
|
-
# data = File.read(path)
|
|
208
|
-
# when 2
|
|
209
|
-
# data = DATA.gets("\n\n").rstrip
|
|
210
|
-
# end
|
|
211
|
-
#
|
|
212
211
|
# STDOUT.sync = true
|
|
213
212
|
#
|
|
214
|
-
#
|
|
213
|
+
# data = File.read('1.csv')
|
|
215
214
|
#
|
|
216
|
-
#
|
|
215
|
+
# Censive.writer('out.csv') do |out|
|
|
216
|
+
# Censive.new(data, relax: true, excel: true).each do |row|
|
|
217
|
+
# out << row
|
|
218
|
+
# end
|
|
219
|
+
# end
|
|
217
220
|
#
|
|
218
|
-
#
|
|
221
|
+
# __END__
|
|
222
|
+
|
|
223
|
+
ARGV << "z.csv" if ARGV.empty?
|
|
224
|
+
|
|
225
|
+
path = ARGV.first
|
|
226
|
+
data = File.read(path)
|
|
227
|
+
|
|
228
|
+
csv = Censive.new(data)
|
|
229
|
+
|
|
230
|
+
data.size > 1e6 ? csv.parse : csv.parse.each {|cols| p cols }
|
|
231
|
+
|
|
232
|
+
csv.stats
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: censive
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: '0.6'
|
|
4
|
+
version: '0.8'
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Steve Shreeve
|
|
@@ -10,7 +10,7 @@ bindir: bin
|
|
|
10
10
|
cert_chain: []
|
|
11
11
|
date: 2023-01-31 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
|
-
description: A quick and lightweight
|
|
13
|
+
description: A quick and lightweight CSV handling library for Ruby
|
|
14
14
|
email: steve.shreeve@gmail.com
|
|
15
15
|
executables: []
|
|
16
16
|
extensions: []
|
|
@@ -43,5 +43,5 @@ requirements: []
|
|
|
43
43
|
rubygems_version: 3.4.5
|
|
44
44
|
signing_key:
|
|
45
45
|
specification_version: 4
|
|
46
|
-
summary: A quick and lightweight
|
|
46
|
+
summary: A quick and lightweight CSV handling library for Ruby
|
|
47
47
|
test_files: []
|