censive 0.9 → 0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f4b06c1c42b5f813f8901c4e7240cdd43df1ccc22cf87327dc3ed7d850720eb4
|
4
|
+
data.tar.gz: 97ab27b79eead81517fa28a4c51923fa02ec2fa95922f6f61dc509c7a4890b2e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a2f297ac516f5e01510a9ceb90cdb2cc1e782ff97a4f67515d73f6d56d8512cd4d9cbb5d04425bcdb8a7a5cdb63aeb2835e7bed2a76dcc149dae0bd63c4cc17b
|
7
|
+
data.tar.gz: 85762c69bc669db5a48f0e3b58e4319afdc9f1765cc18cd2b6c9501aaaccf3e41dddb58d3654bc4c76242632eaf8988f348bb652c6c40105e0373b8afdf463d3
|
data/README.md
CHANGED
@@ -2,7 +2,58 @@
|
|
2
2
|
|
3
3
|
A quick and lightweight CSV handling library for Ruby
|
4
4
|
|
5
|
-
##
|
5
|
+
## Example
|
6
|
+
|
7
|
+
```ruby
|
8
|
+
#!/usr/bin/env ruby
|
9
|
+
|
10
|
+
STDOUT.sync = true
|
11
|
+
|
12
|
+
require 'censive'
|
13
|
+
require 'fileutils'
|
14
|
+
|
15
|
+
abort "usage: #{File.basename($0)} <files>" if ARGV.empty?
|
16
|
+
|
17
|
+
rand = `LC_ALL=C tr -dc a-zA-Z0-9 < /dev/random | head -c12`
|
18
|
+
|
19
|
+
rows = []
|
20
|
+
cols = []
|
21
|
+
coun = 0
|
22
|
+
full = 0
|
23
|
+
|
24
|
+
ARGV.each do |path|
|
25
|
+
File.file?(path) or next
|
26
|
+
|
27
|
+
print "Processing #{path.inspect}"
|
28
|
+
|
29
|
+
rows.clear
|
30
|
+
cols.clear
|
31
|
+
seen = 0
|
32
|
+
coun += 1
|
33
|
+
|
34
|
+
dest = "#{path}-#{rand}"
|
35
|
+
|
36
|
+
begin
|
37
|
+
Censive.writer(dest) do |file|
|
38
|
+
Censive.reader(path, excel: true, relax: true).each do |cols|
|
39
|
+
file << cols
|
40
|
+
seen += 1
|
41
|
+
print "." if (seen % 1e5) == 0 # give a status update every so often
|
42
|
+
end
|
43
|
+
end
|
44
|
+
FileUtils.mv(dest, path)
|
45
|
+
full += (seen - 1)
|
46
|
+
puts " (#{seen - 1} rows of data)"
|
47
|
+
rescue
|
48
|
+
puts " - unable to process (#{$!})"
|
49
|
+
FileUtils.rm_f(dest)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
puts "Processed #{coun} files with a total of #{full} rows of data" if coun > 1
|
54
|
+
```
|
55
|
+
|
56
|
+
## Convert a CSV file to a TSV file
|
6
57
|
|
7
58
|
```ruby
|
8
59
|
require 'censive'
|
data/censive.gemspec
CHANGED
data/lib/censive.rb
CHANGED
@@ -28,37 +28,36 @@ require 'strscan'
|
|
28
28
|
|
29
29
|
class Censive < StringScanner
|
30
30
|
|
31
|
-
def self.writer(
|
32
|
-
|
33
|
-
|
31
|
+
def self.writer(obj=$stdout, **opts, &code)
|
32
|
+
case obj
|
33
|
+
when String then File.open(path, 'w') {|file| yield new(out: obj, **opts, &code) }
|
34
|
+
when IO then new(out: obj, **opts, &code)
|
35
|
+
else abort "#{File.basename($0)}: invalid #{obj.class} object in writer"
|
34
36
|
end
|
35
37
|
end
|
36
38
|
|
37
39
|
def initialize(str=nil,
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
drop: false , # enable to drop trailing separators
|
42
|
-
eol: "\n" , # desired line endings for exports
|
43
|
-
excel: false , # literals (="01"), formulas (=A1 + B2), see http://bit.ly/3Y7jIvc
|
40
|
+
drop: false , # drop trailing empty fields?
|
41
|
+
eol: "\n" , # line endings for exports
|
42
|
+
excel: false , # literals(="01") formulas(=A1 + B2); http://bit.ly/3Y7jIvc
|
44
43
|
mode: :compact, # export mode: compact or full
|
45
|
-
out: nil , # output
|
46
|
-
|
47
|
-
|
44
|
+
out: nil , # output stream, needs to respond to <<
|
45
|
+
quote: '"' , # quote character
|
46
|
+
relax: false , # relax quote parsing so ,"Fo"o, => ,"Fo""o",
|
47
|
+
sep: ',' , # column separator character
|
48
48
|
**opts # grab bag
|
49
49
|
)
|
50
50
|
super(str || '')
|
51
51
|
reset
|
52
52
|
|
53
|
-
@sep = sep .freeze
|
54
|
-
@quote = quote.freeze
|
55
|
-
|
56
53
|
@drop = drop
|
57
|
-
@eol = eol.freeze
|
54
|
+
@eol = eol .freeze #!# TODO: are the '.freeze' statements helpful?
|
58
55
|
@excel = excel
|
59
56
|
@mode = mode
|
60
57
|
@out = out
|
58
|
+
@quote = quote.freeze
|
61
59
|
@relax = relax
|
60
|
+
@sep = sep .freeze
|
62
61
|
|
63
62
|
@es = "" .freeze
|
64
63
|
@cr = "\r" .freeze
|
@@ -96,7 +95,7 @@ class Censive < StringScanner
|
|
96
95
|
else @flag = nil
|
97
96
|
end if @flag
|
98
97
|
|
99
|
-
#
|
98
|
+
# Excel literals ="0123" and formulas =A1 + B2 (see http://bit.ly/3Y7jIvc)
|
100
99
|
if @excel && @char == @eq
|
101
100
|
@flag = @eq
|
102
101
|
next_char
|
@@ -107,18 +106,13 @@ class Censive < StringScanner
|
|
107
106
|
when @quote # consume quoted cell
|
108
107
|
match = ""
|
109
108
|
while true
|
110
|
-
getch #
|
109
|
+
getch # move past the quote that got us here
|
111
110
|
match << (scan_until(/(?=#{@quote})/o) or bomb "unclosed quote")
|
112
111
|
case next_char
|
113
112
|
when @sep then @flag = @es; next_char; break
|
114
113
|
when @quote then match << @quote
|
115
114
|
when @cr,@lf,@es,nil then break
|
116
|
-
else
|
117
|
-
if @relax
|
118
|
-
match << @quote + @char
|
119
|
-
else
|
120
|
-
bomb "invalid character after quote"
|
121
|
-
end
|
115
|
+
else @relax ? match << (@quote + @char) : bomb("invalid character after quote")
|
122
116
|
end
|
123
117
|
end
|
124
118
|
match
|
@@ -129,7 +123,7 @@ class Censive < StringScanner
|
|
129
123
|
end
|
130
124
|
else # consume unquoted cell
|
131
125
|
match = scan_until(/(?=#{@sep}|#{@cr}|#{@lf}|\z)/o) or bomb "unexpected character"
|
132
|
-
match = @eq + match
|
126
|
+
match = @eq + match and @flag = nil if @flag == @eq
|
133
127
|
@char = peek(1)
|
134
128
|
@char == @sep and @flag = @es and next_char
|
135
129
|
match
|
@@ -143,12 +137,12 @@ class Censive < StringScanner
|
|
143
137
|
# ==[ Parser ]==
|
144
138
|
|
145
139
|
def parse
|
146
|
-
@rows
|
140
|
+
@rows = []
|
147
141
|
while row = next_row
|
148
142
|
@rows << row
|
149
|
-
|
150
|
-
@cols =
|
151
|
-
@cells +=
|
143
|
+
count = row.size
|
144
|
+
@cols = count if count > @cols
|
145
|
+
@cells += count
|
152
146
|
end
|
153
147
|
@rows
|
154
148
|
end
|
@@ -171,13 +165,15 @@ class Censive < StringScanner
|
|
171
165
|
end
|
172
166
|
end
|
173
167
|
|
168
|
+
# output a row
|
174
169
|
def <<(row)
|
175
170
|
@out or return super
|
176
171
|
|
177
|
-
# drop trailing
|
172
|
+
# drop trailing empty columns
|
178
173
|
row.pop while row.last.empty? if @drop
|
179
174
|
|
180
|
-
|
175
|
+
#!# FIXME: Excel output needs to protect 0-leading numbers
|
176
|
+
|
181
177
|
s,q = @sep, @quote
|
182
178
|
out = case @mode
|
183
179
|
when :compact
|
@@ -201,7 +197,7 @@ class Censive < StringScanner
|
|
201
197
|
row.map {|col| "#{q}#{col.gsub(q, @esc)}#{q}" }
|
202
198
|
end.join(s)
|
203
199
|
|
204
|
-
#
|
200
|
+
# add line ending
|
205
201
|
@out << out + @eol
|
206
202
|
end
|
207
203
|
|
@@ -210,6 +206,11 @@ class Censive < StringScanner
|
|
210
206
|
@rows.each {|row| yield row }
|
211
207
|
end
|
212
208
|
|
209
|
+
def export(...)
|
210
|
+
out = self.class.writer(...)
|
211
|
+
each {|row| out << row }
|
212
|
+
end
|
213
|
+
|
213
214
|
def stats
|
214
215
|
wide = string.size.to_s.size
|
215
216
|
puts "%#{wide}d rows" % @rows.size
|
@@ -219,9 +220,35 @@ class Censive < StringScanner
|
|
219
220
|
end
|
220
221
|
end
|
221
222
|
|
223
|
+
# ==[ Command line ]==
|
224
|
+
|
225
|
+
if __FILE__ == $0
|
226
|
+
raw = DATA.gets("\n\n").chomp
|
227
|
+
csv = Censive.new(raw, excel: true)
|
228
|
+
csv.export # (sep: "\t", excel: true)
|
229
|
+
end
|
230
|
+
|
222
231
|
__END__
|
232
|
+
Name,Age,Shoe
|
233
|
+
Alice,27,5
|
234
|
+
Bob,33,10 1/2
|
235
|
+
Charlie or "Chuck",=B2 + B3,9
|
236
|
+
"Doug E Fresh",="007",10
|
237
|
+
Subtotal,=sum(B2:B5),="01234"
|
238
|
+
|
223
239
|
|
224
|
-
|
240
|
+
|
241
|
+
|
242
|
+
path = '../test/a-uses-tabs-and-single-quotes-and-no-trailing-newline.tsv'
|
243
|
+
data = File.read(path)
|
244
|
+
|
245
|
+
out = Censive.writer
|
246
|
+
|
247
|
+
Censive.new(data, sep: "\t", quote: "'").each do |row|
|
248
|
+
p row
|
249
|
+
end
|
250
|
+
|
251
|
+
Censive.reader(path, sep: "\t", quote: "'").each {|r| p r}
|
225
252
|
|
226
253
|
# STDOUT.sync = true
|
227
254
|
#
|
@@ -234,14 +261,14 @@ __END__
|
|
234
261
|
# end
|
235
262
|
#
|
236
263
|
# __END__
|
237
|
-
|
238
|
-
ARGV << "z.csv" if ARGV.empty?
|
239
|
-
|
240
|
-
path = ARGV.first
|
241
|
-
data = File.read(path)
|
242
|
-
|
243
|
-
csv = Censive.new(data)
|
244
|
-
|
245
|
-
data.size > 1e6 ? csv.parse : csv.parse.each {|cols| p cols }
|
246
|
-
|
247
|
-
csv.stats
|
264
|
+
#
|
265
|
+
# ARGV << "z.csv" if ARGV.empty?
|
266
|
+
#
|
267
|
+
# path = ARGV.first
|
268
|
+
# data = File.read(path)
|
269
|
+
#
|
270
|
+
# csv = Censive.new(data)
|
271
|
+
#
|
272
|
+
# data.size > 1e6 ? csv.parse : csv.parse.each {|cols| p cols }
|
273
|
+
#
|
274
|
+
# csv.stats
|
File without changes
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: censive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.9'
|
4
|
+
version: '0.10'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Steve Shreeve
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-02-02 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: A quick and lightweight CSV handling library for Ruby
|
14
14
|
email: steve.shreeve@gmail.com
|
@@ -20,7 +20,7 @@ files:
|
|
20
20
|
- README.md
|
21
21
|
- censive.gemspec
|
22
22
|
- lib/censive.rb
|
23
|
-
- test/a-uses-tabs-and-single-quotes-and-no-trailing-newline.
|
23
|
+
- test/a-uses-tabs-and-single-quotes-and-no-trailing-newline.tsv
|
24
24
|
homepage: https://github.com/shreeve/censive
|
25
25
|
licenses:
|
26
26
|
- MIT
|