censive 0.6 → 0.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/censive.gemspec +3 -3
- data/lib/censive.rb +74 -9
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8a757fa8bbc5ddf364889e4b7feca2001f3784e8d0b2ff70a1b0349691a34aae
|
4
|
+
data.tar.gz: 68dced562eb0dc9b7ad300447091ceb74c04a55201e88cc9fffbe1ba3bbc534d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c48d7e2bd3d1a7baa5fb2fae7b0553de665737849e9a50721f704a1a1f67c758c545dfe53d21f32ce386b20ea21f04c67ee8d765bf20653774b9475ebb60711f
|
7
|
+
data.tar.gz: 411d59006ebcb6a07161186b56f73a8dcc73beeaecbe14e786ad237935c62fd6ef0631483c8f297399098b0dea2387863f7be8c878568e0558804e5bd20b55ee
|
data/README.md
CHANGED
data/censive.gemspec
CHANGED
@@ -2,11 +2,11 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = "censive"
|
5
|
-
s.version = "0.6"
|
5
|
+
s.version = "0.7"
|
6
6
|
s.author = "Steve Shreeve"
|
7
7
|
s.email = "steve.shreeve@gmail.com"
|
8
|
-
s.summary = "A quick and lightweight
|
9
|
-
s.description = "A quick and lightweight
|
8
|
+
s.summary = "A quick and lightweight CSV handling library for Ruby"
|
9
|
+
s.description = "A quick and lightweight CSV handling library for Ruby"
|
10
10
|
s.homepage = "https://github.com/shreeve/censive"
|
11
11
|
s.license = "MIT"
|
12
12
|
s.files = `git ls-files`.split("\n") - %w[.gitignore]
|
data/lib/censive.rb
CHANGED
@@ -36,20 +36,26 @@ class Censive < StringScanner
|
|
36
36
|
def initialize(str=nil,
|
37
37
|
sep: ',' , # column separator character
|
38
38
|
quote: '"' , # quote character
|
39
|
-
|
40
|
-
mode: :compact, # export mode: compact or full
|
39
|
+
|
41
40
|
drop: false , # enable to drop trailing separators
|
42
|
-
eol: "\n"
|
41
|
+
eol: "\n" , # desired line endings for exports
|
42
|
+
mode: :compact, # export mode: compact or full
|
43
|
+
out: nil , # output IO/file
|
44
|
+
relax: false , # relax parsing of quotes
|
45
|
+
|
46
|
+
**opts # grab bag
|
43
47
|
)
|
44
48
|
super(str || '')
|
45
49
|
reset
|
46
50
|
|
47
51
|
@sep = sep .freeze
|
48
52
|
@quote = quote.freeze
|
49
|
-
|
50
|
-
@mode = mode
|
53
|
+
|
51
54
|
@drop = drop
|
52
55
|
@eol = eol.freeze
|
56
|
+
@mode = mode
|
57
|
+
@out = out
|
58
|
+
@relax = relax
|
53
59
|
|
54
60
|
@es = "" .freeze
|
55
61
|
@cr = "\r" .freeze
|
@@ -92,7 +98,12 @@ class Censive < StringScanner
|
|
92
98
|
when @sep then @flag = @es; next_char; break
|
93
99
|
when @quote then match << @quote
|
94
100
|
when @cr,@lf,nil then break
|
95
|
-
else
|
101
|
+
else
|
102
|
+
if @relax
|
103
|
+
match << @quote + @char
|
104
|
+
else
|
105
|
+
bomb "invalid character after quote"
|
106
|
+
end
|
96
107
|
end
|
97
108
|
end
|
98
109
|
match
|
@@ -110,7 +121,7 @@ class Censive < StringScanner
|
|
110
121
|
end
|
111
122
|
|
112
123
|
def bomb(msg)
|
113
|
-
abort "#{File.basename($0)}: #{msg} at character #{pos} near '#{string[pos-4,7]}'"
|
124
|
+
abort "\n#{File.basename($0)}: #{msg} at character #{pos} near '#{string[pos-4,7]}'"
|
114
125
|
end
|
115
126
|
|
116
127
|
# ==[ Parser ]==
|
@@ -135,7 +146,7 @@ class Censive < StringScanner
|
|
135
146
|
|
136
147
|
# ==[ Helpers ]==
|
137
148
|
|
138
|
-
# grok returns 2
|
149
|
+
# grok returns: 2 for seps and quotes, 1 for seps only, and 0 for neither
|
139
150
|
def grok(str)
|
140
151
|
if pos = str.index(/(#{@quote})|#{@sep}/o)
|
141
152
|
$1 ? 2 : str.index(/#{@quote}/o, pos) ? 2 : 1
|
@@ -188,8 +199,63 @@ class Censive < StringScanner
|
|
188
199
|
end
|
189
200
|
end
|
190
201
|
|
202
|
+
__END__
|
203
|
+
|
191
204
|
# ==[ Playground... ]==
|
192
205
|
|
206
|
+
STDOUT.sync = true
|
207
|
+
|
208
|
+
require 'fileutils'
|
209
|
+
|
210
|
+
ARGV << "101.csv"
|
211
|
+
|
212
|
+
rand = `LC_ALL=C tr -dc a-zA-Z0-9 < /dev/random | head -c12`
|
213
|
+
|
214
|
+
rows = []
|
215
|
+
cols = []
|
216
|
+
coun = 0
|
217
|
+
full = 0
|
218
|
+
|
219
|
+
ARGV.each do |path|
|
220
|
+
File.file?(path) or next
|
221
|
+
|
222
|
+
print "Processing #{path.inspect}"
|
223
|
+
|
224
|
+
rows.clear
|
225
|
+
cols.clear
|
226
|
+
seen = 0
|
227
|
+
coun += 1
|
228
|
+
|
229
|
+
dest = "#{path}-#{rand}"
|
230
|
+
|
231
|
+
begin
|
232
|
+
Censive.writer(dest) do |file|
|
233
|
+
Censive.new(File.read(path), relax: true).each do |cols|
|
234
|
+
cols.each {|cell| cell && cell.size >= 3 && cell.sub!(/\A="/, '') && cell.sub!(/"\z/, '') }
|
235
|
+
file << cols
|
236
|
+
seen += 1
|
237
|
+
print "." if (seen % 1e5) == 0
|
238
|
+
end
|
239
|
+
end
|
240
|
+
FileUtils.mv(dest, path)
|
241
|
+
full += (seen - 1)
|
242
|
+
puts " (#{seen - 1} rows of data)"
|
243
|
+
rescue
|
244
|
+
puts " - unable to process (#{$!})"
|
245
|
+
FileUtils.rm_f(dest)
|
246
|
+
end
|
247
|
+
end
|
248
|
+
|
249
|
+
puts "Processed #{coun} files with a total of #{full} rows of data" if coun > 1
|
250
|
+
|
251
|
+
__END__
|
252
|
+
,"CHUI, LOK HANG "BENNY", => ,"""CHUI, LOK HANG ""BENNY""",
|
253
|
+
|
254
|
+
,"..............."B
|
255
|
+
|
256
|
+
__END__
|
257
|
+
|
258
|
+
|
193
259
|
data = File.read('1.csv')
|
194
260
|
|
195
261
|
Censive.writer('out.csv', sep: ',', quote: "'") do |out|
|
@@ -198,7 +264,6 @@ Censive.writer('out.csv', sep: ',', quote: "'") do |out|
|
|
198
264
|
end
|
199
265
|
end
|
200
266
|
|
201
|
-
#
|
202
267
|
# ARGV << "z.csv" if ARGV.empty?
|
203
268
|
#
|
204
269
|
# case 1
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: censive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.6'
|
4
|
+
version: '0.7'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Steve Shreeve
|
@@ -10,7 +10,7 @@ bindir: bin
|
|
10
10
|
cert_chain: []
|
11
11
|
date: 2023-01-31 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
|
-
description: A quick and lightweight
|
13
|
+
description: A quick and lightweight CSV handling library for Ruby
|
14
14
|
email: steve.shreeve@gmail.com
|
15
15
|
executables: []
|
16
16
|
extensions: []
|
@@ -43,5 +43,5 @@ requirements: []
|
|
43
43
|
rubygems_version: 3.4.5
|
44
44
|
signing_key:
|
45
45
|
specification_version: 4
|
46
|
-
summary: A quick and lightweight
|
46
|
+
summary: A quick and lightweight CSV handling library for Ruby
|
47
47
|
test_files: []
|