censive 0.6 → 0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/censive.gemspec +3 -3
- data/lib/censive.rb +74 -9
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8a757fa8bbc5ddf364889e4b7feca2001f3784e8d0b2ff70a1b0349691a34aae
|
4
|
+
data.tar.gz: 68dced562eb0dc9b7ad300447091ceb74c04a55201e88cc9fffbe1ba3bbc534d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c48d7e2bd3d1a7baa5fb2fae7b0553de665737849e9a50721f704a1a1f67c758c545dfe53d21f32ce386b20ea21f04c67ee8d765bf20653774b9475ebb60711f
|
7
|
+
data.tar.gz: 411d59006ebcb6a07161186b56f73a8dcc73beeaecbe14e786ad237935c62fd6ef0631483c8f297399098b0dea2387863f7be8c878568e0558804e5bd20b55ee
|
data/README.md
CHANGED
data/censive.gemspec
CHANGED
@@ -2,11 +2,11 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = "censive"
|
5
|
-
s.version = "0.6"
|
5
|
+
s.version = "0.7"
|
6
6
|
s.author = "Steve Shreeve"
|
7
7
|
s.email = "steve.shreeve@gmail.com"
|
8
|
-
s.summary = "A quick and lightweight
|
9
|
-
s.description = "A quick and lightweight
|
8
|
+
s.summary = "A quick and lightweight CSV handling library for Ruby"
|
9
|
+
s.description = "A quick and lightweight CSV handling library for Ruby"
|
10
10
|
s.homepage = "https://github.com/shreeve/censive"
|
11
11
|
s.license = "MIT"
|
12
12
|
s.files = `git ls-files`.split("\n") - %w[.gitignore]
|
data/lib/censive.rb
CHANGED
@@ -36,20 +36,26 @@ class Censive < StringScanner
|
|
36
36
|
def initialize(str=nil,
|
37
37
|
sep: ',' , # column separator character
|
38
38
|
quote: '"' , # quote character
|
39
|
-
|
40
|
-
mode: :compact, # export mode: compact or full
|
39
|
+
|
41
40
|
drop: false , # enable to drop trailing separators
|
42
|
-
eol: "\n"
|
41
|
+
eol: "\n" , # desired line endings for exports
|
42
|
+
mode: :compact, # export mode: compact or full
|
43
|
+
out: nil , # output IO/file
|
44
|
+
relax: false , # relax parsing of quotes
|
45
|
+
|
46
|
+
**opts # grab bag
|
43
47
|
)
|
44
48
|
super(str || '')
|
45
49
|
reset
|
46
50
|
|
47
51
|
@sep = sep .freeze
|
48
52
|
@quote = quote.freeze
|
49
|
-
|
50
|
-
@mode = mode
|
53
|
+
|
51
54
|
@drop = drop
|
52
55
|
@eol = eol.freeze
|
56
|
+
@mode = mode
|
57
|
+
@out = out
|
58
|
+
@relax = relax
|
53
59
|
|
54
60
|
@es = "" .freeze
|
55
61
|
@cr = "\r" .freeze
|
@@ -92,7 +98,12 @@ class Censive < StringScanner
|
|
92
98
|
when @sep then @flag = @es; next_char; break
|
93
99
|
when @quote then match << @quote
|
94
100
|
when @cr,@lf,nil then break
|
95
|
-
else
|
101
|
+
else
|
102
|
+
if @relax
|
103
|
+
match << @quote + @char
|
104
|
+
else
|
105
|
+
bomb "invalid character after quote"
|
106
|
+
end
|
96
107
|
end
|
97
108
|
end
|
98
109
|
match
|
@@ -110,7 +121,7 @@ class Censive < StringScanner
|
|
110
121
|
end
|
111
122
|
|
112
123
|
def bomb(msg)
|
113
|
-
abort "#{File.basename($0)}: #{msg} at character #{pos} near '#{string[pos-4,7]}'"
|
124
|
+
abort "\n#{File.basename($0)}: #{msg} at character #{pos} near '#{string[pos-4,7]}'"
|
114
125
|
end
|
115
126
|
|
116
127
|
# ==[ Parser ]==
|
@@ -135,7 +146,7 @@ class Censive < StringScanner
|
|
135
146
|
|
136
147
|
# ==[ Helpers ]==
|
137
148
|
|
138
|
-
# grok returns 2
|
149
|
+
# grok returns: 2 for seps and quotes, 1 for seps only, and 0 for neither
|
139
150
|
def grok(str)
|
140
151
|
if pos = str.index(/(#{@quote})|#{@sep}/o)
|
141
152
|
$1 ? 2 : str.index(/#{@quote}/o, pos) ? 2 : 1
|
@@ -188,8 +199,63 @@ class Censive < StringScanner
|
|
188
199
|
end
|
189
200
|
end
|
190
201
|
|
202
|
+
__END__
|
203
|
+
|
191
204
|
# ==[ Playground... ]==
|
192
205
|
|
206
|
+
STDOUT.sync = true
|
207
|
+
|
208
|
+
require 'fileutils'
|
209
|
+
|
210
|
+
ARGV << "101.csv"
|
211
|
+
|
212
|
+
rand = `LC_ALL=C tr -dc a-zA-Z0-9 < /dev/random | head -c12`
|
213
|
+
|
214
|
+
rows = []
|
215
|
+
cols = []
|
216
|
+
coun = 0
|
217
|
+
full = 0
|
218
|
+
|
219
|
+
ARGV.each do |path|
|
220
|
+
File.file?(path) or next
|
221
|
+
|
222
|
+
print "Processing #{path.inspect}"
|
223
|
+
|
224
|
+
rows.clear
|
225
|
+
cols.clear
|
226
|
+
seen = 0
|
227
|
+
coun += 1
|
228
|
+
|
229
|
+
dest = "#{path}-#{rand}"
|
230
|
+
|
231
|
+
begin
|
232
|
+
Censive.writer(dest) do |file|
|
233
|
+
Censive.new(File.read(path), relax: true).each do |cols|
|
234
|
+
cols.each {|cell| cell && cell.size >= 3 && cell.sub!(/\A="/, '') && cell.sub!(/"\z/, '') }
|
235
|
+
file << cols
|
236
|
+
seen += 1
|
237
|
+
print "." if (seen % 1e5) == 0
|
238
|
+
end
|
239
|
+
end
|
240
|
+
FileUtils.mv(dest, path)
|
241
|
+
full += (seen - 1)
|
242
|
+
puts " (#{seen - 1} rows of data)"
|
243
|
+
rescue
|
244
|
+
puts " - unable to process (#{$!})"
|
245
|
+
FileUtils.rm_f(dest)
|
246
|
+
end
|
247
|
+
end
|
248
|
+
|
249
|
+
puts "Processed #{coun} files with a total of #{full} rows of data" if coun > 1
|
250
|
+
|
251
|
+
__END__
|
252
|
+
,"CHUI, LOK HANG "BENNY", => ,"""CHUI, LOK HANG ""BENNY""",
|
253
|
+
|
254
|
+
,"..............."B
|
255
|
+
|
256
|
+
__END__
|
257
|
+
|
258
|
+
|
193
259
|
data = File.read('1.csv')
|
194
260
|
|
195
261
|
Censive.writer('out.csv', sep: ',', quote: "'") do |out|
|
@@ -198,7 +264,6 @@ Censive.writer('out.csv', sep: ',', quote: "'") do |out|
|
|
198
264
|
end
|
199
265
|
end
|
200
266
|
|
201
|
-
#
|
202
267
|
# ARGV << "z.csv" if ARGV.empty?
|
203
268
|
#
|
204
269
|
# case 1
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: censive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.6'
|
4
|
+
version: '0.7'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Steve Shreeve
|
@@ -10,7 +10,7 @@ bindir: bin
|
|
10
10
|
cert_chain: []
|
11
11
|
date: 2023-01-31 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
|
-
description: A quick and lightweight
|
13
|
+
description: A quick and lightweight CSV handling library for Ruby
|
14
14
|
email: steve.shreeve@gmail.com
|
15
15
|
executables: []
|
16
16
|
extensions: []
|
@@ -43,5 +43,5 @@ requirements: []
|
|
43
43
|
rubygems_version: 3.4.5
|
44
44
|
signing_key:
|
45
45
|
specification_version: 4
|
46
|
-
summary: A quick and lightweight
|
46
|
+
summary: A quick and lightweight CSV handling library for Ruby
|
47
47
|
test_files: []
|