censive 0.5 → 0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/censive.gemspec +4 -4
- data/lib/censive.rb +88 -12
- metadata +3 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 8a757fa8bbc5ddf364889e4b7feca2001f3784e8d0b2ff70a1b0349691a34aae
|
|
4
|
+
data.tar.gz: 68dced562eb0dc9b7ad300447091ceb74c04a55201e88cc9fffbe1ba3bbc534d
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c48d7e2bd3d1a7baa5fb2fae7b0553de665737849e9a50721f704a1a1f67c758c545dfe53d21f32ce386b20ea21f04c67ee8d765bf20653774b9475ebb60711f
|
|
7
|
+
data.tar.gz: 411d59006ebcb6a07161186b56f73a8dcc73beeaecbe14e786ad237935c62fd6ef0631483c8f297399098b0dea2387863f7be8c878568e0558804e5bd20b55ee
|
data/README.md
CHANGED
data/censive.gemspec
CHANGED
|
@@ -2,13 +2,13 @@
|
|
|
2
2
|
|
|
3
3
|
Gem::Specification.new do |s|
|
|
4
4
|
s.name = "censive"
|
|
5
|
-
s.version = "0.5"
|
|
5
|
+
s.version = "0.7"
|
|
6
6
|
s.author = "Steve Shreeve"
|
|
7
7
|
s.email = "steve.shreeve@gmail.com"
|
|
8
|
-
s.summary = "A quick and lightweight
|
|
9
|
-
s.description = "A quick and lightweight
|
|
8
|
+
s.summary = "A quick and lightweight CSV handling library for Ruby"
|
|
9
|
+
s.description = "A quick and lightweight CSV handling library for Ruby"
|
|
10
10
|
s.homepage = "https://github.com/shreeve/censive"
|
|
11
11
|
s.license = "MIT"
|
|
12
12
|
s.files = `git ls-files`.split("\n") - %w[.gitignore]
|
|
13
|
-
s.executables = `cd bin && git ls-files .`.split("\n")
|
|
13
|
+
s.executables = `(cd bin 2>&1) > /dev/null && git ls-files .`.split("\n")
|
|
14
14
|
end
|
data/lib/censive.rb
CHANGED
|
@@ -33,17 +33,34 @@ class Censive < StringScanner
|
|
|
33
33
|
end
|
|
34
34
|
end
|
|
35
35
|
|
|
36
|
-
def initialize(str=nil,
|
|
36
|
+
def initialize(str=nil,
|
|
37
|
+
sep: ',' , # column separator character
|
|
38
|
+
quote: '"' , # quote character
|
|
39
|
+
|
|
40
|
+
drop: false , # enable to drop trailing separators
|
|
41
|
+
eol: "\n" , # desired line endings for exports
|
|
42
|
+
mode: :compact, # export mode: compact or full
|
|
43
|
+
out: nil , # output IO/file
|
|
44
|
+
relax: false , # relax parsing of quotes
|
|
45
|
+
|
|
46
|
+
**opts # grab bag
|
|
47
|
+
)
|
|
37
48
|
super(str || '')
|
|
38
49
|
reset
|
|
50
|
+
|
|
39
51
|
@sep = sep .freeze
|
|
40
52
|
@quote = quote.freeze
|
|
53
|
+
|
|
54
|
+
@drop = drop
|
|
55
|
+
@eol = eol.freeze
|
|
56
|
+
@mode = mode
|
|
57
|
+
@out = out
|
|
58
|
+
@relax = relax
|
|
59
|
+
|
|
41
60
|
@es = "" .freeze
|
|
42
61
|
@cr = "\r" .freeze
|
|
43
62
|
@lf = "\n" .freeze
|
|
44
|
-
@out = out
|
|
45
63
|
@esc = (@quote * 2).freeze
|
|
46
|
-
@mode = mode
|
|
47
64
|
end
|
|
48
65
|
|
|
49
66
|
def reset(str=nil)
|
|
@@ -81,7 +98,12 @@ class Censive < StringScanner
|
|
|
81
98
|
when @sep then @flag = @es; next_char; break
|
|
82
99
|
when @quote then match << @quote
|
|
83
100
|
when @cr,@lf,nil then break
|
|
84
|
-
else
|
|
101
|
+
else
|
|
102
|
+
if @relax
|
|
103
|
+
match << @quote + @char
|
|
104
|
+
else
|
|
105
|
+
bomb "invalid character after quote"
|
|
106
|
+
end
|
|
85
107
|
end
|
|
86
108
|
end
|
|
87
109
|
match
|
|
@@ -99,7 +121,7 @@ class Censive < StringScanner
|
|
|
99
121
|
end
|
|
100
122
|
|
|
101
123
|
def bomb(msg)
|
|
102
|
-
abort "#{File.basename($0)}: #{msg} at character #{pos} near '#{string[pos-4,7]}'"
|
|
124
|
+
abort "\n#{File.basename($0)}: #{msg} at character #{pos} near '#{string[pos-4,7]}'"
|
|
103
125
|
end
|
|
104
126
|
|
|
105
127
|
# ==[ Parser ]==
|
|
@@ -124,7 +146,7 @@ class Censive < StringScanner
|
|
|
124
146
|
|
|
125
147
|
# ==[ Helpers ]==
|
|
126
148
|
|
|
127
|
-
# grok returns 2
|
|
149
|
+
# grok returns: 2 for seps and quotes, 1 for seps only, and 0 for neither
|
|
128
150
|
def grok(str)
|
|
129
151
|
if pos = str.index(/(#{@quote})|#{@sep}/o)
|
|
130
152
|
$1 ? 2 : str.index(/#{@quote}/o, pos) ? 2 : 1
|
|
@@ -156,11 +178,11 @@ class Censive < StringScanner
|
|
|
156
178
|
row.map {|col| "#{q}#{col.gsub(q, @esc)}#{q}" }
|
|
157
179
|
end.join(s)
|
|
158
180
|
|
|
159
|
-
|
|
160
|
-
|
|
181
|
+
# drop trailing seps, if specified
|
|
182
|
+
out.gsub!(/#{s}+\z/,'') if @drop
|
|
161
183
|
|
|
162
|
-
|
|
163
|
-
@out << out + @
|
|
184
|
+
# write output, using desired line endings
|
|
185
|
+
@out << out + @eol
|
|
164
186
|
end
|
|
165
187
|
|
|
166
188
|
def each
|
|
@@ -177,17 +199,71 @@ class Censive < StringScanner
|
|
|
177
199
|
end
|
|
178
200
|
end
|
|
179
201
|
|
|
202
|
+
__END__
|
|
203
|
+
|
|
180
204
|
# ==[ Playground... ]==
|
|
181
205
|
|
|
206
|
+
STDOUT.sync = true
|
|
207
|
+
|
|
208
|
+
require 'fileutils'
|
|
209
|
+
|
|
210
|
+
ARGV << "101.csv"
|
|
211
|
+
|
|
212
|
+
rand = `LC_ALL=C tr -dc a-zA-Z0-9 < /dev/random | head -c12`
|
|
213
|
+
|
|
214
|
+
rows = []
|
|
215
|
+
cols = []
|
|
216
|
+
coun = 0
|
|
217
|
+
full = 0
|
|
218
|
+
|
|
219
|
+
ARGV.each do |path|
|
|
220
|
+
File.file?(path) or next
|
|
221
|
+
|
|
222
|
+
print "Processing #{path.inspect}"
|
|
223
|
+
|
|
224
|
+
rows.clear
|
|
225
|
+
cols.clear
|
|
226
|
+
seen = 0
|
|
227
|
+
coun += 1
|
|
228
|
+
|
|
229
|
+
dest = "#{path}-#{rand}"
|
|
230
|
+
|
|
231
|
+
begin
|
|
232
|
+
Censive.writer(dest) do |file|
|
|
233
|
+
Censive.new(File.read(path), relax: true).each do |cols|
|
|
234
|
+
cols.each {|cell| cell && cell.size >= 3 && cell.sub!(/\A="/, '') && cell.sub!(/"\z/, '') }
|
|
235
|
+
file << cols
|
|
236
|
+
seen += 1
|
|
237
|
+
print "." if (seen % 1e5) == 0
|
|
238
|
+
end
|
|
239
|
+
end
|
|
240
|
+
FileUtils.mv(dest, path)
|
|
241
|
+
full += (seen - 1)
|
|
242
|
+
puts " (#{seen - 1} rows of data)"
|
|
243
|
+
rescue
|
|
244
|
+
puts " - unable to process (#{$!})"
|
|
245
|
+
FileUtils.rm_f(dest)
|
|
246
|
+
end
|
|
247
|
+
end
|
|
248
|
+
|
|
249
|
+
puts "Processed #{coun} files with a total of #{full} rows of data" if coun > 1
|
|
250
|
+
|
|
251
|
+
__END__
|
|
252
|
+
,"CHUI, LOK HANG "BENNY", => ,"""CHUI, LOK HANG ""BENNY""",
|
|
253
|
+
|
|
254
|
+
,"..............."B
|
|
255
|
+
|
|
256
|
+
__END__
|
|
257
|
+
|
|
258
|
+
|
|
182
259
|
data = File.read('1.csv')
|
|
183
260
|
|
|
184
|
-
Censive.writer('out.csv', sep: ',', quote: "'"
|
|
261
|
+
Censive.writer('out.csv', sep: ',', quote: "'") do |out|
|
|
185
262
|
Censive.new(data).each do |row|
|
|
186
263
|
out << row
|
|
187
264
|
end
|
|
188
265
|
end
|
|
189
266
|
|
|
190
|
-
#
|
|
191
267
|
# ARGV << "z.csv" if ARGV.empty?
|
|
192
268
|
#
|
|
193
269
|
# case 1
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: censive
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: '0.5'
|
|
4
|
+
version: '0.7'
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Steve Shreeve
|
|
@@ -10,7 +10,7 @@ bindir: bin
|
|
|
10
10
|
cert_chain: []
|
|
11
11
|
date: 2023-01-31 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
|
-
description: A quick and lightweight
|
|
13
|
+
description: A quick and lightweight CSV handling library for Ruby
|
|
14
14
|
email: steve.shreeve@gmail.com
|
|
15
15
|
executables: []
|
|
16
16
|
extensions: []
|
|
@@ -43,5 +43,5 @@ requirements: []
|
|
|
43
43
|
rubygems_version: 3.4.5
|
|
44
44
|
signing_key:
|
|
45
45
|
specification_version: 4
|
|
46
|
-
summary: A quick and lightweight
|
|
46
|
+
summary: A quick and lightweight CSV handling library for Ruby
|
|
47
47
|
test_files: []
|