censive 0.5 → 0.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (5)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/censive.gemspec +4 -4
  4. data/lib/censive.rb +88 -12
  5. metadata +3 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3de2789bb1678210e42d6fb3c4987b91c70b18765cc417c336db6f6f1eb969a7
4
- data.tar.gz: 6ffdf0f664df8978b9b44cacbdc81a3c664c95d48a1d0c940dd8634fd51fc3e2
3
+ metadata.gz: 8a757fa8bbc5ddf364889e4b7feca2001f3784e8d0b2ff70a1b0349691a34aae
4
+ data.tar.gz: 68dced562eb0dc9b7ad300447091ceb74c04a55201e88cc9fffbe1ba3bbc534d
5
5
  SHA512:
6
- metadata.gz: d64e1980813c2e7231637a3f7386c583892f6deee14991412075e559ac0d97118900d6f42d9722dc36db2c9195d909fb8dbd3dc1f9914ab1b493f4a46f9eacd0
7
- data.tar.gz: 0bb17da4415090f1d6235d594f03a604be3b594142e9add762eeaa1d25048942e64d7d99f2e1310566c03a7011e2457f87ce137f293c414d175a47bc4f6d5a8f
6
+ metadata.gz: c48d7e2bd3d1a7baa5fb2fae7b0553de665737849e9a50721f704a1a1f67c758c545dfe53d21f32ce386b20ea21f04c67ee8d765bf20653774b9475ebb60711f
7
+ data.tar.gz: 411d59006ebcb6a07161186b56f73a8dcc73beeaecbe14e786ad237935c62fd6ef0631483c8f297399098b0dea2387863f7be8c878568e0558804e5bd20b55ee
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # censive
2
2
 
3
- A quick and lightweight CVS handling library for Ruby
3
+ A quick and lightweight CSV handling library for Ruby
4
4
 
5
5
  ## Writing CSV
6
6
 
data/censive.gemspec CHANGED
@@ -2,13 +2,13 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = "censive"
5
- s.version = "0.5"
5
+ s.version = "0.7"
6
6
  s.author = "Steve Shreeve"
7
7
  s.email = "steve.shreeve@gmail.com"
8
- s.summary = "A quick and lightweight CVS handling library for Ruby"
9
- s.description = "A quick and lightweight CVS handling library for Ruby"
8
+ s.summary = "A quick and lightweight CSV handling library for Ruby"
9
+ s.description = "A quick and lightweight CSV handling library for Ruby"
10
10
  s.homepage = "https://github.com/shreeve/censive"
11
11
  s.license = "MIT"
12
12
  s.files = `git ls-files`.split("\n") - %w[.gitignore]
13
- s.executables = `cd bin && git ls-files .`.split("\n")
13
+ s.executables = `(cd bin 2>&1) > /dev/null && git ls-files .`.split("\n")
14
14
  end
data/lib/censive.rb CHANGED
@@ -33,17 +33,34 @@ class Censive < StringScanner
33
33
  end
34
34
  end
35
35
 
36
- def initialize(str=nil, sep: ',', quote: '"', out: nil, mode: :compact)
36
+ def initialize(str=nil,
37
+ sep: ',' , # column separator character
38
+ quote: '"' , # quote character
39
+
40
+ drop: false , # enable to drop trailing separators
41
+ eol: "\n" , # desired line endings for exports
42
+ mode: :compact, # export mode: compact or full
43
+ out: nil , # output IO/file
44
+ relax: false , # relax parsing of quotes
45
+
46
+ **opts # grab bag
47
+ )
37
48
  super(str || '')
38
49
  reset
50
+
39
51
  @sep = sep .freeze
40
52
  @quote = quote.freeze
53
+
54
+ @drop = drop
55
+ @eol = eol.freeze
56
+ @mode = mode
57
+ @out = out
58
+ @relax = relax
59
+
41
60
  @es = "" .freeze
42
61
  @cr = "\r" .freeze
43
62
  @lf = "\n" .freeze
44
- @out = out
45
63
  @esc = (@quote * 2).freeze
46
- @mode = mode
47
64
  end
48
65
 
49
66
  def reset(str=nil)
@@ -81,7 +98,12 @@ class Censive < StringScanner
81
98
  when @sep then @flag = @es; next_char; break
82
99
  when @quote then match << @quote
83
100
  when @cr,@lf,nil then break
84
- else bomb "unexpected character after quote"
101
+ else
102
+ if @relax
103
+ match << @quote + @char
104
+ else
105
+ bomb "invalid character after quote"
106
+ end
85
107
  end
86
108
  end
87
109
  match
@@ -99,7 +121,7 @@ class Censive < StringScanner
99
121
  end
100
122
 
101
123
  def bomb(msg)
102
- abort "#{File.basename($0)}: #{msg} at character #{pos} near '#{string[pos-4,7]}'"
124
+ abort "\n#{File.basename($0)}: #{msg} at character #{pos} near '#{string[pos-4,7]}'"
103
125
  end
104
126
 
105
127
  # ==[ Parser ]==
@@ -124,7 +146,7 @@ class Censive < StringScanner
124
146
 
125
147
  # ==[ Helpers ]==
126
148
 
127
- # grok returns 2 (seps and quotes), 1 (seps only), 0 (neither)
149
+ # grok returns: 2 for seps and quotes, 1 for seps only, and 0 for neither
128
150
  def grok(str)
129
151
  if pos = str.index(/(#{@quote})|#{@sep}/o)
130
152
  $1 ? 2 : str.index(/#{@quote}/o, pos) ? 2 : 1
@@ -156,11 +178,11 @@ class Censive < StringScanner
156
178
  row.map {|col| "#{q}#{col.gsub(q, @esc)}#{q}" }
157
179
  end.join(s)
158
180
 
159
- #!# TODO: allow an option to remove trailing seps in the output
160
- # out.gsub!(/#{s}+\z/,'')
181
+ # drop trailing seps, if specified
182
+ out.gsub!(/#{s}+\z/,'') if @drop
161
183
 
162
- #!# TODO: allow these line endings to be configurable
163
- @out << out + @lf
184
+ # write output, using desired line endings
185
+ @out << out + @eol
164
186
  end
165
187
 
166
188
  def each
@@ -177,17 +199,71 @@ class Censive < StringScanner
177
199
  end
178
200
  end
179
201
 
202
+ __END__
203
+
180
204
  # ==[ Playground... ]==
181
205
 
206
+ STDOUT.sync = true
207
+
208
+ require 'fileutils'
209
+
210
+ ARGV << "101.csv"
211
+
212
+ rand = `LC_ALL=C tr -dc a-zA-Z0-9 < /dev/random | head -c12`
213
+
214
+ rows = []
215
+ cols = []
216
+ coun = 0
217
+ full = 0
218
+
219
+ ARGV.each do |path|
220
+ File.file?(path) or next
221
+
222
+ print "Processing #{path.inspect}"
223
+
224
+ rows.clear
225
+ cols.clear
226
+ seen = 0
227
+ coun += 1
228
+
229
+ dest = "#{path}-#{rand}"
230
+
231
+ begin
232
+ Censive.writer(dest) do |file|
233
+ Censive.new(File.read(path), relax: true).each do |cols|
234
+ cols.each {|cell| cell && cell.size >= 3 && cell.sub!(/\A="/, '') && cell.sub!(/"\z/, '') }
235
+ file << cols
236
+ seen += 1
237
+ print "." if (seen % 1e5) == 0
238
+ end
239
+ end
240
+ FileUtils.mv(dest, path)
241
+ full += (seen - 1)
242
+ puts " (#{seen - 1} rows of data)"
243
+ rescue
244
+ puts " - unable to process (#{$!})"
245
+ FileUtils.rm_f(dest)
246
+ end
247
+ end
248
+
249
+ puts "Processed #{coun} files with a total of #{full} rows of data" if coun > 1
250
+
251
+ __END__
252
+ ,"CHUI, LOK HANG "BENNY", => ,"""CHUI, LOK HANG ""BENNY""",
253
+
254
+ ,"..............."B
255
+
256
+ __END__
257
+
258
+
182
259
  data = File.read('1.csv')
183
260
 
184
- Censive.writer('out.csv', sep: ',', quote: "'", mode: :compact) do |out|
261
+ Censive.writer('out.csv', sep: ',', quote: "'") do |out|
185
262
  Censive.new(data).each do |row|
186
263
  out << row
187
264
  end
188
265
  end
189
266
 
190
- #
191
267
  # ARGV << "z.csv" if ARGV.empty?
192
268
  #
193
269
  # case 1
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: censive
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.5'
4
+ version: '0.7'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steve Shreeve
@@ -10,7 +10,7 @@ bindir: bin
10
10
  cert_chain: []
11
11
  date: 2023-01-31 00:00:00.000000000 Z
12
12
  dependencies: []
13
- description: A quick and lightweight CVS handling library for Ruby
13
+ description: A quick and lightweight CSV handling library for Ruby
14
14
  email: steve.shreeve@gmail.com
15
15
  executables: []
16
16
  extensions: []
@@ -43,5 +43,5 @@ requirements: []
43
43
  rubygems_version: 3.4.5
44
44
  signing_key:
45
45
  specification_version: 4
46
- summary: A quick and lightweight CVS handling library for Ruby
46
+ summary: A quick and lightweight CSV handling library for Ruby
47
47
  test_files: []