censive 0.5 → 0.7

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (5)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/censive.gemspec +4 -4
  4. data/lib/censive.rb +88 -12
  5. metadata +3 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3de2789bb1678210e42d6fb3c4987b91c70b18765cc417c336db6f6f1eb969a7
4
- data.tar.gz: 6ffdf0f664df8978b9b44cacbdc81a3c664c95d48a1d0c940dd8634fd51fc3e2
3
+ metadata.gz: 8a757fa8bbc5ddf364889e4b7feca2001f3784e8d0b2ff70a1b0349691a34aae
4
+ data.tar.gz: 68dced562eb0dc9b7ad300447091ceb74c04a55201e88cc9fffbe1ba3bbc534d
5
5
  SHA512:
6
- metadata.gz: d64e1980813c2e7231637a3f7386c583892f6deee14991412075e559ac0d97118900d6f42d9722dc36db2c9195d909fb8dbd3dc1f9914ab1b493f4a46f9eacd0
7
- data.tar.gz: 0bb17da4415090f1d6235d594f03a604be3b594142e9add762eeaa1d25048942e64d7d99f2e1310566c03a7011e2457f87ce137f293c414d175a47bc4f6d5a8f
6
+ metadata.gz: c48d7e2bd3d1a7baa5fb2fae7b0553de665737849e9a50721f704a1a1f67c758c545dfe53d21f32ce386b20ea21f04c67ee8d765bf20653774b9475ebb60711f
7
+ data.tar.gz: 411d59006ebcb6a07161186b56f73a8dcc73beeaecbe14e786ad237935c62fd6ef0631483c8f297399098b0dea2387863f7be8c878568e0558804e5bd20b55ee
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # censive
2
2
 
3
- A quick and lightweight CVS handling library for Ruby
3
+ A quick and lightweight CSV handling library for Ruby
4
4
 
5
5
  ## Writing CSV
6
6
 
data/censive.gemspec CHANGED
@@ -2,13 +2,13 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = "censive"
5
- s.version = "0.5"
5
+ s.version = "0.7"
6
6
  s.author = "Steve Shreeve"
7
7
  s.email = "steve.shreeve@gmail.com"
8
- s.summary = "A quick and lightweight CVS handling library for Ruby"
9
- s.description = "A quick and lightweight CVS handling library for Ruby"
8
+ s.summary = "A quick and lightweight CSV handling library for Ruby"
9
+ s.description = "A quick and lightweight CSV handling library for Ruby"
10
10
  s.homepage = "https://github.com/shreeve/censive"
11
11
  s.license = "MIT"
12
12
  s.files = `git ls-files`.split("\n") - %w[.gitignore]
13
- s.executables = `cd bin && git ls-files .`.split("\n")
13
+ s.executables = `(cd bin 2>&1) > /dev/null && git ls-files .`.split("\n")
14
14
  end
data/lib/censive.rb CHANGED
@@ -33,17 +33,34 @@ class Censive < StringScanner
33
33
  end
34
34
  end
35
35
 
36
- def initialize(str=nil, sep: ',', quote: '"', out: nil, mode: :compact)
36
+ def initialize(str=nil,
37
+ sep: ',' , # column separator character
38
+ quote: '"' , # quote character
39
+
40
+ drop: false , # enable to drop trailing separators
41
+ eol: "\n" , # desired line endings for exports
42
+ mode: :compact, # export mode: compact or full
43
+ out: nil , # output IO/file
44
+ relax: false , # relax parsing of quotes
45
+
46
+ **opts # grab bag
47
+ )
37
48
  super(str || '')
38
49
  reset
50
+
39
51
  @sep = sep .freeze
40
52
  @quote = quote.freeze
53
+
54
+ @drop = drop
55
+ @eol = eol.freeze
56
+ @mode = mode
57
+ @out = out
58
+ @relax = relax
59
+
41
60
  @es = "" .freeze
42
61
  @cr = "\r" .freeze
43
62
  @lf = "\n" .freeze
44
- @out = out
45
63
  @esc = (@quote * 2).freeze
46
- @mode = mode
47
64
  end
48
65
 
49
66
  def reset(str=nil)
@@ -81,7 +98,12 @@ class Censive < StringScanner
81
98
  when @sep then @flag = @es; next_char; break
82
99
  when @quote then match << @quote
83
100
  when @cr,@lf,nil then break
84
- else bomb "unexpected character after quote"
101
+ else
102
+ if @relax
103
+ match << @quote + @char
104
+ else
105
+ bomb "invalid character after quote"
106
+ end
85
107
  end
86
108
  end
87
109
  match
@@ -99,7 +121,7 @@ class Censive < StringScanner
99
121
  end
100
122
 
101
123
  def bomb(msg)
102
- abort "#{File.basename($0)}: #{msg} at character #{pos} near '#{string[pos-4,7]}'"
124
+ abort "\n#{File.basename($0)}: #{msg} at character #{pos} near '#{string[pos-4,7]}'"
103
125
  end
104
126
 
105
127
  # ==[ Parser ]==
@@ -124,7 +146,7 @@ class Censive < StringScanner
124
146
 
125
147
  # ==[ Helpers ]==
126
148
 
127
- # grok returns 2 (seps and quotes), 1 (seps only), 0 (neither)
149
+ # grok returns: 2 for seps and quotes, 1 for seps only, and 0 for neither
128
150
  def grok(str)
129
151
  if pos = str.index(/(#{@quote})|#{@sep}/o)
130
152
  $1 ? 2 : str.index(/#{@quote}/o, pos) ? 2 : 1
@@ -156,11 +178,11 @@ class Censive < StringScanner
156
178
  row.map {|col| "#{q}#{col.gsub(q, @esc)}#{q}" }
157
179
  end.join(s)
158
180
 
159
- #!# TODO: allow an option to remove trailing seps in the output
160
- # out.gsub!(/#{s}+\z/,'')
181
+ # drop trailing seps, if specified
182
+ out.gsub!(/#{s}+\z/,'') if @drop
161
183
 
162
- #!# TODO: allow these line endings to be configurable
163
- @out << out + @lf
184
+ # write output, using desired line endings
185
+ @out << out + @eol
164
186
  end
165
187
 
166
188
  def each
@@ -177,17 +199,71 @@ class Censive < StringScanner
177
199
  end
178
200
  end
179
201
 
202
+ __END__
203
+
180
204
  # ==[ Playground... ]==
181
205
 
206
+ STDOUT.sync = true
207
+
208
+ require 'fileutils'
209
+
210
+ ARGV << "101.csv"
211
+
212
+ rand = `LC_ALL=C tr -dc a-zA-Z0-9 < /dev/random | head -c12`
213
+
214
+ rows = []
215
+ cols = []
216
+ coun = 0
217
+ full = 0
218
+
219
+ ARGV.each do |path|
220
+ File.file?(path) or next
221
+
222
+ print "Processing #{path.inspect}"
223
+
224
+ rows.clear
225
+ cols.clear
226
+ seen = 0
227
+ coun += 1
228
+
229
+ dest = "#{path}-#{rand}"
230
+
231
+ begin
232
+ Censive.writer(dest) do |file|
233
+ Censive.new(File.read(path), relax: true).each do |cols|
234
+ cols.each {|cell| cell && cell.size >= 3 && cell.sub!(/\A="/, '') && cell.sub!(/"\z/, '') }
235
+ file << cols
236
+ seen += 1
237
+ print "." if (seen % 1e5) == 0
238
+ end
239
+ end
240
+ FileUtils.mv(dest, path)
241
+ full += (seen - 1)
242
+ puts " (#{seen - 1} rows of data)"
243
+ rescue
244
+ puts " - unable to process (#{$!})"
245
+ FileUtils.rm_f(dest)
246
+ end
247
+ end
248
+
249
+ puts "Processed #{coun} files with a total of #{full} rows of data" if coun > 1
250
+
251
+ __END__
252
+ ,"CHUI, LOK HANG "BENNY", => ,"""CHUI, LOK HANG ""BENNY""",
253
+
254
+ ,"..............."B
255
+
256
+ __END__
257
+
258
+
182
259
  data = File.read('1.csv')
183
260
 
184
- Censive.writer('out.csv', sep: ',', quote: "'", mode: :compact) do |out|
261
+ Censive.writer('out.csv', sep: ',', quote: "'") do |out|
185
262
  Censive.new(data).each do |row|
186
263
  out << row
187
264
  end
188
265
  end
189
266
 
190
- #
191
267
  # ARGV << "z.csv" if ARGV.empty?
192
268
  #
193
269
  # case 1
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: censive
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.5'
4
+ version: '0.7'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steve Shreeve
@@ -10,7 +10,7 @@ bindir: bin
10
10
  cert_chain: []
11
11
  date: 2023-01-31 00:00:00.000000000 Z
12
12
  dependencies: []
13
- description: A quick and lightweight CVS handling library for Ruby
13
+ description: A quick and lightweight CSV handling library for Ruby
14
14
  email: steve.shreeve@gmail.com
15
15
  executables: []
16
16
  extensions: []
@@ -43,5 +43,5 @@ requirements: []
43
43
  rubygems_version: 3.4.5
44
44
  signing_key:
45
45
  specification_version: 4
46
- summary: A quick and lightweight CVS handling library for Ruby
46
+ summary: A quick and lightweight CSV handling library for Ruby
47
47
  test_files: []