censive 0.7 → 0.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/censive.gemspec +1 -1
  3. data/lib/censive.rb +57 -108
  4. metadata +1 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8a757fa8bbc5ddf364889e4b7feca2001f3784e8d0b2ff70a1b0349691a34aae
4
- data.tar.gz: 68dced562eb0dc9b7ad300447091ceb74c04a55201e88cc9fffbe1ba3bbc534d
3
+ metadata.gz: 6419efcdc9274ea8bcf7b8527001e33f8bdfea348dfd911686cab36984d507da
4
+ data.tar.gz: 3b59aead54517fd64d7ece3eaa6f459e301e1e48f1ae34772a7128c61fb739f2
5
5
  SHA512:
6
- metadata.gz: c48d7e2bd3d1a7baa5fb2fae7b0553de665737849e9a50721f704a1a1f67c758c545dfe53d21f32ce386b20ea21f04c67ee8d765bf20653774b9475ebb60711f
7
- data.tar.gz: 411d59006ebcb6a07161186b56f73a8dcc73beeaecbe14e786ad237935c62fd6ef0631483c8f297399098b0dea2387863f7be8c878568e0558804e5bd20b55ee
6
+ metadata.gz: 7910c09e76a81ed27870ea52fb6c8aea0316ed213c53a026d98adc64f93349477e6acab0a93b88c6f184ce1d317634ecdca9290d50bff9b117b98bedd3ac7b86
7
+ data.tar.gz: 358ab985947d486b5f486b1f7e9c1f591e3b8e906b9eab59a4ed151e5f5d9652c211f2d2a4ee36f0543227e2ae5e33ba57f1e4c178f6f7e72e05c14d7b46895f
data/censive.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = "censive"
5
- s.version = "0.7"
5
+ s.version = "0.8"
6
6
  s.author = "Steve Shreeve"
7
7
  s.email = "steve.shreeve@gmail.com"
8
8
  s.summary = "A quick and lightweight CSV handling library for Ruby"
data/lib/censive.rb CHANGED
@@ -5,6 +5,8 @@
5
5
  #
6
6
  # Author: Steve Shreeve (steve.shreeve@gmail.com)
7
7
  # Date: Jan 30, 2023
8
+ #
9
+ # Thanks: Crystal's CSV library, see https://crystal-lang.org/api/1.7.2/CSV.html
8
10
  # ==============================================================================
9
11
  # The goals are:
10
12
  #
@@ -15,10 +17,7 @@
15
17
  #
16
18
  # 1. Option to support IO streaming
17
19
  # 2. Option to strip whitespace
18
- # 3. Option to change output line endings
19
- # 4. Option to force quotes in output
20
- # 5. Option to allow reading excel CSV (="Text" for cells)
21
- # 6. Confirm file encodings such as UTF-8, UTF-16, etc.
20
+ # 3. Confirm file encodings such as UTF-8, UTF-16, etc.
22
21
  #
23
22
  # NOTE: Only getch and scan_until advance strscan's position
24
23
  # ==============================================================================
@@ -39,6 +38,7 @@ class Censive < StringScanner
39
38
 
40
39
  drop: false , # enable to drop trailing separators
41
40
  eol: "\n" , # desired line endings for exports
41
+ excel: false , # allow ,="0123" style columns
42
42
  mode: :compact, # export mode: compact or full
43
43
  out: nil , # output IO/file
44
44
  relax: false , # relax parsing of quotes
@@ -48,56 +48,61 @@ class Censive < StringScanner
48
48
  super(str || '')
49
49
  reset
50
50
 
51
- @sep = sep .freeze
52
- @quote = quote.freeze
51
+ @sep = sep .freeze
52
+ @quote = quote.freeze
53
+
54
+ @drop = drop
55
+ @eol = eol.freeze
56
+ @mode = mode
57
+ @out = out
58
+ @relax = relax
53
59
 
54
- @drop = drop
55
- @eol = eol.freeze
56
- @mode = mode
57
- @out = out
58
- @relax = relax
60
+ @es = "" .freeze
61
+ @cr = "\r" .freeze
62
+ @lf = "\n" .freeze
63
+ @eq = "=" .freeze
64
+ @esc = (@quote * 2).freeze
59
65
 
60
- @es = "" .freeze
61
- @cr = "\r" .freeze
62
- @lf = "\n" .freeze
63
- @esc = (@quote * 2).freeze
66
+ @tokens = [@sep,@quote,@cr,@lf,@es,nil]
67
+ @tokens << @eq if excel # See http://bit.ly/3Y7jIvc
64
68
  end
65
69
 
66
70
  def reset(str=nil)
67
71
  self.string = str if str
68
72
  super()
69
- @char = string[pos]
70
- @flag = nil
73
+ @char = peek(1)
74
+ @flag = nil
71
75
 
72
- @rows = nil
73
- @cols = @cells = 0
76
+ @rows = nil
77
+ @cols = @cells = 0
74
78
  end
75
79
 
76
80
  # ==[ Lexer ]==
77
81
 
78
82
  def next_char
79
83
  getch
80
- @char = string[pos]
84
+ @char = peek(1)
81
85
  end
82
86
 
83
87
  def next_token
84
88
  case @flag
85
- when @es then @flag = nil; [@cr,@lf,nil].include?(@char) and return @es
89
+ when @es then @flag = nil; [@cr,@lf,@es,nil].include?(@char) and return @es
86
90
  when @cr then @flag = nil; next_char == @lf and next_char
87
91
  when @lf then @flag = nil; next_char
88
92
  end if @flag
89
93
 
90
- if [@sep,@quote,@cr,@lf,nil].include?(@char)
94
+ if @tokens.include?(@char)
91
95
  case @char
92
- when @quote # consume_quoted_cell
96
+ when @quote, @eq # consume quoted cell
97
+ @char == @eq and next_char # excel mode: allows ,="012",
93
98
  match = ""
94
99
  while true
95
100
  getch # consume the quote (optimized by not calling next_char)
96
101
  match << (scan_until(/(?=#{@quote})/o) or bomb "unclosed quote")
97
102
  case next_char
98
- when @sep then @flag = @es; next_char; break
99
- when @quote then match << @quote
100
- when @cr,@lf,nil then break
103
+ when @sep then @flag = @es; next_char; break
104
+ when @quote then match << @quote
105
+ when @cr,@lf,@es,nil then break
101
106
  else
102
107
  if @relax
103
108
  match << @quote + @char
@@ -107,14 +112,14 @@ class Censive < StringScanner
107
112
  end
108
113
  end
109
114
  match
110
- when @sep then @flag = @es; next_char; @es
111
- when @cr then @flag = @cr; nil
112
- when @lf then @flag = @lf; nil
113
- when nil then nil
115
+ when @sep then @flag = @es; next_char; @es
116
+ when @cr then @flag = @cr; nil
117
+ when @lf then @flag = @lf; nil
118
+ when @es,nil then nil
114
119
  end
115
- else # consume_unquoted_cell
120
+ else # consume unquoted cell
116
121
  match = scan_until(/(?=#{@sep}|#{@cr}|#{@lf}|\z)/o) or bomb "unexpected character"
117
- @char = string[pos]
122
+ @char = peek(1)
118
123
  @char == @sep and @flag = @es and next_char
119
124
  match
120
125
  end
@@ -158,6 +163,9 @@ class Censive < StringScanner
158
163
  def <<(row)
159
164
  @out or return super
160
165
 
166
+ # drop trailing seps, if specified
167
+ row.pop while row.last.empty? if @drop
168
+
161
169
  # most compact export format
162
170
  s,q = @sep, @quote
163
171
  out = case @mode
@@ -178,9 +186,6 @@ class Censive < StringScanner
178
186
  row.map {|col| "#{q}#{col.gsub(q, @esc)}#{q}" }
179
187
  end.join(s)
180
188
 
181
- # drop trailing seps, if specified
182
- out.gsub!(/#{s}+\z/,'') if @drop
183
-
184
189
  # write output, using desired line endings
185
190
  @out << out + @eol
186
191
  end
@@ -203,81 +208,25 @@ __END__
203
208
 
204
209
  # ==[ Playground... ]==
205
210
 
206
- STDOUT.sync = true
207
-
208
- require 'fileutils'
209
-
210
- ARGV << "101.csv"
211
-
212
- rand = `LC_ALL=C tr -dc a-zA-Z0-9 < /dev/random | head -c12`
213
-
214
- rows = []
215
- cols = []
216
- coun = 0
217
- full = 0
218
-
219
- ARGV.each do |path|
220
- File.file?(path) or next
221
-
222
- print "Processing #{path.inspect}"
223
-
224
- rows.clear
225
- cols.clear
226
- seen = 0
227
- coun += 1
228
-
229
- dest = "#{path}-#{rand}"
230
-
231
- begin
232
- Censive.writer(dest) do |file|
233
- Censive.new(File.read(path), relax: true).each do |cols|
234
- cols.each {|cell| cell && cell.size >= 3 && cell.sub!(/\A="/, '') && cell.sub!(/"\z/, '') }
235
- file << cols
236
- seen += 1
237
- print "." if (seen % 1e5) == 0
238
- end
239
- end
240
- FileUtils.mv(dest, path)
241
- full += (seen - 1)
242
- puts " (#{seen - 1} rows of data)"
243
- rescue
244
- puts " - unable to process (#{$!})"
245
- FileUtils.rm_f(dest)
246
- end
247
- end
248
-
249
- puts "Processed #{coun} files with a total of #{full} rows of data" if coun > 1
250
-
251
- __END__
252
- ,"CHUI, LOK HANG "BENNY", => ,"""CHUI, LOK HANG ""BENNY""",
211
+ # STDOUT.sync = true
212
+ #
213
+ # data = File.read('1.csv')
214
+ #
215
+ # Censive.writer('out.csv') do |out|
216
+ # Censive.new(data, relax: true, excel: true).each do |row|
217
+ # out << row
218
+ # end
219
+ # end
220
+ #
221
+ # __END__
253
222
 
254
- ,"..............."B
223
+ ARGV << "z.csv" if ARGV.empty?
255
224
 
256
- __END__
225
+ path = ARGV.first
226
+ data = File.read(path)
257
227
 
228
+ csv = Censive.new(data)
258
229
 
259
- data = File.read('1.csv')
230
+ data.size > 1e6 ? csv.parse : csv.parse.each {|cols| p cols }
260
231
 
261
- Censive.writer('out.csv', sep: ',', quote: "'") do |out|
262
- Censive.new(data).each do |row|
263
- out << row
264
- end
265
- end
266
-
267
- # ARGV << "z.csv" if ARGV.empty?
268
- #
269
- # case 1
270
- # when 1
271
- # path = ARGV.first
272
- # data = File.read(path)
273
- # when 2
274
- # data = DATA.gets("\n\n").rstrip
275
- # end
276
- #
277
- # STDOUT.sync = true
278
- #
279
- # csv = Censive.new(data)
280
- #
281
- # data.size > 1e6 ? csv.parse : csv.parse.each {|cols| p cols }
282
- #
283
- # csv.stats
232
+ csv.stats
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: censive
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.7'
4
+ version: '0.8'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steve Shreeve