censive 0.7 → 0.8

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/censive.gemspec +1 -1
  3. data/lib/censive.rb +57 -108
  4. metadata +1 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8a757fa8bbc5ddf364889e4b7feca2001f3784e8d0b2ff70a1b0349691a34aae
4
- data.tar.gz: 68dced562eb0dc9b7ad300447091ceb74c04a55201e88cc9fffbe1ba3bbc534d
3
+ metadata.gz: 6419efcdc9274ea8bcf7b8527001e33f8bdfea348dfd911686cab36984d507da
4
+ data.tar.gz: 3b59aead54517fd64d7ece3eaa6f459e301e1e48f1ae34772a7128c61fb739f2
5
5
  SHA512:
6
- metadata.gz: c48d7e2bd3d1a7baa5fb2fae7b0553de665737849e9a50721f704a1a1f67c758c545dfe53d21f32ce386b20ea21f04c67ee8d765bf20653774b9475ebb60711f
7
- data.tar.gz: 411d59006ebcb6a07161186b56f73a8dcc73beeaecbe14e786ad237935c62fd6ef0631483c8f297399098b0dea2387863f7be8c878568e0558804e5bd20b55ee
6
+ metadata.gz: 7910c09e76a81ed27870ea52fb6c8aea0316ed213c53a026d98adc64f93349477e6acab0a93b88c6f184ce1d317634ecdca9290d50bff9b117b98bedd3ac7b86
7
+ data.tar.gz: 358ab985947d486b5f486b1f7e9c1f591e3b8e906b9eab59a4ed151e5f5d9652c211f2d2a4ee36f0543227e2ae5e33ba57f1e4c178f6f7e72e05c14d7b46895f
data/censive.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = "censive"
5
- s.version = "0.7"
5
+ s.version = "0.8"
6
6
  s.author = "Steve Shreeve"
7
7
  s.email = "steve.shreeve@gmail.com"
8
8
  s.summary = "A quick and lightweight CSV handling library for Ruby"
data/lib/censive.rb CHANGED
@@ -5,6 +5,8 @@
5
5
  #
6
6
  # Author: Steve Shreeve (steve.shreeve@gmail.com)
7
7
  # Date: Jan 30, 2023
8
+ #
9
+ # Thanks: Crystal's CSV library, see https://crystal-lang.org/api/1.7.2/CSV.html
8
10
  # ==============================================================================
9
11
  # The goals are:
10
12
  #
@@ -15,10 +17,7 @@
15
17
  #
16
18
  # 1. Option to support IO streaming
17
19
  # 2. Option to strip whitespace
18
- # 3. Option to change output line endings
19
- # 4. Option to force quotes in output
20
- # 5. Option to allow reading excel CSV (="Text" for cells)
21
- # 6. Confirm file encodings such as UTF-8, UTF-16, etc.
20
+ # 3. Confirm file encodings such as UTF-8, UTF-16, etc.
22
21
  #
23
22
  # NOTE: Only getch and scan_until advance strscan's position
24
23
  # ==============================================================================
@@ -39,6 +38,7 @@ class Censive < StringScanner
39
38
 
40
39
  drop: false , # enable to drop trailing separators
41
40
  eol: "\n" , # desired line endings for exports
41
+ excel: false , # allow ,="0123" style columns
42
42
  mode: :compact, # export mode: compact or full
43
43
  out: nil , # output IO/file
44
44
  relax: false , # relax parsing of quotes
@@ -48,56 +48,61 @@ class Censive < StringScanner
48
48
  super(str || '')
49
49
  reset
50
50
 
51
- @sep = sep .freeze
52
- @quote = quote.freeze
51
+ @sep = sep .freeze
52
+ @quote = quote.freeze
53
+
54
+ @drop = drop
55
+ @eol = eol.freeze
56
+ @mode = mode
57
+ @out = out
58
+ @relax = relax
53
59
 
54
- @drop = drop
55
- @eol = eol.freeze
56
- @mode = mode
57
- @out = out
58
- @relax = relax
60
+ @es = "" .freeze
61
+ @cr = "\r" .freeze
62
+ @lf = "\n" .freeze
63
+ @eq = "=" .freeze
64
+ @esc = (@quote * 2).freeze
59
65
 
60
- @es = "" .freeze
61
- @cr = "\r" .freeze
62
- @lf = "\n" .freeze
63
- @esc = (@quote * 2).freeze
66
+ @tokens = [@sep,@quote,@cr,@lf,@es,nil]
67
+ @tokens << @eq if excel # See http://bit.ly/3Y7jIvc
64
68
  end
65
69
 
66
70
  def reset(str=nil)
67
71
  self.string = str if str
68
72
  super()
69
- @char = string[pos]
70
- @flag = nil
73
+ @char = peek(1)
74
+ @flag = nil
71
75
 
72
- @rows = nil
73
- @cols = @cells = 0
76
+ @rows = nil
77
+ @cols = @cells = 0
74
78
  end
75
79
 
76
80
  # ==[ Lexer ]==
77
81
 
78
82
  def next_char
79
83
  getch
80
- @char = string[pos]
84
+ @char = peek(1)
81
85
  end
82
86
 
83
87
  def next_token
84
88
  case @flag
85
- when @es then @flag = nil; [@cr,@lf,nil].include?(@char) and return @es
89
+ when @es then @flag = nil; [@cr,@lf,@es,nil].include?(@char) and return @es
86
90
  when @cr then @flag = nil; next_char == @lf and next_char
87
91
  when @lf then @flag = nil; next_char
88
92
  end if @flag
89
93
 
90
- if [@sep,@quote,@cr,@lf,nil].include?(@char)
94
+ if @tokens.include?(@char)
91
95
  case @char
92
- when @quote # consume_quoted_cell
96
+ when @quote, @eq # consume quoted cell
97
+ @char == @eq and next_char # excel mode: allows ,="012",
93
98
  match = ""
94
99
  while true
95
100
  getch # consume the quote (optimized by not calling next_char)
96
101
  match << (scan_until(/(?=#{@quote})/o) or bomb "unclosed quote")
97
102
  case next_char
98
- when @sep then @flag = @es; next_char; break
99
- when @quote then match << @quote
100
- when @cr,@lf,nil then break
103
+ when @sep then @flag = @es; next_char; break
104
+ when @quote then match << @quote
105
+ when @cr,@lf,@es,nil then break
101
106
  else
102
107
  if @relax
103
108
  match << @quote + @char
@@ -107,14 +112,14 @@ class Censive < StringScanner
107
112
  end
108
113
  end
109
114
  match
110
- when @sep then @flag = @es; next_char; @es
111
- when @cr then @flag = @cr; nil
112
- when @lf then @flag = @lf; nil
113
- when nil then nil
115
+ when @sep then @flag = @es; next_char; @es
116
+ when @cr then @flag = @cr; nil
117
+ when @lf then @flag = @lf; nil
118
+ when @es,nil then nil
114
119
  end
115
- else # consume_unquoted_cell
120
+ else # consume unquoted cell
116
121
  match = scan_until(/(?=#{@sep}|#{@cr}|#{@lf}|\z)/o) or bomb "unexpected character"
117
- @char = string[pos]
122
+ @char = peek(1)
118
123
  @char == @sep and @flag = @es and next_char
119
124
  match
120
125
  end
@@ -158,6 +163,9 @@ class Censive < StringScanner
158
163
  def <<(row)
159
164
  @out or return super
160
165
 
166
+ # drop trailing seps, if specified
167
+ row.pop while row.last.empty? if @drop
168
+
161
169
  # most compact export format
162
170
  s,q = @sep, @quote
163
171
  out = case @mode
@@ -178,9 +186,6 @@ class Censive < StringScanner
178
186
  row.map {|col| "#{q}#{col.gsub(q, @esc)}#{q}" }
179
187
  end.join(s)
180
188
 
181
- # drop trailing seps, if specified
182
- out.gsub!(/#{s}+\z/,'') if @drop
183
-
184
189
  # write output, using desired line endings
185
190
  @out << out + @eol
186
191
  end
@@ -203,81 +208,25 @@ __END__
203
208
 
204
209
  # ==[ Playground... ]==
205
210
 
206
- STDOUT.sync = true
207
-
208
- require 'fileutils'
209
-
210
- ARGV << "101.csv"
211
-
212
- rand = `LC_ALL=C tr -dc a-zA-Z0-9 < /dev/random | head -c12`
213
-
214
- rows = []
215
- cols = []
216
- coun = 0
217
- full = 0
218
-
219
- ARGV.each do |path|
220
- File.file?(path) or next
221
-
222
- print "Processing #{path.inspect}"
223
-
224
- rows.clear
225
- cols.clear
226
- seen = 0
227
- coun += 1
228
-
229
- dest = "#{path}-#{rand}"
230
-
231
- begin
232
- Censive.writer(dest) do |file|
233
- Censive.new(File.read(path), relax: true).each do |cols|
234
- cols.each {|cell| cell && cell.size >= 3 && cell.sub!(/\A="/, '') && cell.sub!(/"\z/, '') }
235
- file << cols
236
- seen += 1
237
- print "." if (seen % 1e5) == 0
238
- end
239
- end
240
- FileUtils.mv(dest, path)
241
- full += (seen - 1)
242
- puts " (#{seen - 1} rows of data)"
243
- rescue
244
- puts " - unable to process (#{$!})"
245
- FileUtils.rm_f(dest)
246
- end
247
- end
248
-
249
- puts "Processed #{coun} files with a total of #{full} rows of data" if coun > 1
250
-
251
- __END__
252
- ,"CHUI, LOK HANG "BENNY", => ,"""CHUI, LOK HANG ""BENNY""",
211
+ # STDOUT.sync = true
212
+ #
213
+ # data = File.read('1.csv')
214
+ #
215
+ # Censive.writer('out.csv') do |out|
216
+ # Censive.new(data, relax: true, excel: true).each do |row|
217
+ # out << row
218
+ # end
219
+ # end
220
+ #
221
+ # __END__
253
222
 
254
- ,"..............."B
223
+ ARGV << "z.csv" if ARGV.empty?
255
224
 
256
- __END__
225
+ path = ARGV.first
226
+ data = File.read(path)
257
227
 
228
+ csv = Censive.new(data)
258
229
 
259
- data = File.read('1.csv')
230
+ data.size > 1e6 ? csv.parse : csv.parse.each {|cols| p cols }
260
231
 
261
- Censive.writer('out.csv', sep: ',', quote: "'") do |out|
262
- Censive.new(data).each do |row|
263
- out << row
264
- end
265
- end
266
-
267
- # ARGV << "z.csv" if ARGV.empty?
268
- #
269
- # case 1
270
- # when 1
271
- # path = ARGV.first
272
- # data = File.read(path)
273
- # when 2
274
- # data = DATA.gets("\n\n").rstrip
275
- # end
276
- #
277
- # STDOUT.sync = true
278
- #
279
- # csv = Censive.new(data)
280
- #
281
- # data.size > 1e6 ? csv.parse : csv.parse.each {|cols| p cols }
282
- #
283
- # csv.stats
232
+ csv.stats
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: censive
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.7'
4
+ version: '0.8'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steve Shreeve