censive 0.6 → 0.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (5)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/censive.gemspec +3 -3
  4. data/lib/censive.rb +77 -63
  5. metadata +3 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2fd3e9feff7e7610f300de0896977b7403ad161178c1f4cd28ef9c5248f8b9d1
4
- data.tar.gz: 8ac7307f60706052caafa0f1b6c1a28dd4385d33275dfa73af69e29b29471897
3
+ metadata.gz: 6419efcdc9274ea8bcf7b8527001e33f8bdfea348dfd911686cab36984d507da
4
+ data.tar.gz: 3b59aead54517fd64d7ece3eaa6f459e301e1e48f1ae34772a7128c61fb739f2
5
5
  SHA512:
6
- metadata.gz: ccc3de3e28e7fe7463acd86423f8391cad49238363ea4631727202639815fac10554a1bd388450d07422578ac04840d0b37c464dd56e3f600e7f4b732c6b4fee
7
- data.tar.gz: 5304136792442e52d339bd9c86db318104773665b041c5d62ed18e74b1d50dcb0fe1b135ae6e9accffccb883aef34b05bc1f6d0b1681826faf9334d18ed0624e
6
+ metadata.gz: 7910c09e76a81ed27870ea52fb6c8aea0316ed213c53a026d98adc64f93349477e6acab0a93b88c6f184ce1d317634ecdca9290d50bff9b117b98bedd3ac7b86
7
+ data.tar.gz: 358ab985947d486b5f486b1f7e9c1f591e3b8e906b9eab59a4ed151e5f5d9652c211f2d2a4ee36f0543227e2ae5e33ba57f1e4c178f6f7e72e05c14d7b46895f
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # censive
2
2
 
3
- A quick and lightweight CVS handling library for Ruby
3
+ A quick and lightweight CSV handling library for Ruby
4
4
 
5
5
  ## Writing CSV
6
6
 
data/censive.gemspec CHANGED
@@ -2,11 +2,11 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = "censive"
5
- s.version = "0.6"
5
+ s.version = "0.8"
6
6
  s.author = "Steve Shreeve"
7
7
  s.email = "steve.shreeve@gmail.com"
8
- s.summary = "A quick and lightweight CVS handling library for Ruby"
9
- s.description = "A quick and lightweight CVS handling library for Ruby"
8
+ s.summary = "A quick and lightweight CSV handling library for Ruby"
9
+ s.description = "A quick and lightweight CSV handling library for Ruby"
10
10
  s.homepage = "https://github.com/shreeve/censive"
11
11
  s.license = "MIT"
12
12
  s.files = `git ls-files`.split("\n") - %w[.gitignore]
data/lib/censive.rb CHANGED
@@ -5,6 +5,8 @@
5
5
  #
6
6
  # Author: Steve Shreeve (steve.shreeve@gmail.com)
7
7
  # Date: Jan 30, 2023
8
+ #
9
+ # Thanks: Crystal's CSV library, see https://crystal-lang.org/api/1.7.2/CSV.html
8
10
  # ==============================================================================
9
11
  # The goals are:
10
12
  #
@@ -15,10 +17,7 @@
15
17
  #
16
18
  # 1. Option to support IO streaming
17
19
  # 2. Option to strip whitespace
18
- # 3. Option to change output line endings
19
- # 4. Option to force quotes in output
20
- # 5. Option to allow reading excel CSV (="Text" for cells)
21
- # 6. Confirm file encodings such as UTF-8, UTF-16, etc.
20
+ # 3. Confirm file encodings such as UTF-8, UTF-16, etc.
22
21
  #
23
22
  # NOTE: Only getch and scan_until advance strscan's position
24
23
  # ==============================================================================
@@ -36,81 +35,98 @@ class Censive < StringScanner
36
35
  def initialize(str=nil,
37
36
  sep: ',' , # column separator character
38
37
  quote: '"' , # quote character
39
- out: nil , # output IO/file
40
- mode: :compact, # export mode: compact or full
38
+
41
39
  drop: false , # enable to drop trailing separators
42
- eol: "\n" # desired line endings for exports
40
+ eol: "\n" , # desired line endings for exports
41
+ excel: false , # allow ,="0123" style columns
42
+ mode: :compact, # export mode: compact or full
43
+ out: nil , # output IO/file
44
+ relax: false , # relax parsing of quotes
45
+
46
+ **opts # grab bag
43
47
  )
44
48
  super(str || '')
45
49
  reset
46
50
 
47
- @sep = sep .freeze
48
- @quote = quote.freeze
49
- @out = out
50
- @mode = mode
51
- @drop = drop
52
- @eol = eol.freeze
53
-
54
- @es = "" .freeze
55
- @cr = "\r" .freeze
56
- @lf = "\n" .freeze
57
- @esc = (@quote * 2).freeze
51
+ @sep = sep .freeze
52
+ @quote = quote.freeze
53
+
54
+ @drop = drop
55
+ @eol = eol.freeze
56
+ @mode = mode
57
+ @out = out
58
+ @relax = relax
59
+
60
+ @es = "" .freeze
61
+ @cr = "\r" .freeze
62
+ @lf = "\n" .freeze
63
+ @eq = "=" .freeze
64
+ @esc = (@quote * 2).freeze
65
+
66
+ @tokens = [@sep,@quote,@cr,@lf,@es,nil]
67
+ @tokens << @eq if excel # See http://bit.ly/3Y7jIvc
58
68
  end
59
69
 
60
70
  def reset(str=nil)
61
71
  self.string = str if str
62
72
  super()
63
- @char = string[pos]
64
- @flag = nil
73
+ @char = peek(1)
74
+ @flag = nil
65
75
 
66
- @rows = nil
67
- @cols = @cells = 0
76
+ @rows = nil
77
+ @cols = @cells = 0
68
78
  end
69
79
 
70
80
  # ==[ Lexer ]==
71
81
 
72
82
  def next_char
73
83
  getch
74
- @char = string[pos]
84
+ @char = peek(1)
75
85
  end
76
86
 
77
87
  def next_token
78
88
  case @flag
79
- when @es then @flag = nil; [@cr,@lf,nil].include?(@char) and return @es
89
+ when @es then @flag = nil; [@cr,@lf,@es,nil].include?(@char) and return @es
80
90
  when @cr then @flag = nil; next_char == @lf and next_char
81
91
  when @lf then @flag = nil; next_char
82
92
  end if @flag
83
93
 
84
- if [@sep,@quote,@cr,@lf,nil].include?(@char)
94
+ if @tokens.include?(@char)
85
95
  case @char
86
- when @quote # consume_quoted_cell
96
+ when @quote, @eq # consume quoted cell
97
+ @char == @eq and next_char # excel mode: allows ,="012",
87
98
  match = ""
88
99
  while true
89
100
  getch # consume the quote (optimized by not calling next_char)
90
101
  match << (scan_until(/(?=#{@quote})/o) or bomb "unclosed quote")
91
102
  case next_char
92
- when @sep then @flag = @es; next_char; break
93
- when @quote then match << @quote
94
- when @cr,@lf,nil then break
95
- else bomb "unexpected character after quote"
103
+ when @sep then @flag = @es; next_char; break
104
+ when @quote then match << @quote
105
+ when @cr,@lf,@es,nil then break
106
+ else
107
+ if @relax
108
+ match << @quote + @char
109
+ else
110
+ bomb "invalid character after quote"
111
+ end
96
112
  end
97
113
  end
98
114
  match
99
- when @sep then @flag = @es; next_char; @es
100
- when @cr then @flag = @cr; nil
101
- when @lf then @flag = @lf; nil
102
- when nil then nil
115
+ when @sep then @flag = @es; next_char; @es
116
+ when @cr then @flag = @cr; nil
117
+ when @lf then @flag = @lf; nil
118
+ when @es,nil then nil
103
119
  end
104
- else # consume_unquoted_cell
120
+ else # consume unquoted cell
105
121
  match = scan_until(/(?=#{@sep}|#{@cr}|#{@lf}|\z)/o) or bomb "unexpected character"
106
- @char = string[pos]
122
+ @char = peek(1)
107
123
  @char == @sep and @flag = @es and next_char
108
124
  match
109
125
  end
110
126
  end
111
127
 
112
128
  def bomb(msg)
113
- abort "#{File.basename($0)}: #{msg} at character #{pos} near '#{string[pos-4,7]}'"
129
+ abort "\n#{File.basename($0)}: #{msg} at character #{pos} near '#{string[pos-4,7]}'"
114
130
  end
115
131
 
116
132
  # ==[ Parser ]==
@@ -135,7 +151,7 @@ class Censive < StringScanner
135
151
 
136
152
  # ==[ Helpers ]==
137
153
 
138
- # grok returns 2 (seps and quotes), 1 (seps only), 0 (neither)
154
+ # grok returns: 2 for seps and quotes, 1 for seps only, and 0 for neither
139
155
  def grok(str)
140
156
  if pos = str.index(/(#{@quote})|#{@sep}/o)
141
157
  $1 ? 2 : str.index(/#{@quote}/o, pos) ? 2 : 1
@@ -147,6 +163,9 @@ class Censive < StringScanner
147
163
  def <<(row)
148
164
  @out or return super
149
165
 
166
+ # drop trailing seps, if specified
167
+ row.pop while row.last.empty? if @drop
168
+
150
169
  # most compact export format
151
170
  s,q = @sep, @quote
152
171
  out = case @mode
@@ -167,9 +186,6 @@ class Censive < StringScanner
167
186
  row.map {|col| "#{q}#{col.gsub(q, @esc)}#{q}" }
168
187
  end.join(s)
169
188
 
170
- # drop trailing seps, if specified
171
- out.gsub!(/#{s}+\z/,'') if @drop
172
-
173
189
  # write output, using desired line endings
174
190
  @out << out + @eol
175
191
  end
@@ -188,31 +204,29 @@ class Censive < StringScanner
188
204
  end
189
205
  end
190
206
 
191
- # ==[ Playground... ]==
207
+ __END__
192
208
 
193
- data = File.read('1.csv')
194
-
195
- Censive.writer('out.csv', sep: ',', quote: "'") do |out|
196
- Censive.new(data).each do |row|
197
- out << row
198
- end
199
- end
209
+ # ==[ Playground... ]==
200
210
 
201
- #
202
- # ARGV << "z.csv" if ARGV.empty?
203
- #
204
- # case 1
205
- # when 1
206
- # path = ARGV.first
207
- # data = File.read(path)
208
- # when 2
209
- # data = DATA.gets("\n\n").rstrip
210
- # end
211
- #
212
211
  # STDOUT.sync = true
213
212
  #
214
- # csv = Censive.new(data)
213
+ # data = File.read('1.csv')
215
214
  #
216
- # data.size > 1e6 ? csv.parse : csv.parse.each {|cols| p cols }
215
+ # Censive.writer('out.csv') do |out|
216
+ # Censive.new(data, relax: true, excel: true).each do |row|
217
+ # out << row
218
+ # end
219
+ # end
217
220
  #
218
- # csv.stats
221
+ # __END__
222
+
223
+ ARGV << "z.csv" if ARGV.empty?
224
+
225
+ path = ARGV.first
226
+ data = File.read(path)
227
+
228
+ csv = Censive.new(data)
229
+
230
+ data.size > 1e6 ? csv.parse : csv.parse.each {|cols| p cols }
231
+
232
+ csv.stats
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: censive
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.6'
4
+ version: '0.8'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steve Shreeve
@@ -10,7 +10,7 @@ bindir: bin
10
10
  cert_chain: []
11
11
  date: 2023-01-31 00:00:00.000000000 Z
12
12
  dependencies: []
13
- description: A quick and lightweight CVS handling library for Ruby
13
+ description: A quick and lightweight CSV handling library for Ruby
14
14
  email: steve.shreeve@gmail.com
15
15
  executables: []
16
16
  extensions: []
@@ -43,5 +43,5 @@ requirements: []
43
43
  rubygems_version: 3.4.5
44
44
  signing_key:
45
45
  specification_version: 4
46
- summary: A quick and lightweight CVS handling library for Ruby
46
+ summary: A quick and lightweight CSV handling library for Ruby
47
47
  test_files: []