censive 0.6 → 0.8

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (5)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/censive.gemspec +3 -3
  4. data/lib/censive.rb +77 -63
  5. metadata +3 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2fd3e9feff7e7610f300de0896977b7403ad161178c1f4cd28ef9c5248f8b9d1
4
- data.tar.gz: 8ac7307f60706052caafa0f1b6c1a28dd4385d33275dfa73af69e29b29471897
3
+ metadata.gz: 6419efcdc9274ea8bcf7b8527001e33f8bdfea348dfd911686cab36984d507da
4
+ data.tar.gz: 3b59aead54517fd64d7ece3eaa6f459e301e1e48f1ae34772a7128c61fb739f2
5
5
  SHA512:
6
- metadata.gz: ccc3de3e28e7fe7463acd86423f8391cad49238363ea4631727202639815fac10554a1bd388450d07422578ac04840d0b37c464dd56e3f600e7f4b732c6b4fee
7
- data.tar.gz: 5304136792442e52d339bd9c86db318104773665b041c5d62ed18e74b1d50dcb0fe1b135ae6e9accffccb883aef34b05bc1f6d0b1681826faf9334d18ed0624e
6
+ metadata.gz: 7910c09e76a81ed27870ea52fb6c8aea0316ed213c53a026d98adc64f93349477e6acab0a93b88c6f184ce1d317634ecdca9290d50bff9b117b98bedd3ac7b86
7
+ data.tar.gz: 358ab985947d486b5f486b1f7e9c1f591e3b8e906b9eab59a4ed151e5f5d9652c211f2d2a4ee36f0543227e2ae5e33ba57f1e4c178f6f7e72e05c14d7b46895f
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # censive
2
2
 
3
- A quick and lightweight CVS handling library for Ruby
3
+ A quick and lightweight CSV handling library for Ruby
4
4
 
5
5
  ## Writing CSV
6
6
 
data/censive.gemspec CHANGED
@@ -2,11 +2,11 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = "censive"
5
- s.version = "0.6"
5
+ s.version = "0.8"
6
6
  s.author = "Steve Shreeve"
7
7
  s.email = "steve.shreeve@gmail.com"
8
- s.summary = "A quick and lightweight CVS handling library for Ruby"
9
- s.description = "A quick and lightweight CVS handling library for Ruby"
8
+ s.summary = "A quick and lightweight CSV handling library for Ruby"
9
+ s.description = "A quick and lightweight CSV handling library for Ruby"
10
10
  s.homepage = "https://github.com/shreeve/censive"
11
11
  s.license = "MIT"
12
12
  s.files = `git ls-files`.split("\n") - %w[.gitignore]
data/lib/censive.rb CHANGED
@@ -5,6 +5,8 @@
5
5
  #
6
6
  # Author: Steve Shreeve (steve.shreeve@gmail.com)
7
7
  # Date: Jan 30, 2023
8
+ #
9
+ # Thanks: Crystal's CSV library, see https://crystal-lang.org/api/1.7.2/CSV.html
8
10
  # ==============================================================================
9
11
  # The goals are:
10
12
  #
@@ -15,10 +17,7 @@
15
17
  #
16
18
  # 1. Option to support IO streaming
17
19
  # 2. Option to strip whitespace
18
- # 3. Option to change output line endings
19
- # 4. Option to force quotes in output
20
- # 5. Option to allow reading excel CSV (="Text" for cells)
21
- # 6. Confirm file encodings such as UTF-8, UTF-16, etc.
20
+ # 3. Confirm file encodings such as UTF-8, UTF-16, etc.
22
21
  #
23
22
  # NOTE: Only getch and scan_until advance strscan's position
24
23
  # ==============================================================================
@@ -36,81 +35,98 @@ class Censive < StringScanner
36
35
  def initialize(str=nil,
37
36
  sep: ',' , # column separator character
38
37
  quote: '"' , # quote character
39
- out: nil , # output IO/file
40
- mode: :compact, # export mode: compact or full
38
+
41
39
  drop: false , # enable to drop trailing separators
42
- eol: "\n" # desired line endings for exports
40
+ eol: "\n" , # desired line endings for exports
41
+ excel: false , # allow ,="0123" style columns
42
+ mode: :compact, # export mode: compact or full
43
+ out: nil , # output IO/file
44
+ relax: false , # relax parsing of quotes
45
+
46
+ **opts # grab bag
43
47
  )
44
48
  super(str || '')
45
49
  reset
46
50
 
47
- @sep = sep .freeze
48
- @quote = quote.freeze
49
- @out = out
50
- @mode = mode
51
- @drop = drop
52
- @eol = eol.freeze
53
-
54
- @es = "" .freeze
55
- @cr = "\r" .freeze
56
- @lf = "\n" .freeze
57
- @esc = (@quote * 2).freeze
51
+ @sep = sep .freeze
52
+ @quote = quote.freeze
53
+
54
+ @drop = drop
55
+ @eol = eol.freeze
56
+ @mode = mode
57
+ @out = out
58
+ @relax = relax
59
+
60
+ @es = "" .freeze
61
+ @cr = "\r" .freeze
62
+ @lf = "\n" .freeze
63
+ @eq = "=" .freeze
64
+ @esc = (@quote * 2).freeze
65
+
66
+ @tokens = [@sep,@quote,@cr,@lf,@es,nil]
67
+ @tokens << @eq if excel # See http://bit.ly/3Y7jIvc
58
68
  end
59
69
 
60
70
  def reset(str=nil)
61
71
  self.string = str if str
62
72
  super()
63
- @char = string[pos]
64
- @flag = nil
73
+ @char = peek(1)
74
+ @flag = nil
65
75
 
66
- @rows = nil
67
- @cols = @cells = 0
76
+ @rows = nil
77
+ @cols = @cells = 0
68
78
  end
69
79
 
70
80
  # ==[ Lexer ]==
71
81
 
72
82
  def next_char
73
83
  getch
74
- @char = string[pos]
84
+ @char = peek(1)
75
85
  end
76
86
 
77
87
  def next_token
78
88
  case @flag
79
- when @es then @flag = nil; [@cr,@lf,nil].include?(@char) and return @es
89
+ when @es then @flag = nil; [@cr,@lf,@es,nil].include?(@char) and return @es
80
90
  when @cr then @flag = nil; next_char == @lf and next_char
81
91
  when @lf then @flag = nil; next_char
82
92
  end if @flag
83
93
 
84
- if [@sep,@quote,@cr,@lf,nil].include?(@char)
94
+ if @tokens.include?(@char)
85
95
  case @char
86
- when @quote # consume_quoted_cell
96
+ when @quote, @eq # consume quoted cell
97
+ @char == @eq and next_char # excel mode: allows ,="012",
87
98
  match = ""
88
99
  while true
89
100
  getch # consume the quote (optimized by not calling next_char)
90
101
  match << (scan_until(/(?=#{@quote})/o) or bomb "unclosed quote")
91
102
  case next_char
92
- when @sep then @flag = @es; next_char; break
93
- when @quote then match << @quote
94
- when @cr,@lf,nil then break
95
- else bomb "unexpected character after quote"
103
+ when @sep then @flag = @es; next_char; break
104
+ when @quote then match << @quote
105
+ when @cr,@lf,@es,nil then break
106
+ else
107
+ if @relax
108
+ match << @quote + @char
109
+ else
110
+ bomb "invalid character after quote"
111
+ end
96
112
  end
97
113
  end
98
114
  match
99
- when @sep then @flag = @es; next_char; @es
100
- when @cr then @flag = @cr; nil
101
- when @lf then @flag = @lf; nil
102
- when nil then nil
115
+ when @sep then @flag = @es; next_char; @es
116
+ when @cr then @flag = @cr; nil
117
+ when @lf then @flag = @lf; nil
118
+ when @es,nil then nil
103
119
  end
104
- else # consume_unquoted_cell
120
+ else # consume unquoted cell
105
121
  match = scan_until(/(?=#{@sep}|#{@cr}|#{@lf}|\z)/o) or bomb "unexpected character"
106
- @char = string[pos]
122
+ @char = peek(1)
107
123
  @char == @sep and @flag = @es and next_char
108
124
  match
109
125
  end
110
126
  end
111
127
 
112
128
  def bomb(msg)
113
- abort "#{File.basename($0)}: #{msg} at character #{pos} near '#{string[pos-4,7]}'"
129
+ abort "\n#{File.basename($0)}: #{msg} at character #{pos} near '#{string[pos-4,7]}'"
114
130
  end
115
131
 
116
132
  # ==[ Parser ]==
@@ -135,7 +151,7 @@ class Censive < StringScanner
135
151
 
136
152
  # ==[ Helpers ]==
137
153
 
138
- # grok returns 2 (seps and quotes), 1 (seps only), 0 (neither)
154
+ # grok returns: 2 for seps and quotes, 1 for seps only, and 0 for neither
139
155
  def grok(str)
140
156
  if pos = str.index(/(#{@quote})|#{@sep}/o)
141
157
  $1 ? 2 : str.index(/#{@quote}/o, pos) ? 2 : 1
@@ -147,6 +163,9 @@ class Censive < StringScanner
147
163
  def <<(row)
148
164
  @out or return super
149
165
 
166
+ # drop trailing seps, if specified
167
+ row.pop while row.last.empty? if @drop
168
+
150
169
  # most compact export format
151
170
  s,q = @sep, @quote
152
171
  out = case @mode
@@ -167,9 +186,6 @@ class Censive < StringScanner
167
186
  row.map {|col| "#{q}#{col.gsub(q, @esc)}#{q}" }
168
187
  end.join(s)
169
188
 
170
- # drop trailing seps, if specified
171
- out.gsub!(/#{s}+\z/,'') if @drop
172
-
173
189
  # write output, using desired line endings
174
190
  @out << out + @eol
175
191
  end
@@ -188,31 +204,29 @@ class Censive < StringScanner
188
204
  end
189
205
  end
190
206
 
191
- # ==[ Playground... ]==
207
+ __END__
192
208
 
193
- data = File.read('1.csv')
194
-
195
- Censive.writer('out.csv', sep: ',', quote: "'") do |out|
196
- Censive.new(data).each do |row|
197
- out << row
198
- end
199
- end
209
+ # ==[ Playground... ]==
200
210
 
201
- #
202
- # ARGV << "z.csv" if ARGV.empty?
203
- #
204
- # case 1
205
- # when 1
206
- # path = ARGV.first
207
- # data = File.read(path)
208
- # when 2
209
- # data = DATA.gets("\n\n").rstrip
210
- # end
211
- #
212
211
  # STDOUT.sync = true
213
212
  #
214
- # csv = Censive.new(data)
213
+ # data = File.read('1.csv')
215
214
  #
216
- # data.size > 1e6 ? csv.parse : csv.parse.each {|cols| p cols }
215
+ # Censive.writer('out.csv') do |out|
216
+ # Censive.new(data, relax: true, excel: true).each do |row|
217
+ # out << row
218
+ # end
219
+ # end
217
220
  #
218
- # csv.stats
221
+ # __END__
222
+
223
+ ARGV << "z.csv" if ARGV.empty?
224
+
225
+ path = ARGV.first
226
+ data = File.read(path)
227
+
228
+ csv = Censive.new(data)
229
+
230
+ data.size > 1e6 ? csv.parse : csv.parse.each {|cols| p cols }
231
+
232
+ csv.stats
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: censive
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.6'
4
+ version: '0.8'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steve Shreeve
@@ -10,7 +10,7 @@ bindir: bin
10
10
  cert_chain: []
11
11
  date: 2023-01-31 00:00:00.000000000 Z
12
12
  dependencies: []
13
- description: A quick and lightweight CVS handling library for Ruby
13
+ description: A quick and lightweight CSV handling library for Ruby
14
14
  email: steve.shreeve@gmail.com
15
15
  executables: []
16
16
  extensions: []
@@ -43,5 +43,5 @@ requirements: []
43
43
  rubygems_version: 3.4.5
44
44
  signing_key:
45
45
  specification_version: 4
46
- summary: A quick and lightweight CVS handling library for Ruby
46
+ summary: A quick and lightweight CSV handling library for Ruby
47
47
  test_files: []