censive 0.8 → 0.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (5) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/censive.gemspec +1 -1
  4. data/lib/censive.rb +26 -11
  5. metadata +1 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6419efcdc9274ea8bcf7b8527001e33f8bdfea348dfd911686cab36984d507da
4
- data.tar.gz: 3b59aead54517fd64d7ece3eaa6f459e301e1e48f1ae34772a7128c61fb739f2
3
+ metadata.gz: cbca33c415269ae1fafea5297f2b409879a46c37c884a0a7017bca322bcff2a6
4
+ data.tar.gz: ac021ddf3d7503aebc5791b0912c6409a0888627060b532e65f6eb72b94965a3
5
5
  SHA512:
6
- metadata.gz: 7910c09e76a81ed27870ea52fb6c8aea0316ed213c53a026d98adc64f93349477e6acab0a93b88c6f184ce1d317634ecdca9290d50bff9b117b98bedd3ac7b86
7
- data.tar.gz: 358ab985947d486b5f486b1f7e9c1f591e3b8e906b9eab59a4ed151e5f5d9652c211f2d2a4ee36f0543227e2ae5e33ba57f1e4c178f6f7e72e05c14d7b46895f
6
+ metadata.gz: 8095c0c7704e3a6ee66930b36f0131b38d52a68cdd066d9677e8ceb58c4ecd7ce7eed496c78b1841cabe845b8c82624ca808b33a7cf7ec4c8fd504b287b3ffb5
7
+ data.tar.gz: 2e363b63b37977784a38c06e091f3201a1cd7a13138e8101e0e41ca49c47b3c4b433e7e6f2843a6816ddcbf9c1c8293da0d858f6be38bd0d3d82ed5dbd904bfe
data/README.md CHANGED
@@ -12,7 +12,7 @@ data = File.read('data.csv')
12
12
 
13
13
  # write out a tab-separated tsv file
14
14
  Censive.writer('out.tsv', sep: "\t", mode: :full) do |out|
15
- Censive.new(data).each do |row|
15
+ Censive.new(data, excel: true, relax: true).each do |row|
16
16
  out << row
17
17
  end
18
18
  end
data/censive.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = "censive"
5
- s.version = "0.8"
5
+ s.version = "0.9"
6
6
  s.author = "Steve Shreeve"
7
7
  s.email = "steve.shreeve@gmail.com"
8
8
  s.summary = "A quick and lightweight CSV handling library for Ruby"
data/lib/censive.rb CHANGED
@@ -6,7 +6,8 @@
6
6
  # Author: Steve Shreeve (steve.shreeve@gmail.com)
7
7
  # Date: Jan 30, 2023
8
8
  #
9
- # Thanks: Crystal's CSV library, see https://crystal-lang.org/api/1.7.2/CSV.html
9
+ # Thanks to https://crystal-lang.org/api/1.7.2/CSV.html (Crystal's CSV library)
10
+ # and also https://github.com/ruby/strscan/blob/master/ext/strscan/strscan.c
10
11
  # ==============================================================================
11
12
  # The goals are:
12
13
  #
@@ -17,7 +18,8 @@
17
18
  #
18
19
  # 1. Option to support IO streaming
19
20
  # 2. Option to strip whitespace
20
- # 3. Confirm file encodings such as UTF-8, UTF-16, etc.
21
+ # 3. Option to support headers in readers and writers
22
+ # 4. Confirm file encodings such as UTF-8, UTF-16, etc.
21
23
  #
22
24
  # NOTE: Only getch and scan_until advance strscan's position
23
25
  # ==============================================================================
@@ -38,7 +40,7 @@ class Censive < StringScanner
38
40
 
39
41
  drop: false , # enable to drop trailing separators
40
42
  eol: "\n" , # desired line endings for exports
41
- excel: false , # allow ,="0123" style columns
43
+ excel: false , # literals (="01"), formulas (=A1 + B2), see http://bit.ly/3Y7jIvc
42
44
  mode: :compact, # export mode: compact or full
43
45
  out: nil , # output IO/file
44
46
  relax: false , # relax parsing of quotes
@@ -53,6 +55,7 @@ class Censive < StringScanner
53
55
 
54
56
  @drop = drop
55
57
  @eol = eol.freeze
58
+ @excel = excel
56
59
  @mode = mode
57
60
  @out = out
58
61
  @relax = relax
@@ -64,7 +67,6 @@ class Censive < StringScanner
64
67
  @esc = (@quote * 2).freeze
65
68
 
66
69
  @tokens = [@sep,@quote,@cr,@lf,@es,nil]
67
- @tokens << @eq if excel # See http://bit.ly/3Y7jIvc
68
70
  end
69
71
 
70
72
  def reset(str=nil)
@@ -85,19 +87,27 @@ class Censive < StringScanner
85
87
  end
86
88
 
87
89
  def next_token
90
+
91
+ # process and clear @flag
88
92
  case @flag
89
93
  when @es then @flag = nil; [@cr,@lf,@es,nil].include?(@char) and return @es
90
94
  when @cr then @flag = nil; next_char == @lf and next_char
91
95
  when @lf then @flag = nil; next_char
96
+ else @flag = nil
92
97
  end if @flag
93
98
 
99
+ # See http://bit.ly/3Y7jIvc
100
+ if @excel && @char == @eq
101
+ @flag = @eq
102
+ next_char
103
+ end
104
+
94
105
  if @tokens.include?(@char)
95
106
  case @char
96
- when @quote, @eq # consume quoted cell
97
- @char == @eq and next_char # excel mode: allows ,="012",
107
+ when @quote # consume quoted cell
98
108
  match = ""
99
109
  while true
100
- getch # consume the quote (optimized by not calling next_char)
110
+ getch # consume the quote that got us here
101
111
  match << (scan_until(/(?=#{@quote})/o) or bomb "unclosed quote")
102
112
  case next_char
103
113
  when @sep then @flag = @es; next_char; break
@@ -119,6 +129,7 @@ class Censive < StringScanner
119
129
  end
120
130
  else # consume unquoted cell
121
131
  match = scan_until(/(?=#{@sep}|#{@cr}|#{@lf}|\z)/o) or bomb "unexpected character"
132
+ match = @eq + match if @flag == @eq # preserve @eq for excel formulas
122
133
  @char = peek(1)
123
134
  @char == @sep and @flag = @es and next_char
124
135
  match
@@ -151,9 +162,9 @@ class Censive < StringScanner
151
162
 
152
163
  # ==[ Helpers ]==
153
164
 
154
- # grok returns: 2 for seps and quotes, 1 for seps only, and 0 for neither
165
+ # grok returns: 2 (must be quoted and escaped), 1 (must be quoted), 0 (neither)
155
166
  def grok(str)
156
- if pos = str.index(/(#{@quote})|#{@sep}/o)
167
+ if pos = str.index(/(#{@quote})|#{@sep}|#{@cr}|#{@lf}/o)
157
168
  $1 ? 2 : str.index(/#{@quote}/o, pos) ? 2 : 1
158
169
  else
159
170
  0
@@ -171,8 +182,12 @@ class Censive < StringScanner
171
182
  out = case @mode
172
183
  when :compact
173
184
  case grok(row.join)
174
- when 0 then row
175
- when 1 then row.map {|col| col.include?(s) ? "#{q}#{col}#{q}" : col }
185
+ when 0
186
+ row
187
+ when 1
188
+ row.map do |col|
189
+ col.match?(/#{@sep}|#{@cr}|#{@lf}/o) ? "#{q}#{col}#{q}" : col
190
+ end
176
191
  else
177
192
  row.map do |col|
178
193
  case grok(col)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: censive
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.8'
4
+ version: '0.9'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steve Shreeve