censive 0.8 → 0.9

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (5)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/censive.gemspec +1 -1
  4. data/lib/censive.rb +26 -11
  5. metadata +1 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6419efcdc9274ea8bcf7b8527001e33f8bdfea348dfd911686cab36984d507da
4
- data.tar.gz: 3b59aead54517fd64d7ece3eaa6f459e301e1e48f1ae34772a7128c61fb739f2
3
+ metadata.gz: cbca33c415269ae1fafea5297f2b409879a46c37c884a0a7017bca322bcff2a6
4
+ data.tar.gz: ac021ddf3d7503aebc5791b0912c6409a0888627060b532e65f6eb72b94965a3
5
5
  SHA512:
6
- metadata.gz: 7910c09e76a81ed27870ea52fb6c8aea0316ed213c53a026d98adc64f93349477e6acab0a93b88c6f184ce1d317634ecdca9290d50bff9b117b98bedd3ac7b86
7
- data.tar.gz: 358ab985947d486b5f486b1f7e9c1f591e3b8e906b9eab59a4ed151e5f5d9652c211f2d2a4ee36f0543227e2ae5e33ba57f1e4c178f6f7e72e05c14d7b46895f
6
+ metadata.gz: 8095c0c7704e3a6ee66930b36f0131b38d52a68cdd066d9677e8ceb58c4ecd7ce7eed496c78b1841cabe845b8c82624ca808b33a7cf7ec4c8fd504b287b3ffb5
7
+ data.tar.gz: 2e363b63b37977784a38c06e091f3201a1cd7a13138e8101e0e41ca49c47b3c4b433e7e6f2843a6816ddcbf9c1c8293da0d858f6be38bd0d3d82ed5dbd904bfe
data/README.md CHANGED
@@ -12,7 +12,7 @@ data = File.read('data.csv')
12
12
 
13
13
  # write out a tab-separated tsv file
14
14
  Censive.writer('out.tsv', sep: "\t", mode: :full) do |out|
15
- Censive.new(data).each do |row|
15
+ Censive.new(data, excel: true, relax: true).each do |row|
16
16
  out << row
17
17
  end
18
18
  end
data/censive.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = "censive"
5
- s.version = "0.8"
5
+ s.version = "0.9"
6
6
  s.author = "Steve Shreeve"
7
7
  s.email = "steve.shreeve@gmail.com"
8
8
  s.summary = "A quick and lightweight CSV handling library for Ruby"
data/lib/censive.rb CHANGED
@@ -6,7 +6,8 @@
6
6
  # Author: Steve Shreeve (steve.shreeve@gmail.com)
7
7
  # Date: Jan 30, 2023
8
8
  #
9
- # Thanks: Crystal's CSV library, see https://crystal-lang.org/api/1.7.2/CSV.html
9
+ # Thanks to https://crystal-lang.org/api/1.7.2/CSV.html (Crystal's CSV library)
10
+ # and, also https://github.com/ruby/strscan/blob/master/ext/strscan/strscan.c
10
11
  # ==============================================================================
11
12
  # The goals are:
12
13
  #
@@ -17,7 +18,8 @@
17
18
  #
18
19
  # 1. Option to support IO streaming
19
20
  # 2. Option to strip whitespace
20
- # 3. Confirm file encodings such as UTF-8, UTF-16, etc.
21
+ # 3. Option to support headers in readers and writers
22
+ # 4. Confirm file encodings such as UTF-8, UTF-16, etc.
21
23
  #
22
24
  # NOTE: Only getch and scan_until advance strscan's position
23
25
  # ==============================================================================
@@ -38,7 +40,7 @@ class Censive < StringScanner
38
40
 
39
41
  drop: false , # enable to drop trailing separators
40
42
  eol: "\n" , # desired line endings for exports
41
- excel: false , # allow ,="0123" style columns
43
+ excel: false , # literals (="01"), formulas (=A1 + B2), see http://bit.ly/3Y7jIvc
42
44
  mode: :compact, # export mode: compact or full
43
45
  out: nil , # output IO/file
44
46
  relax: false , # relax parsing of quotes
@@ -53,6 +55,7 @@ class Censive < StringScanner
53
55
 
54
56
  @drop = drop
55
57
  @eol = eol.freeze
58
+ @excel = excel
56
59
  @mode = mode
57
60
  @out = out
58
61
  @relax = relax
@@ -64,7 +67,6 @@ class Censive < StringScanner
64
67
  @esc = (@quote * 2).freeze
65
68
 
66
69
  @tokens = [@sep,@quote,@cr,@lf,@es,nil]
67
- @tokens << @eq if excel # See http://bit.ly/3Y7jIvc
68
70
  end
69
71
 
70
72
  def reset(str=nil)
@@ -85,19 +87,27 @@ class Censive < StringScanner
85
87
  end
86
88
 
87
89
  def next_token
90
+
91
+ # process and clear @flag
88
92
  case @flag
89
93
  when @es then @flag = nil; [@cr,@lf,@es,nil].include?(@char) and return @es
90
94
  when @cr then @flag = nil; next_char == @lf and next_char
91
95
  when @lf then @flag = nil; next_char
96
+ else @flag = nil
92
97
  end if @flag
93
98
 
99
+ # See http://bit.ly/3Y7jIvc
100
+ if @excel && @char == @eq
101
+ @flag = @eq
102
+ next_char
103
+ end
104
+
94
105
  if @tokens.include?(@char)
95
106
  case @char
96
- when @quote, @eq # consume quoted cell
97
- @char == @eq and next_char # excel mode: allows ,="012",
107
+ when @quote # consume quoted cell
98
108
  match = ""
99
109
  while true
100
- getch # consume the quote (optimized by not calling next_char)
110
+ getch # consume the quote that got us here
101
111
  match << (scan_until(/(?=#{@quote})/o) or bomb "unclosed quote")
102
112
  case next_char
103
113
  when @sep then @flag = @es; next_char; break
@@ -119,6 +129,7 @@ class Censive < StringScanner
119
129
  end
120
130
  else # consume unquoted cell
121
131
  match = scan_until(/(?=#{@sep}|#{@cr}|#{@lf}|\z)/o) or bomb "unexpected character"
132
+ match = @eq + match if @flag == @eq # preserve @eq for excel formulas
122
133
  @char = peek(1)
123
134
  @char == @sep and @flag = @es and next_char
124
135
  match
@@ -151,9 +162,9 @@ class Censive < StringScanner
151
162
 
152
163
  # ==[ Helpers ]==
153
164
 
154
- # grok returns: 2 for seps and quotes, 1 for seps only, and 0 for neither
165
+ # grok returns: 2 (must be quoted and escaped), 1 (must be quoted), 0 (neither)
155
166
  def grok(str)
156
- if pos = str.index(/(#{@quote})|#{@sep}/o)
167
+ if pos = str.index(/(#{@quote})|#{@sep}|#{@cr}|#{@lf}/o)
157
168
  $1 ? 2 : str.index(/#{@quote}/o, pos) ? 2 : 1
158
169
  else
159
170
  0
@@ -171,8 +182,12 @@ class Censive < StringScanner
171
182
  out = case @mode
172
183
  when :compact
173
184
  case grok(row.join)
174
- when 0 then row
175
- when 1 then row.map {|col| col.include?(s) ? "#{q}#{col}#{q}" : col }
185
+ when 0
186
+ row
187
+ when 1
188
+ row.map do |col|
189
+ col.match?(/#{@sep}|#{@cr}|#{@lf}/o) ? "#{q}#{col}#{q}" : col
190
+ end
176
191
  else
177
192
  row.map do |col|
178
193
  case grok(col)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: censive
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.8'
4
+ version: '0.9'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steve Shreeve