censive 0.8 → 0.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/censive.gemspec +1 -1
- data/lib/censive.rb +26 -11
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cbca33c415269ae1fafea5297f2b409879a46c37c884a0a7017bca322bcff2a6
|
4
|
+
data.tar.gz: ac021ddf3d7503aebc5791b0912c6409a0888627060b532e65f6eb72b94965a3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8095c0c7704e3a6ee66930b36f0131b38d52a68cdd066d9677e8ceb58c4ecd7ce7eed496c78b1841cabe845b8c82624ca808b33a7cf7ec4c8fd504b287b3ffb5
|
7
|
+
data.tar.gz: 2e363b63b37977784a38c06e091f3201a1cd7a13138e8101e0e41ca49c47b3c4b433e7e6f2843a6816ddcbf9c1c8293da0d858f6be38bd0d3d82ed5dbd904bfe
|
data/README.md
CHANGED
data/censive.gemspec
CHANGED
data/lib/censive.rb
CHANGED
@@ -6,7 +6,8 @@
|
|
6
6
|
# Author: Steve Shreeve (steve.shreeve@gmail.com)
|
7
7
|
# Date: Jan 30, 2023
|
8
8
|
#
|
9
|
-
# Thanks
|
9
|
+
# Thanks to https://crystal-lang.org/api/1.7.2/CSV.html (Crystal's CSV library)
|
10
|
+
# and, also https://github.com/ruby/strscan/blob/master/ext/strscan/strscan.c
|
10
11
|
# ==============================================================================
|
11
12
|
# The goals are:
|
12
13
|
#
|
@@ -17,7 +18,8 @@
|
|
17
18
|
#
|
18
19
|
# 1. Option to support IO streaming
|
19
20
|
# 2. Option to strip whitespace
|
20
|
-
# 3.
|
21
|
+
# 3. Option to support headers in readers and writers
|
22
|
+
# 4. Confirm file encodings such as UTF-8, UTF-16, etc.
|
21
23
|
#
|
22
24
|
# NOTE: Only getch and scan_until advance strscan's position
|
23
25
|
# ==============================================================================
|
@@ -38,7 +40,7 @@ class Censive < StringScanner
|
|
38
40
|
|
39
41
|
drop: false , # enable to drop trailing separators
|
40
42
|
eol: "\n" , # desired line endings for exports
|
41
|
-
excel: false , #
|
43
|
+
excel: false , # literals (="01"), formulas (=A1 + B2), see http://bit.ly/3Y7jIvc
|
42
44
|
mode: :compact, # export mode: compact or full
|
43
45
|
out: nil , # output IO/file
|
44
46
|
relax: false , # relax parsing of quotes
|
@@ -53,6 +55,7 @@ class Censive < StringScanner
|
|
53
55
|
|
54
56
|
@drop = drop
|
55
57
|
@eol = eol.freeze
|
58
|
+
@excel = excel
|
56
59
|
@mode = mode
|
57
60
|
@out = out
|
58
61
|
@relax = relax
|
@@ -64,7 +67,6 @@ class Censive < StringScanner
|
|
64
67
|
@esc = (@quote * 2).freeze
|
65
68
|
|
66
69
|
@tokens = [@sep,@quote,@cr,@lf,@es,nil]
|
67
|
-
@tokens << @eq if excel # See http://bit.ly/3Y7jIvc
|
68
70
|
end
|
69
71
|
|
70
72
|
def reset(str=nil)
|
@@ -85,19 +87,27 @@ class Censive < StringScanner
|
|
85
87
|
end
|
86
88
|
|
87
89
|
def next_token
|
90
|
+
|
91
|
+
# process and clear @flag
|
88
92
|
case @flag
|
89
93
|
when @es then @flag = nil; [@cr,@lf,@es,nil].include?(@char) and return @es
|
90
94
|
when @cr then @flag = nil; next_char == @lf and next_char
|
91
95
|
when @lf then @flag = nil; next_char
|
96
|
+
else @flag = nil
|
92
97
|
end if @flag
|
93
98
|
|
99
|
+
# See http://bit.ly/3Y7jIvc
|
100
|
+
if @excel && @char == @eq
|
101
|
+
@flag = @eq
|
102
|
+
next_char
|
103
|
+
end
|
104
|
+
|
94
105
|
if @tokens.include?(@char)
|
95
106
|
case @char
|
96
|
-
when @quote
|
97
|
-
@char == @eq and next_char # excel mode: allows ,="012",
|
107
|
+
when @quote # consume quoted cell
|
98
108
|
match = ""
|
99
109
|
while true
|
100
|
-
getch # consume the quote
|
110
|
+
getch # consume the quote that got us here
|
101
111
|
match << (scan_until(/(?=#{@quote})/o) or bomb "unclosed quote")
|
102
112
|
case next_char
|
103
113
|
when @sep then @flag = @es; next_char; break
|
@@ -119,6 +129,7 @@ class Censive < StringScanner
|
|
119
129
|
end
|
120
130
|
else # consume unquoted cell
|
121
131
|
match = scan_until(/(?=#{@sep}|#{@cr}|#{@lf}|\z)/o) or bomb "unexpected character"
|
132
|
+
match = @eq + match if @flag == @eq # preserve @eq for excel formulas
|
122
133
|
@char = peek(1)
|
123
134
|
@char == @sep and @flag = @es and next_char
|
124
135
|
match
|
@@ -151,9 +162,9 @@ class Censive < StringScanner
|
|
151
162
|
|
152
163
|
# ==[ Helpers ]==
|
153
164
|
|
154
|
-
# grok returns: 2
|
165
|
+
# grok returns: 2 (must be quoted and escaped), 1 (must be quoted), 0 (neither)
|
155
166
|
def grok(str)
|
156
|
-
if pos = str.index(/(#{@quote})|#{@sep}/o)
|
167
|
+
if pos = str.index(/(#{@quote})|#{@sep}|#{@cr}|#{@lf}/o)
|
157
168
|
$1 ? 2 : str.index(/#{@quote}/o, pos) ? 2 : 1
|
158
169
|
else
|
159
170
|
0
|
@@ -171,8 +182,12 @@ class Censive < StringScanner
|
|
171
182
|
out = case @mode
|
172
183
|
when :compact
|
173
184
|
case grok(row.join)
|
174
|
-
when 0
|
175
|
-
|
185
|
+
when 0
|
186
|
+
row
|
187
|
+
when 1
|
188
|
+
row.map do |col|
|
189
|
+
col.match?(/#{@sep}|#{@cr}|#{@lf}/o) ? "#{q}#{col}#{q}" : col
|
190
|
+
end
|
176
191
|
else
|
177
192
|
row.map do |col|
|
178
193
|
case grok(col)
|