censive 0.8 → 0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/censive.gemspec +1 -1
- data/lib/censive.rb +26 -11
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cbca33c415269ae1fafea5297f2b409879a46c37c884a0a7017bca322bcff2a6
|
4
|
+
data.tar.gz: ac021ddf3d7503aebc5791b0912c6409a0888627060b532e65f6eb72b94965a3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8095c0c7704e3a6ee66930b36f0131b38d52a68cdd066d9677e8ceb58c4ecd7ce7eed496c78b1841cabe845b8c82624ca808b33a7cf7ec4c8fd504b287b3ffb5
|
7
|
+
data.tar.gz: 2e363b63b37977784a38c06e091f3201a1cd7a13138e8101e0e41ca49c47b3c4b433e7e6f2843a6816ddcbf9c1c8293da0d858f6be38bd0d3d82ed5dbd904bfe
|
data/README.md
CHANGED
data/censive.gemspec
CHANGED
data/lib/censive.rb
CHANGED
@@ -6,7 +6,8 @@
|
|
6
6
|
# Author: Steve Shreeve (steve.shreeve@gmail.com)
|
7
7
|
# Date: Jan 30, 2023
|
8
8
|
#
|
9
|
-
# Thanks
|
9
|
+
# Thanks to https://crystal-lang.org/api/1.7.2/CSV.html (Crystal's CSV library)
|
10
|
+
# and, also https://github.com/ruby/strscan/blob/master/ext/strscan/strscan.c
|
10
11
|
# ==============================================================================
|
11
12
|
# The goals are:
|
12
13
|
#
|
@@ -17,7 +18,8 @@
|
|
17
18
|
#
|
18
19
|
# 1. Option to support IO streaming
|
19
20
|
# 2. Option to strip whitespace
|
20
|
-
# 3.
|
21
|
+
# 3. Option to support headers in readers and writers
|
22
|
+
# 4. Confirm file encodings such as UTF-8, UTF-16, etc.
|
21
23
|
#
|
22
24
|
# NOTE: Only getch and scan_until advance strscan's position
|
23
25
|
# ==============================================================================
|
@@ -38,7 +40,7 @@ class Censive < StringScanner
|
|
38
40
|
|
39
41
|
drop: false , # enable to drop trailing separators
|
40
42
|
eol: "\n" , # desired line endings for exports
|
41
|
-
excel: false , #
|
43
|
+
excel: false , # literals (="01"), formulas (=A1 + B2), see http://bit.ly/3Y7jIvc
|
42
44
|
mode: :compact, # export mode: compact or full
|
43
45
|
out: nil , # output IO/file
|
44
46
|
relax: false , # relax parsing of quotes
|
@@ -53,6 +55,7 @@ class Censive < StringScanner
|
|
53
55
|
|
54
56
|
@drop = drop
|
55
57
|
@eol = eol.freeze
|
58
|
+
@excel = excel
|
56
59
|
@mode = mode
|
57
60
|
@out = out
|
58
61
|
@relax = relax
|
@@ -64,7 +67,6 @@ class Censive < StringScanner
|
|
64
67
|
@esc = (@quote * 2).freeze
|
65
68
|
|
66
69
|
@tokens = [@sep,@quote,@cr,@lf,@es,nil]
|
67
|
-
@tokens << @eq if excel # See http://bit.ly/3Y7jIvc
|
68
70
|
end
|
69
71
|
|
70
72
|
def reset(str=nil)
|
@@ -85,19 +87,27 @@ class Censive < StringScanner
|
|
85
87
|
end
|
86
88
|
|
87
89
|
def next_token
|
90
|
+
|
91
|
+
# process and clear @flag
|
88
92
|
case @flag
|
89
93
|
when @es then @flag = nil; [@cr,@lf,@es,nil].include?(@char) and return @es
|
90
94
|
when @cr then @flag = nil; next_char == @lf and next_char
|
91
95
|
when @lf then @flag = nil; next_char
|
96
|
+
else @flag = nil
|
92
97
|
end if @flag
|
93
98
|
|
99
|
+
# See http://bit.ly/3Y7jIvc
|
100
|
+
if @excel && @char == @eq
|
101
|
+
@flag = @eq
|
102
|
+
next_char
|
103
|
+
end
|
104
|
+
|
94
105
|
if @tokens.include?(@char)
|
95
106
|
case @char
|
96
|
-
when @quote
|
97
|
-
@char == @eq and next_char # excel mode: allows ,="012",
|
107
|
+
when @quote # consume quoted cell
|
98
108
|
match = ""
|
99
109
|
while true
|
100
|
-
getch # consume the quote
|
110
|
+
getch # consume the quote that got us here
|
101
111
|
match << (scan_until(/(?=#{@quote})/o) or bomb "unclosed quote")
|
102
112
|
case next_char
|
103
113
|
when @sep then @flag = @es; next_char; break
|
@@ -119,6 +129,7 @@ class Censive < StringScanner
|
|
119
129
|
end
|
120
130
|
else # consume unquoted cell
|
121
131
|
match = scan_until(/(?=#{@sep}|#{@cr}|#{@lf}|\z)/o) or bomb "unexpected character"
|
132
|
+
match = @eq + match if @flag == @eq # preserve @eq for excel formulas
|
122
133
|
@char = peek(1)
|
123
134
|
@char == @sep and @flag = @es and next_char
|
124
135
|
match
|
@@ -151,9 +162,9 @@ class Censive < StringScanner
|
|
151
162
|
|
152
163
|
# ==[ Helpers ]==
|
153
164
|
|
154
|
-
# grok returns: 2
|
165
|
+
# grok returns: 2 (must be quoted and escaped), 1 (must be quoted), 0 (neither)
|
155
166
|
def grok(str)
|
156
|
-
if pos = str.index(/(#{@quote})|#{@sep}/o)
|
167
|
+
if pos = str.index(/(#{@quote})|#{@sep}|#{@cr}|#{@lf}/o)
|
157
168
|
$1 ? 2 : str.index(/#{@quote}/o, pos) ? 2 : 1
|
158
169
|
else
|
159
170
|
0
|
@@ -171,8 +182,12 @@ class Censive < StringScanner
|
|
171
182
|
out = case @mode
|
172
183
|
when :compact
|
173
184
|
case grok(row.join)
|
174
|
-
when 0
|
175
|
-
|
185
|
+
when 0
|
186
|
+
row
|
187
|
+
when 1
|
188
|
+
row.map do |col|
|
189
|
+
col.match?(/#{@sep}|#{@cr}|#{@lf}/o) ? "#{q}#{col}#{q}" : col
|
190
|
+
end
|
176
191
|
else
|
177
192
|
row.map do |col|
|
178
193
|
case grok(col)
|