censive 0.13 → 0.14
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +21 -2
- data/censive.gemspec +1 -1
- data/lib/censive.rb +29 -36
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '072390db5028d3fc9f2bf9c22f95616dea749a502c6f010c25f222f0f8bd86ec'
|
4
|
+
data.tar.gz: 012b5549ca2454f22db2b5bdc225e4a703c622d6a636ddea2f31bd6380556164
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 285efce01bbdee5e2f2505757342de56918025a2f2a146e1ec5ba9677a2c27cb8698ef0e68170fdeb74cf7e7a82665ea7e533732b813a8c39680bff7c3c22674
|
7
|
+
data.tar.gz: 0f36dcba5cac1f608db296fa1728dc87fc50134d1d356bc1d699533c75b02c8eb4dcfa5b33326b68d78f4bb9edaff39d7da975c04eaa6549b1536e2e113bbb61
|
data/README.md
CHANGED
@@ -74,6 +74,25 @@ Or, you can be more succinct with:
|
|
74
74
|
```ruby
|
75
75
|
require 'censive'
|
76
76
|
|
77
|
-
csv = Censive.new(
|
78
|
-
csv.export(sep: "
|
77
|
+
csv = Censive.new(DATA.read, excel: true, relax: true, strip: true)
|
78
|
+
csv.export(sep: "|", excel: true) # pipe separated, protect leading zeroes
|
79
|
+
|
80
|
+
__END__
|
81
|
+
Name,Age, Shoe
|
82
|
+
Alice, 27,5
|
83
|
+
Bob, 33,10 1/2
|
84
|
+
Charlie or "Chuck",=B2 + B3,9
|
85
|
+
"Doug E Fresh",="007",001122
|
86
|
+
Subtotal,=sum(B2:B5),="01234"
|
87
|
+
```
|
88
|
+
|
89
|
+
Which returns:
|
90
|
+
|
91
|
+
```
|
92
|
+
Name|Age|Shoe
|
93
|
+
Alice|27|5
|
94
|
+
Bob|33|10 1/2
|
95
|
+
"Charlie or ""Chuck"""|=B2 + B3|9
|
96
|
+
Doug E Fresh|="007"|="001122"
|
97
|
+
Subtotal|=sum(B2:B5)|="01234"
|
79
98
|
```
|
data/censive.gemspec
CHANGED
data/lib/censive.rb
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
# censive - A quick and lightweight CSV handling library for Ruby
|
5
5
|
#
|
6
6
|
# Author: Steve Shreeve (steve.shreeve@gmail.com)
|
7
|
-
# Date: Feb
|
7
|
+
# Date: Feb 4, 2023
|
8
8
|
#
|
9
9
|
# https://crystal-lang.org/api/1.7.2/CSV.html (Crystal's CSV library)
|
10
10
|
# https://github.com/ruby/strscan/blob/master/ext/strscan/strscan.c
|
@@ -13,13 +13,10 @@
|
|
13
13
|
# ============================================================================
|
14
14
|
# GOALS:
|
15
15
|
# 1. Faster than Ruby's default CSV library
|
16
|
-
# 2. Lightweight code
|
17
|
-
# 3. Support
|
16
|
+
# 2. Lightweight code with streamlined and optimized logic
|
17
|
+
# 3. Support most non-compliant CSV variations (eg - @excel, @relax, @strip)
|
18
18
|
#
|
19
|
-
# TODO:
|
20
|
-
# 1. Support IO streaming
|
21
|
-
# 2. Add option to strip whitespace
|
22
|
-
# 3. Support CSV headers in first row
|
19
|
+
# TODO: Support IO streaming
|
23
20
|
# ============================================================================
|
24
21
|
|
25
22
|
require "bundler/setup"
|
@@ -44,6 +41,7 @@ class Censive < StringScanner
|
|
44
41
|
quote: '"' , # quote character
|
45
42
|
relax: false , # relax quote parsing so ,"Fo"o, => ,"Fo""o",
|
46
43
|
sep: "," , # column separator character
|
44
|
+
strip: false , # strip fields when reading
|
47
45
|
**opts # grab bag
|
48
46
|
)
|
49
47
|
super(str || "")
|
@@ -57,6 +55,7 @@ class Censive < StringScanner
|
|
57
55
|
@quote = quote
|
58
56
|
@relax = relax
|
59
57
|
@sep = sep
|
58
|
+
@strip = strip
|
60
59
|
|
61
60
|
@cr = "\r"
|
62
61
|
@lf = "\n"
|
@@ -68,50 +67,43 @@ class Censive < StringScanner
|
|
68
67
|
def reset(str=nil)
|
69
68
|
self.string = str if str
|
70
69
|
super()
|
71
|
-
@char =
|
70
|
+
@char = currchar
|
72
71
|
@rows = nil
|
73
72
|
@cols = @cells = 0
|
74
73
|
end
|
75
74
|
|
76
75
|
# ==[ Lexer ]==
|
77
76
|
|
78
|
-
# pure ruby versions for debugging
|
79
|
-
# def curr_char; @char = string[pos]; end
|
80
|
-
# def next_char; scan(/./m); @char = string[pos]; end
|
81
|
-
|
82
|
-
def curr_char; @char = currchar; end
|
83
|
-
def next_char; @char = nextchar; end
|
84
|
-
|
85
77
|
def next_token
|
86
78
|
if @excel && @char == @eq
|
87
79
|
excel = true
|
88
|
-
|
80
|
+
@char = nextchar
|
89
81
|
end
|
90
82
|
|
91
83
|
if @char == @quote # consume quoted cell
|
92
|
-
|
84
|
+
token = ""
|
93
85
|
while true
|
94
|
-
|
95
|
-
|
96
|
-
|
86
|
+
@char = nextchar
|
87
|
+
token << (scan_until(/(?=#{@quote})/o) or bomb "unclosed quote")
|
88
|
+
token << @quote and next if (@char = nextchar) == @quote
|
97
89
|
break if [@sep,@cr,@lf,@es,nil].include?(@char)
|
98
90
|
@relax or bomb "invalid character after quote"
|
99
|
-
|
91
|
+
token << @quote + scan_until(/(?=#{@quote})/o) + @quote
|
100
92
|
end
|
101
|
-
|
102
|
-
|
93
|
+
@char = nextchar if @char == @sep
|
94
|
+
@strip ? token.strip : token
|
103
95
|
elsif [@sep,@cr,@lf,@es,nil].include?(@char)
|
104
96
|
case @char
|
105
|
-
when @sep then
|
106
|
-
when @cr then
|
107
|
-
when @lf then
|
108
|
-
else
|
97
|
+
when @sep then @char = nextchar ; @es
|
98
|
+
when @cr then (@char = nextchar) == @lf and @char = nextchar; nil
|
99
|
+
when @lf then @char = nextchar ; nil
|
100
|
+
else nil
|
109
101
|
end
|
110
102
|
else # consume unquoted cell
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
103
|
+
token = scan_until(/(?=#{"\\"+@sep}|#{@cr}|#{@lf}|\z)/o) or bomb "unexpected character"
|
104
|
+
token.prepend(@eq) if excel
|
105
|
+
@char = nextchar if (@char = currchar) == @sep
|
106
|
+
@strip ? token.strip : token
|
115
107
|
end
|
116
108
|
end
|
117
109
|
|
@@ -143,7 +135,7 @@ class Censive < StringScanner
|
|
143
135
|
|
144
136
|
# returns 2 (must be quoted and escaped), 1 (must be quoted), 0 (neither)
|
145
137
|
def grok(str)
|
146
|
-
if idx = str.index(/(#{@quote})|#{
|
138
|
+
if idx = str.index(/(#{@quote})|#{"\\"+@sep}|#{@cr}|#{@lf}/o)
|
147
139
|
$1 ? 2 : str.index(/#{@quote}/o, idx) ? 2 : 1
|
148
140
|
else
|
149
141
|
0
|
@@ -164,7 +156,7 @@ class Censive < StringScanner
|
|
164
156
|
row
|
165
157
|
when 1
|
166
158
|
row.map do |col|
|
167
|
-
col.match?(/#{
|
159
|
+
col.match?(/#{"\\"+@sep}|#{@cr}|#{@lf}/o) ? "#{q}#{col}#{q}" : col
|
168
160
|
end
|
169
161
|
else
|
170
162
|
row.map do |col|
|
@@ -209,10 +201,11 @@ class Censive < StringScanner
|
|
209
201
|
end
|
210
202
|
|
211
203
|
if __FILE__ == $0
|
212
|
-
raw = DATA.
|
204
|
+
raw = DATA.read
|
205
|
+
# raw = DATA.gets("\n\n").chomp
|
213
206
|
# raw = File.read(ARGV.first || "lc-2023.csv")
|
214
|
-
csv = Censive.new(raw, excel: true, relax: true)
|
215
|
-
csv.export
|
207
|
+
csv = Censive.new(raw, excel: true, relax: true, strip: true)
|
208
|
+
csv.export(sep: "|", excel: true)
|
216
209
|
end
|
217
210
|
|
218
211
|
__END__
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: censive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.
|
4
|
+
version: '0.14'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Steve Shreeve
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-02-
|
11
|
+
date: 2023-02-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: strscan
|