censive 0.13 → 0.14
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +21 -2
- data/censive.gemspec +1 -1
- data/lib/censive.rb +29 -36
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '072390db5028d3fc9f2bf9c22f95616dea749a502c6f010c25f222f0f8bd86ec'
|
4
|
+
data.tar.gz: 012b5549ca2454f22db2b5bdc225e4a703c622d6a636ddea2f31bd6380556164
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 285efce01bbdee5e2f2505757342de56918025a2f2a146e1ec5ba9677a2c27cb8698ef0e68170fdeb74cf7e7a82665ea7e533732b813a8c39680bff7c3c22674
|
7
|
+
data.tar.gz: 0f36dcba5cac1f608db296fa1728dc87fc50134d1d356bc1d699533c75b02c8eb4dcfa5b33326b68d78f4bb9edaff39d7da975c04eaa6549b1536e2e113bbb61
|
data/README.md
CHANGED
@@ -74,6 +74,25 @@ Or, you can be more succinct with:
|
|
74
74
|
```ruby
|
75
75
|
require 'censive'
|
76
76
|
|
77
|
-
csv = Censive.new(
|
78
|
-
csv.export(sep: "
|
77
|
+
csv = Censive.new(DATA.read, excel: true, relax: true, strip: true)
|
78
|
+
csv.export(sep: "|", excel: true) # pipe separated, protect leading zeroes
|
79
|
+
|
80
|
+
__END__
|
81
|
+
Name,Age, Shoe
|
82
|
+
Alice, 27,5
|
83
|
+
Bob, 33,10 1/2
|
84
|
+
Charlie or "Chuck",=B2 + B3,9
|
85
|
+
"Doug E Fresh",="007",001122
|
86
|
+
Subtotal,=sum(B2:B5),="01234"
|
87
|
+
```
|
88
|
+
|
89
|
+
Which returns:
|
90
|
+
|
91
|
+
```
|
92
|
+
Name|Age|Shoe
|
93
|
+
Alice|27|5
|
94
|
+
Bob|33|10 1/2
|
95
|
+
"Charlie or ""Chuck"""|=B2 + B3|9
|
96
|
+
Doug E Fresh|="007"|="001122"
|
97
|
+
Subtotal|=sum(B2:B5)|="01234"
|
79
98
|
```
|
data/censive.gemspec
CHANGED
data/lib/censive.rb
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
# censive - A quick and lightweight CSV handling library for Ruby
|
5
5
|
#
|
6
6
|
# Author: Steve Shreeve (steve.shreeve@gmail.com)
|
7
|
-
# Date: Feb
|
7
|
+
# Date: Feb 4, 2023
|
8
8
|
#
|
9
9
|
# https://crystal-lang.org/api/1.7.2/CSV.html (Crystal's CSV library)
|
10
10
|
# https://github.com/ruby/strscan/blob/master/ext/strscan/strscan.c
|
@@ -13,13 +13,10 @@
|
|
13
13
|
# ============================================================================
|
14
14
|
# GOALS:
|
15
15
|
# 1. Faster than Ruby's default CSV library
|
16
|
-
# 2. Lightweight code
|
17
|
-
# 3. Support
|
16
|
+
# 2. Lightweight code with streamlined and optimized logic
|
17
|
+
# 3. Support most non-compliant CSV variations (eg - @excel, @relax, @strip)
|
18
18
|
#
|
19
|
-
# TODO:
|
20
|
-
# 1. Support IO streaming
|
21
|
-
# 2. Add option to strip whitespace
|
22
|
-
# 3. Support CSV headers in first row
|
19
|
+
# TODO: Support IO streaming
|
23
20
|
# ============================================================================
|
24
21
|
|
25
22
|
require "bundler/setup"
|
@@ -44,6 +41,7 @@ class Censive < StringScanner
|
|
44
41
|
quote: '"' , # quote character
|
45
42
|
relax: false , # relax quote parsing so ,"Fo"o, => ,"Fo""o",
|
46
43
|
sep: "," , # column separator character
|
44
|
+
strip: false , # strip fields when reading
|
47
45
|
**opts # grab bag
|
48
46
|
)
|
49
47
|
super(str || "")
|
@@ -57,6 +55,7 @@ class Censive < StringScanner
|
|
57
55
|
@quote = quote
|
58
56
|
@relax = relax
|
59
57
|
@sep = sep
|
58
|
+
@strip = strip
|
60
59
|
|
61
60
|
@cr = "\r"
|
62
61
|
@lf = "\n"
|
@@ -68,50 +67,43 @@ class Censive < StringScanner
|
|
68
67
|
def reset(str=nil)
|
69
68
|
self.string = str if str
|
70
69
|
super()
|
71
|
-
@char =
|
70
|
+
@char = currchar
|
72
71
|
@rows = nil
|
73
72
|
@cols = @cells = 0
|
74
73
|
end
|
75
74
|
|
76
75
|
# ==[ Lexer ]==
|
77
76
|
|
78
|
-
# pure ruby versions for debugging
|
79
|
-
# def curr_char; @char = string[pos]; end
|
80
|
-
# def next_char; scan(/./m); @char = string[pos]; end
|
81
|
-
|
82
|
-
def curr_char; @char = currchar; end
|
83
|
-
def next_char; @char = nextchar; end
|
84
|
-
|
85
77
|
def next_token
|
86
78
|
if @excel && @char == @eq
|
87
79
|
excel = true
|
88
|
-
|
80
|
+
@char = nextchar
|
89
81
|
end
|
90
82
|
|
91
83
|
if @char == @quote # consume quoted cell
|
92
|
-
|
84
|
+
token = ""
|
93
85
|
while true
|
94
|
-
|
95
|
-
|
96
|
-
|
86
|
+
@char = nextchar
|
87
|
+
token << (scan_until(/(?=#{@quote})/o) or bomb "unclosed quote")
|
88
|
+
token << @quote and next if (@char = nextchar) == @quote
|
97
89
|
break if [@sep,@cr,@lf,@es,nil].include?(@char)
|
98
90
|
@relax or bomb "invalid character after quote"
|
99
|
-
|
91
|
+
token << @quote + scan_until(/(?=#{@quote})/o) + @quote
|
100
92
|
end
|
101
|
-
|
102
|
-
|
93
|
+
@char = nextchar if @char == @sep
|
94
|
+
@strip ? token.strip : token
|
103
95
|
elsif [@sep,@cr,@lf,@es,nil].include?(@char)
|
104
96
|
case @char
|
105
|
-
when @sep then
|
106
|
-
when @cr then
|
107
|
-
when @lf then
|
108
|
-
else
|
97
|
+
when @sep then @char = nextchar ; @es
|
98
|
+
when @cr then (@char = nextchar) == @lf and @char = nextchar; nil
|
99
|
+
when @lf then @char = nextchar ; nil
|
100
|
+
else nil
|
109
101
|
end
|
110
102
|
else # consume unquoted cell
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
103
|
+
token = scan_until(/(?=#{"\\"+@sep}|#{@cr}|#{@lf}|\z)/o) or bomb "unexpected character"
|
104
|
+
token.prepend(@eq) if excel
|
105
|
+
@char = nextchar if (@char = currchar) == @sep
|
106
|
+
@strip ? token.strip : token
|
115
107
|
end
|
116
108
|
end
|
117
109
|
|
@@ -143,7 +135,7 @@ class Censive < StringScanner
|
|
143
135
|
|
144
136
|
# returns 2 (must be quoted and escaped), 1 (must be quoted), 0 (neither)
|
145
137
|
def grok(str)
|
146
|
-
if idx = str.index(/(#{@quote})|#{
|
138
|
+
if idx = str.index(/(#{@quote})|#{"\\"+@sep}|#{@cr}|#{@lf}/o)
|
147
139
|
$1 ? 2 : str.index(/#{@quote}/o, idx) ? 2 : 1
|
148
140
|
else
|
149
141
|
0
|
@@ -164,7 +156,7 @@ class Censive < StringScanner
|
|
164
156
|
row
|
165
157
|
when 1
|
166
158
|
row.map do |col|
|
167
|
-
col.match?(/#{
|
159
|
+
col.match?(/#{"\\"+@sep}|#{@cr}|#{@lf}/o) ? "#{q}#{col}#{q}" : col
|
168
160
|
end
|
169
161
|
else
|
170
162
|
row.map do |col|
|
@@ -209,10 +201,11 @@ class Censive < StringScanner
|
|
209
201
|
end
|
210
202
|
|
211
203
|
if __FILE__ == $0
|
212
|
-
raw = DATA.
|
204
|
+
raw = DATA.read
|
205
|
+
# raw = DATA.gets("\n\n").chomp
|
213
206
|
# raw = File.read(ARGV.first || "lc-2023.csv")
|
214
|
-
csv = Censive.new(raw, excel: true, relax: true)
|
215
|
-
csv.export
|
207
|
+
csv = Censive.new(raw, excel: true, relax: true, strip: true)
|
208
|
+
csv.export(sep: "|", excel: true)
|
216
209
|
end
|
217
210
|
|
218
211
|
__END__
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: censive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.
|
4
|
+
version: '0.14'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Steve Shreeve
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-02-
|
11
|
+
date: 2023-02-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: strscan
|