censive 0.13 → 0.15
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +21 -2
- data/censive.gemspec +1 -2
- data/lib/censive.rb +52 -68
- metadata +3 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4211eb0036bcbadf9d1683f10084bb8cbb109d26845d6990778e159b634b8d00
|
4
|
+
data.tar.gz: 1f97a3e5343958a86d54b7f1f87ada5d05a8f091d597253c3079c1a15eeb4dfa
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f3b8e0ee3cf2d8eadbb26d6be5b590418a844561062779f47eec0863b87cfbc65868a8c89624865eb74ee7f6825ee3fb1d2d26611353a30f805d40d567c1f1ac
|
7
|
+
data.tar.gz: 949e07f3f2208cbecc868a85c7bde5e67f38bf570ae1708496fd7745703c3fc133e8d48fc03c5c18c85264c0bff5b394d5ce2c2b2894daa8b7ca9033f9d68828
|
data/README.md
CHANGED
@@ -74,6 +74,25 @@ Or, you can be more succinct with:
|
|
74
74
|
```ruby
|
75
75
|
require 'censive'
|
76
76
|
|
77
|
-
csv = Censive.new(
|
78
|
-
csv.export(sep: "
|
77
|
+
csv = Censive.new(DATA.read, excel: true, relax: true, strip: true)
|
78
|
+
csv.export(sep: "|", excel: true) # pipe separated, protect leading zeroes
|
79
|
+
|
80
|
+
__END__
|
81
|
+
Name,Age, Shoe
|
82
|
+
Alice, 27,5
|
83
|
+
Bob, 33,10 1/2
|
84
|
+
Charlie or "Chuck",=B2 + B3,9
|
85
|
+
"Doug E Fresh",="007",001122
|
86
|
+
Subtotal,=sum(B2:B5),="01234"
|
87
|
+
```
|
88
|
+
|
89
|
+
Which returns:
|
90
|
+
|
91
|
+
```
|
92
|
+
Name|Age|Shoe
|
93
|
+
Alice|27|5
|
94
|
+
Bob|33|10 1/2
|
95
|
+
"Charlie or ""Chuck"""|=B2 + B3|9
|
96
|
+
Doug E Fresh|="007"|="001122"
|
97
|
+
Subtotal|=sum(B2:B5)|="01234"
|
79
98
|
```
|
data/censive.gemspec
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = "censive"
|
5
|
-
s.version = "0.13"
|
5
|
+
s.version = "0.15"
|
6
6
|
s.author = "Steve Shreeve"
|
7
7
|
s.email = "steve.shreeve@gmail.com"
|
8
8
|
s.summary =
|
@@ -11,5 +11,4 @@ Gem::Specification.new do |s|
|
|
11
11
|
s.license = "MIT"
|
12
12
|
s.files = `git ls-files`.split("\n") - %w[.gitignore]
|
13
13
|
s.executables = `(cd bin 2>&1) > /dev/null && git ls-files .`.split("\n")
|
14
|
-
s.add_dependency "strscan", ">= 3.0.6"
|
15
14
|
end
|
data/lib/censive.rb
CHANGED
@@ -4,25 +4,21 @@
|
|
4
4
|
# censive - A quick and lightweight CSV handling library for Ruby
|
5
5
|
#
|
6
6
|
# Author: Steve Shreeve (steve.shreeve@gmail.com)
|
7
|
-
# Date: Feb
|
7
|
+
# Date: Feb 5, 2023
|
8
8
|
#
|
9
9
|
# https://crystal-lang.org/api/1.7.2/CSV.html (Crystal's CSV library)
|
10
10
|
# https://github.com/ruby/strscan/blob/master/ext/strscan/strscan.c
|
11
|
-
#
|
12
|
-
#
|
11
|
+
#
|
12
|
+
# Thanks to Sutou Kouhei (kou) for his excellent advice on using scan
|
13
13
|
# ============================================================================
|
14
14
|
# GOALS:
|
15
15
|
# 1. Faster than Ruby's default CSV library
|
16
|
-
# 2. Lightweight code
|
17
|
-
# 3. Support
|
16
|
+
# 2. Lightweight code with streamlined and optimized logic
|
17
|
+
# 3. Support most non-compliant CSV variations (eg - @excel, @relax, @strip)
|
18
18
|
#
|
19
|
-
# TODO:
|
20
|
-
# 1. Support IO streaming
|
21
|
-
# 2. Add option to strip whitespace
|
22
|
-
# 3. Support CSV headers in first row
|
19
|
+
# TODO: Support IO streaming
|
23
20
|
# ============================================================================
|
24
21
|
|
25
|
-
require "bundler/setup"
|
26
22
|
require "strscan"
|
27
23
|
|
28
24
|
class Censive < StringScanner
|
@@ -36,83 +32,71 @@ class Censive < StringScanner
|
|
36
32
|
end
|
37
33
|
|
38
34
|
def initialize(str=nil,
|
39
|
-
drop:
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
sep:
|
47
|
-
|
35
|
+
drop: false , # drop trailing empty fields?
|
36
|
+
excel: false , # literals ="01" formulas =A1 + B2 http://bit.ly/3Y7jIvc
|
37
|
+
mode: :compact, # export mode: compact or full
|
38
|
+
out: $stdout , # output stream, needs to respond to <<
|
39
|
+
quote: '"' , # quote character
|
40
|
+
relax: false , # relax quote parsing so ,"Fo"o, => ,"Fo""o",
|
41
|
+
rowsep: "\n" , # row separator for export
|
42
|
+
sep: "," , # column separator character
|
43
|
+
strip: false , # strip fields when reading
|
44
|
+
**opts # grab bag
|
48
45
|
)
|
49
46
|
super(str || "")
|
50
47
|
reset
|
51
48
|
|
52
|
-
|
53
|
-
@
|
54
|
-
@excel
|
55
|
-
@mode
|
56
|
-
@out
|
57
|
-
@quote
|
58
|
-
@relax
|
59
|
-
@
|
60
|
-
|
61
|
-
@
|
62
|
-
|
63
|
-
|
64
|
-
@
|
65
|
-
@
|
49
|
+
# options
|
50
|
+
@drop = drop
|
51
|
+
@excel = excel
|
52
|
+
@mode = mode
|
53
|
+
@out = out
|
54
|
+
@quote = quote
|
55
|
+
@relax = relax
|
56
|
+
@rowsep = rowsep
|
57
|
+
@sep = sep
|
58
|
+
@strip = strip
|
59
|
+
|
60
|
+
# determined
|
61
|
+
@cr = "\r"
|
62
|
+
@lf = "\n"
|
63
|
+
@es = ""
|
64
|
+
@eq = "="
|
65
|
+
@esc = (@quote * 2)
|
66
|
+
@eol = /#{@cr}#{@lf}?|#{@lf}|\z/o # end of line
|
67
|
+
@eoc = /(?=#{"\\" + @sep}|#{@cr}|#{@lf}|\z)/o # end of cell
|
66
68
|
end
|
67
69
|
|
68
70
|
def reset(str=nil)
|
69
71
|
self.string = str if str
|
70
72
|
super()
|
71
|
-
@char = curr_char
|
72
73
|
@rows = nil
|
73
74
|
@cols = @cells = 0
|
74
75
|
end
|
75
76
|
|
76
77
|
# ==[ Lexer ]==
|
77
78
|
|
78
|
-
# pure ruby versions for debugging
|
79
|
-
# def curr_char; @char = string[pos]; end
|
80
|
-
# def next_char; scan(/./m); @char = string[pos]; end
|
81
|
-
|
82
|
-
def curr_char; @char = currchar; end
|
83
|
-
def next_char; @char = nextchar; end
|
84
|
-
|
85
79
|
def next_token
|
86
|
-
if @excel && @
|
87
|
-
excel = true
|
88
|
-
next_char
|
89
|
-
end
|
80
|
+
excel = true if @excel && scan(@eq)
|
90
81
|
|
91
|
-
if @
|
92
|
-
|
82
|
+
if scan(@quote) # consume quoted cell
|
83
|
+
token = ""
|
93
84
|
while true
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
break if [@sep,@cr,@lf,@es,nil].include?(@char)
|
85
|
+
token << (scan_until(/#{@quote}/o) or bomb "unclosed quote")[0..-2]
|
86
|
+
token << @quote and next if scan(@quote)
|
87
|
+
break if scan(@eoc)
|
98
88
|
@relax or bomb "invalid character after quote"
|
99
|
-
|
100
|
-
|
101
|
-
next_char if @char == @sep
|
102
|
-
match
|
103
|
-
elsif [@sep,@cr,@lf,@es,nil].include?(@char)
|
104
|
-
case @char
|
105
|
-
when @sep then next_char; @es
|
106
|
-
when @cr then next_char == @lf and next_char; nil
|
107
|
-
when @lf then next_char; nil
|
108
|
-
else nil
|
89
|
+
quoted = scan_until(/#{@quote}/o) or bomb "invalid inline quote"
|
90
|
+
token << @quote + quoted + @quote
|
109
91
|
end
|
92
|
+
elsif scan(@sep) then return @es
|
93
|
+
elsif scan(@eol) then return nil
|
110
94
|
else # consume unquoted cell
|
111
|
-
|
112
|
-
|
113
|
-
next_char if curr_char == @sep
|
114
|
-
match
|
95
|
+
token = scan_until(@eoc) or bomb "unexpected character"
|
96
|
+
token.prepend(@eq) if excel
|
115
97
|
end
|
98
|
+
scan(@sep)
|
99
|
+
@strip ? token.strip : token
|
116
100
|
end
|
117
101
|
|
118
102
|
def bomb(msg)
|
@@ -143,7 +127,7 @@ class Censive < StringScanner
|
|
143
127
|
|
144
128
|
# returns 2 (must be quoted and escaped), 1 (must be quoted), 0 (neither)
|
145
129
|
def grok(str)
|
146
|
-
if idx = str.index(/(#{@quote})|#{
|
130
|
+
if idx = str.index(/(#{@quote})|#{"\\"+@sep}|#{@cr}|#{@lf}/o)
|
147
131
|
$1 ? 2 : str.index(/#{@quote}/o, idx) ? 2 : 1
|
148
132
|
else
|
149
133
|
0
|
@@ -164,7 +148,7 @@ class Censive < StringScanner
|
|
164
148
|
row
|
165
149
|
when 1
|
166
150
|
row.map do |col|
|
167
|
-
col.match?(/#{
|
151
|
+
col.match?(/#{"\\"+@sep}|#{@cr}|#{@lf}/o) ? "#{q}#{col}#{q}" : col
|
168
152
|
end
|
169
153
|
else
|
170
154
|
row.map do |col|
|
@@ -186,7 +170,7 @@ class Censive < StringScanner
|
|
186
170
|
end
|
187
171
|
end.join(s)
|
188
172
|
|
189
|
-
@out << out + @
|
173
|
+
@out << out + @rowsep
|
190
174
|
end
|
191
175
|
|
192
176
|
def each
|
metadata
CHANGED
@@ -1,29 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: censive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.13'
|
4
|
+
version: '0.15'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Steve Shreeve
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-02-
|
12
|
-
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: strscan
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ">="
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: 3.0.6
|
20
|
-
type: :runtime
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - ">="
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: 3.0.6
|
11
|
+
date: 2023-02-05 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
27
13
|
description: A quick and lightweight CSV handling library for Ruby
|
28
14
|
email: steve.shreeve@gmail.com
|
29
15
|
executables: []
|