censive 0.13 → 0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +21 -2
- data/censive.gemspec +1 -2
- data/lib/censive.rb +52 -68
- metadata +3 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4211eb0036bcbadf9d1683f10084bb8cbb109d26845d6990778e159b634b8d00
|
4
|
+
data.tar.gz: 1f97a3e5343958a86d54b7f1f87ada5d05a8f091d597253c3079c1a15eeb4dfa
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f3b8e0ee3cf2d8eadbb26d6be5b590418a844561062779f47eec0863b87cfbc65868a8c89624865eb74ee7f6825ee3fb1d2d26611353a30f805d40d567c1f1ac
|
7
|
+
data.tar.gz: 949e07f3f2208cbecc868a85c7bde5e67f38bf570ae1708496fd7745703c3fc133e8d48fc03c5c18c85264c0bff5b394d5ce2c2b2894daa8b7ca9033f9d68828
|
data/README.md
CHANGED
@@ -74,6 +74,25 @@ Or, you can be more succinct with:
|
|
74
74
|
```ruby
|
75
75
|
require 'censive'
|
76
76
|
|
77
|
-
csv = Censive.new(
|
78
|
-
csv.export(sep: "
|
77
|
+
csv = Censive.new(DATA.read, excel: true, relax: true, strip: true)
|
78
|
+
csv.export(sep: "|", excel: true) # pipe separated, protect leading zeroes
|
79
|
+
|
80
|
+
__END__
|
81
|
+
Name,Age, Shoe
|
82
|
+
Alice, 27,5
|
83
|
+
Bob, 33,10 1/2
|
84
|
+
Charlie or "Chuck",=B2 + B3,9
|
85
|
+
"Doug E Fresh",="007",001122
|
86
|
+
Subtotal,=sum(B2:B5),="01234"
|
87
|
+
```
|
88
|
+
|
89
|
+
Which returns:
|
90
|
+
|
91
|
+
```
|
92
|
+
Name|Age|Shoe
|
93
|
+
Alice|27|5
|
94
|
+
Bob|33|10 1/2
|
95
|
+
"Charlie or ""Chuck"""|=B2 + B3|9
|
96
|
+
Doug E Fresh|="007"|="001122"
|
97
|
+
Subtotal|=sum(B2:B5)|="01234"
|
79
98
|
```
|
data/censive.gemspec
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = "censive"
|
5
|
-
s.version = "0.13"
|
5
|
+
s.version = "0.15"
|
6
6
|
s.author = "Steve Shreeve"
|
7
7
|
s.email = "steve.shreeve@gmail.com"
|
8
8
|
s.summary =
|
@@ -11,5 +11,4 @@ Gem::Specification.new do |s|
|
|
11
11
|
s.license = "MIT"
|
12
12
|
s.files = `git ls-files`.split("\n") - %w[.gitignore]
|
13
13
|
s.executables = `(cd bin 2>&1) > /dev/null && git ls-files .`.split("\n")
|
14
|
-
s.add_dependency "strscan", ">= 3.0.6"
|
15
14
|
end
|
data/lib/censive.rb
CHANGED
@@ -4,25 +4,21 @@
|
|
4
4
|
# censive - A quick and lightweight CSV handling library for Ruby
|
5
5
|
#
|
6
6
|
# Author: Steve Shreeve (steve.shreeve@gmail.com)
|
7
|
-
# Date: Feb
|
7
|
+
# Date: Feb 5, 2023
|
8
8
|
#
|
9
9
|
# https://crystal-lang.org/api/1.7.2/CSV.html (Crystal's CSV library)
|
10
10
|
# https://github.com/ruby/strscan/blob/master/ext/strscan/strscan.c
|
11
|
-
#
|
12
|
-
#
|
11
|
+
#
|
12
|
+
# Thanks to Sutou Kouhei (kou) for his excellent advice on using scan
|
13
13
|
# ============================================================================
|
14
14
|
# GOALS:
|
15
15
|
# 1. Faster than Ruby's default CSV library
|
16
|
-
# 2. Lightweight code
|
17
|
-
# 3. Support
|
16
|
+
# 2. Lightweight code with streamlined and optimized logic
|
17
|
+
# 3. Support most non-compliant CSV variations (eg - @excel, @relax, @strip)
|
18
18
|
#
|
19
|
-
# TODO:
|
20
|
-
# 1. Support IO streaming
|
21
|
-
# 2. Add option to strip whitespace
|
22
|
-
# 3. Support CSV headers in first row
|
19
|
+
# TODO: Support IO streaming
|
23
20
|
# ============================================================================
|
24
21
|
|
25
|
-
require "bundler/setup"
|
26
22
|
require "strscan"
|
27
23
|
|
28
24
|
class Censive < StringScanner
|
@@ -36,83 +32,71 @@ class Censive < StringScanner
|
|
36
32
|
end
|
37
33
|
|
38
34
|
def initialize(str=nil,
|
39
|
-
drop:
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
sep:
|
47
|
-
|
35
|
+
drop: false , # drop trailing empty fields?
|
36
|
+
excel: false , # literals ="01" formulas =A1 + B2 http://bit.ly/3Y7jIvc
|
37
|
+
mode: :compact, # export mode: compact or full
|
38
|
+
out: $stdout , # output stream, needs to respond to <<
|
39
|
+
quote: '"' , # quote character
|
40
|
+
relax: false , # relax quote parsing so ,"Fo"o, => ,"Fo""o",
|
41
|
+
rowsep: "\n" , # row separator for export
|
42
|
+
sep: "," , # column separator character
|
43
|
+
strip: false , # strip fields when reading
|
44
|
+
**opts # grab bag
|
48
45
|
)
|
49
46
|
super(str || "")
|
50
47
|
reset
|
51
48
|
|
52
|
-
|
53
|
-
@
|
54
|
-
@excel
|
55
|
-
@mode
|
56
|
-
@out
|
57
|
-
@quote
|
58
|
-
@relax
|
59
|
-
@
|
60
|
-
|
61
|
-
@
|
62
|
-
|
63
|
-
|
64
|
-
@
|
65
|
-
@
|
49
|
+
# options
|
50
|
+
@drop = drop
|
51
|
+
@excel = excel
|
52
|
+
@mode = mode
|
53
|
+
@out = out
|
54
|
+
@quote = quote
|
55
|
+
@relax = relax
|
56
|
+
@rowsep = rowsep
|
57
|
+
@sep = sep
|
58
|
+
@strip = strip
|
59
|
+
|
60
|
+
# determined
|
61
|
+
@cr = "\r"
|
62
|
+
@lf = "\n"
|
63
|
+
@es = ""
|
64
|
+
@eq = "="
|
65
|
+
@esc = (@quote * 2)
|
66
|
+
@eol = /#{@cr}#{@lf}?|#{@lf}|\z/o # end of line
|
67
|
+
@eoc = /(?=#{"\\" + @sep}|#{@cr}|#{@lf}|\z)/o # end of cell
|
66
68
|
end
|
67
69
|
|
68
70
|
def reset(str=nil)
|
69
71
|
self.string = str if str
|
70
72
|
super()
|
71
|
-
@char = curr_char
|
72
73
|
@rows = nil
|
73
74
|
@cols = @cells = 0
|
74
75
|
end
|
75
76
|
|
76
77
|
# ==[ Lexer ]==
|
77
78
|
|
78
|
-
# pure ruby versions for debugging
|
79
|
-
# def curr_char; @char = string[pos]; end
|
80
|
-
# def next_char; scan(/./m); @char = string[pos]; end
|
81
|
-
|
82
|
-
def curr_char; @char = currchar; end
|
83
|
-
def next_char; @char = nextchar; end
|
84
|
-
|
85
79
|
def next_token
|
86
|
-
if @excel && @
|
87
|
-
excel = true
|
88
|
-
next_char
|
89
|
-
end
|
80
|
+
excel = true if @excel && scan(@eq)
|
90
81
|
|
91
|
-
if @
|
92
|
-
|
82
|
+
if scan(@quote) # consume quoted cell
|
83
|
+
token = ""
|
93
84
|
while true
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
break if [@sep,@cr,@lf,@es,nil].include?(@char)
|
85
|
+
token << (scan_until(/#{@quote}/o) or bomb "unclosed quote")[0..-2]
|
86
|
+
token << @quote and next if scan(@quote)
|
87
|
+
break if scan(@eoc)
|
98
88
|
@relax or bomb "invalid character after quote"
|
99
|
-
|
100
|
-
|
101
|
-
next_char if @char == @sep
|
102
|
-
match
|
103
|
-
elsif [@sep,@cr,@lf,@es,nil].include?(@char)
|
104
|
-
case @char
|
105
|
-
when @sep then next_char; @es
|
106
|
-
when @cr then next_char == @lf and next_char; nil
|
107
|
-
when @lf then next_char; nil
|
108
|
-
else nil
|
89
|
+
quoted = scan_until(/#{@quote}/o) or bomb "invalid inline quote"
|
90
|
+
token << @quote + quoted + @quote
|
109
91
|
end
|
92
|
+
elsif scan(@sep) then return @es
|
93
|
+
elsif scan(@eol) then return nil
|
110
94
|
else # consume unquoted cell
|
111
|
-
|
112
|
-
|
113
|
-
next_char if curr_char == @sep
|
114
|
-
match
|
95
|
+
token = scan_until(@eoc) or bomb "unexpected character"
|
96
|
+
token.prepend(@eq) if excel
|
115
97
|
end
|
98
|
+
scan(@sep)
|
99
|
+
@strip ? token.strip : token
|
116
100
|
end
|
117
101
|
|
118
102
|
def bomb(msg)
|
@@ -143,7 +127,7 @@ class Censive < StringScanner
|
|
143
127
|
|
144
128
|
# returns 2 (must be quoted and escaped), 1 (must be quoted), 0 (neither)
|
145
129
|
def grok(str)
|
146
|
-
if idx = str.index(/(#{@quote})|#{
|
130
|
+
if idx = str.index(/(#{@quote})|#{"\\"+@sep}|#{@cr}|#{@lf}/o)
|
147
131
|
$1 ? 2 : str.index(/#{@quote}/o, idx) ? 2 : 1
|
148
132
|
else
|
149
133
|
0
|
@@ -164,7 +148,7 @@ class Censive < StringScanner
|
|
164
148
|
row
|
165
149
|
when 1
|
166
150
|
row.map do |col|
|
167
|
-
col.match?(/#{
|
151
|
+
col.match?(/#{"\\"+@sep}|#{@cr}|#{@lf}/o) ? "#{q}#{col}#{q}" : col
|
168
152
|
end
|
169
153
|
else
|
170
154
|
row.map do |col|
|
@@ -186,7 +170,7 @@ class Censive < StringScanner
|
|
186
170
|
end
|
187
171
|
end.join(s)
|
188
172
|
|
189
|
-
@out << out + @
|
173
|
+
@out << out + @rowsep
|
190
174
|
end
|
191
175
|
|
192
176
|
def each
|
metadata
CHANGED
@@ -1,29 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: censive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.13'
|
4
|
+
version: '0.15'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Steve Shreeve
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-02-
|
12
|
-
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: strscan
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ">="
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: 3.0.6
|
20
|
-
type: :runtime
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - ">="
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: 3.0.6
|
11
|
+
date: 2023-02-05 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
27
13
|
description: A quick and lightweight CSV handling library for Ruby
|
28
14
|
email: steve.shreeve@gmail.com
|
29
15
|
executables: []
|