censive 0.14 → 0.15
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/censive.gemspec +1 -2
- data/lib/censive.rb +48 -57
- metadata +2 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4211eb0036bcbadf9d1683f10084bb8cbb109d26845d6990778e159b634b8d00
|
4
|
+
data.tar.gz: 1f97a3e5343958a86d54b7f1f87ada5d05a8f091d597253c3079c1a15eeb4dfa
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f3b8e0ee3cf2d8eadbb26d6be5b590418a844561062779f47eec0863b87cfbc65868a8c89624865eb74ee7f6825ee3fb1d2d26611353a30f805d40d567c1f1ac
|
7
|
+
data.tar.gz: 949e07f3f2208cbecc868a85c7bde5e67f38bf570ae1708496fd7745703c3fc133e8d48fc03c5c18c85264c0bff5b394d5ce2c2b2894daa8b7ca9033f9d68828
|
data/censive.gemspec
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = "censive"
|
5
|
-
s.version = "0.14"
|
5
|
+
s.version = "0.15"
|
6
6
|
s.author = "Steve Shreeve"
|
7
7
|
s.email = "steve.shreeve@gmail.com"
|
8
8
|
s.summary =
|
@@ -11,5 +11,4 @@ Gem::Specification.new do |s|
|
|
11
11
|
s.license = "MIT"
|
12
12
|
s.files = `git ls-files`.split("\n") - %w[.gitignore]
|
13
13
|
s.executables = `(cd bin 2>&1) > /dev/null && git ls-files .`.split("\n")
|
14
|
-
s.add_dependency "strscan", ">= 3.0.6"
|
15
14
|
end
|
data/lib/censive.rb
CHANGED
@@ -4,12 +4,12 @@
|
|
4
4
|
# censive - A quick and lightweight CSV handling library for Ruby
|
5
5
|
#
|
6
6
|
# Author: Steve Shreeve (steve.shreeve@gmail.com)
|
7
|
-
# Date: Feb
|
7
|
+
# Date: Feb 5, 2023
|
8
8
|
#
|
9
9
|
# https://crystal-lang.org/api/1.7.2/CSV.html (Crystal's CSV library)
|
10
10
|
# https://github.com/ruby/strscan/blob/master/ext/strscan/strscan.c
|
11
|
-
#
|
12
|
-
#
|
11
|
+
#
|
12
|
+
# Thanks to Sutou Kouhei (kou) for his excellent advice on using scan
|
13
13
|
# ============================================================================
|
14
14
|
# GOALS:
|
15
15
|
# 1. Faster than Ruby's default CSV library
|
@@ -19,7 +19,6 @@
|
|
19
19
|
# TODO: Support IO streaming
|
20
20
|
# ============================================================================
|
21
21
|
|
22
|
-
require "bundler/setup"
|
23
22
|
require "strscan"
|
24
23
|
|
25
24
|
class Censive < StringScanner
|
@@ -33,41 +32,44 @@ class Censive < StringScanner
|
|
33
32
|
end
|
34
33
|
|
35
34
|
def initialize(str=nil,
|
36
|
-
drop:
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
sep:
|
44
|
-
strip:
|
45
|
-
**opts
|
35
|
+
drop: false , # drop trailing empty fields?
|
36
|
+
excel: false , # literals ="01" formulas =A1 + B2 http://bit.ly/3Y7jIvc
|
37
|
+
mode: :compact, # export mode: compact or full
|
38
|
+
out: $stdout , # output stream, needs to respond to <<
|
39
|
+
quote: '"' , # quote character
|
40
|
+
relax: false , # relax quote parsing so ,"Fo"o, => ,"Fo""o",
|
41
|
+
rowsep: "\n" , # row separator for export
|
42
|
+
sep: "," , # column separator character
|
43
|
+
strip: false , # strip fields when reading
|
44
|
+
**opts # grab bag
|
46
45
|
)
|
47
46
|
super(str || "")
|
48
47
|
reset
|
49
48
|
|
50
|
-
|
51
|
-
@
|
52
|
-
@excel
|
53
|
-
@mode
|
54
|
-
@out
|
55
|
-
@quote
|
56
|
-
@relax
|
57
|
-
@
|
58
|
-
@
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
@
|
63
|
-
@
|
64
|
-
@
|
49
|
+
# options
|
50
|
+
@drop = drop
|
51
|
+
@excel = excel
|
52
|
+
@mode = mode
|
53
|
+
@out = out
|
54
|
+
@quote = quote
|
55
|
+
@relax = relax
|
56
|
+
@rowsep = rowsep
|
57
|
+
@sep = sep
|
58
|
+
@strip = strip
|
59
|
+
|
60
|
+
# determined
|
61
|
+
@cr = "\r"
|
62
|
+
@lf = "\n"
|
63
|
+
@es = ""
|
64
|
+
@eq = "="
|
65
|
+
@esc = (@quote * 2)
|
66
|
+
@eol = /#{@cr}#{@lf}?|#{@lf}|\z/o # end of line
|
67
|
+
@eoc = /(?=#{"\\" + @sep}|#{@cr}|#{@lf}|\z)/o # end of cell
|
65
68
|
end
|
66
69
|
|
67
70
|
def reset(str=nil)
|
68
71
|
self.string = str if str
|
69
72
|
super()
|
70
|
-
@char = currchar
|
71
73
|
@rows = nil
|
72
74
|
@cols = @cells = 0
|
73
75
|
end
|
@@ -75,36 +77,26 @@ class Censive < StringScanner
|
|
75
77
|
# ==[ Lexer ]==
|
76
78
|
|
77
79
|
def next_token
|
78
|
-
if @excel && @
|
79
|
-
excel = true
|
80
|
-
@char = nextchar
|
81
|
-
end
|
80
|
+
excel = true if @excel && scan(@eq)
|
82
81
|
|
83
|
-
if @
|
82
|
+
if scan(@quote) # consume quoted cell
|
84
83
|
token = ""
|
85
84
|
while true
|
86
|
-
@
|
87
|
-
token <<
|
88
|
-
|
89
|
-
break if [@sep,@cr,@lf,@es,nil].include?(@char)
|
85
|
+
token << (scan_until(/#{@quote}/o) or bomb "unclosed quote")[0..-2]
|
86
|
+
token << @quote and next if scan(@quote)
|
87
|
+
break if scan(@eoc)
|
90
88
|
@relax or bomb "invalid character after quote"
|
91
|
-
|
92
|
-
|
93
|
-
@char = nextchar if @char == @sep
|
94
|
-
@strip ? token.strip : token
|
95
|
-
elsif [@sep,@cr,@lf,@es,nil].include?(@char)
|
96
|
-
case @char
|
97
|
-
when @sep then @char = nextchar ; @es
|
98
|
-
when @cr then (@char = nextchar) == @lf and @char = nextchar; nil
|
99
|
-
when @lf then @char = nextchar ; nil
|
100
|
-
else nil
|
89
|
+
quoted = scan_until(/#{@quote}/o) or bomb "invalid inline quote"
|
90
|
+
token << @quote + quoted + @quote
|
101
91
|
end
|
92
|
+
elsif scan(@sep) then return @es
|
93
|
+
elsif scan(@eol) then return nil
|
102
94
|
else # consume unquoted cell
|
103
|
-
token = scan_until(
|
95
|
+
token = scan_until(@eoc) or bomb "unexpected character"
|
104
96
|
token.prepend(@eq) if excel
|
105
|
-
@char = nextchar if (@char = currchar) == @sep
|
106
|
-
@strip ? token.strip : token
|
107
97
|
end
|
98
|
+
scan(@sep)
|
99
|
+
@strip ? token.strip : token
|
108
100
|
end
|
109
101
|
|
110
102
|
def bomb(msg)
|
@@ -178,7 +170,7 @@ class Censive < StringScanner
|
|
178
170
|
end
|
179
171
|
end.join(s)
|
180
172
|
|
181
|
-
@out << out + @
|
173
|
+
@out << out + @rowsep
|
182
174
|
end
|
183
175
|
|
184
176
|
def each
|
@@ -201,11 +193,10 @@ class Censive < StringScanner
|
|
201
193
|
end
|
202
194
|
|
203
195
|
if __FILE__ == $0
|
204
|
-
raw = DATA.
|
205
|
-
# raw = DATA.gets("\n\n").chomp
|
196
|
+
raw = DATA.gets("\n\n").chomp
|
206
197
|
# raw = File.read(ARGV.first || "lc-2023.csv")
|
207
|
-
csv = Censive.new(raw, excel: true, relax: true
|
208
|
-
csv.export(sep: "
|
198
|
+
csv = Censive.new(raw, excel: true, relax: true)
|
199
|
+
csv.export # (sep: ",", excel: true)
|
209
200
|
end
|
210
201
|
|
211
202
|
__END__
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: censive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.14'
|
4
|
+
version: '0.15'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Steve Shreeve
|
@@ -9,21 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
date: 2023-02-05 00:00:00.000000000 Z
|
12
|
-
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: strscan
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ">="
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: 3.0.6
|
20
|
-
type: :runtime
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - ">="
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: 3.0.6
|
12
|
+
dependencies: []
|
27
13
|
description: A quick and lightweight CSV handling library for Ruby
|
28
14
|
email: steve.shreeve@gmail.com
|
29
15
|
executables: []
|