censive 0.14 → 0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/censive.gemspec +1 -2
- data/lib/censive.rb +48 -57
- metadata +2 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4211eb0036bcbadf9d1683f10084bb8cbb109d26845d6990778e159b634b8d00
|
4
|
+
data.tar.gz: 1f97a3e5343958a86d54b7f1f87ada5d05a8f091d597253c3079c1a15eeb4dfa
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f3b8e0ee3cf2d8eadbb26d6be5b590418a844561062779f47eec0863b87cfbc65868a8c89624865eb74ee7f6825ee3fb1d2d26611353a30f805d40d567c1f1ac
|
7
|
+
data.tar.gz: 949e07f3f2208cbecc868a85c7bde5e67f38bf570ae1708496fd7745703c3fc133e8d48fc03c5c18c85264c0bff5b394d5ce2c2b2894daa8b7ca9033f9d68828
|
data/censive.gemspec
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = "censive"
|
5
|
-
s.version = "0.14"
|
5
|
+
s.version = "0.15"
|
6
6
|
s.author = "Steve Shreeve"
|
7
7
|
s.email = "steve.shreeve@gmail.com"
|
8
8
|
s.summary =
|
@@ -11,5 +11,4 @@ Gem::Specification.new do |s|
|
|
11
11
|
s.license = "MIT"
|
12
12
|
s.files = `git ls-files`.split("\n") - %w[.gitignore]
|
13
13
|
s.executables = `(cd bin 2>&1) > /dev/null && git ls-files .`.split("\n")
|
14
|
-
s.add_dependency "strscan", ">= 3.0.6"
|
15
14
|
end
|
data/lib/censive.rb
CHANGED
@@ -4,12 +4,12 @@
|
|
4
4
|
# censive - A quick and lightweight CSV handling library for Ruby
|
5
5
|
#
|
6
6
|
# Author: Steve Shreeve (steve.shreeve@gmail.com)
|
7
|
-
# Date: Feb
|
7
|
+
# Date: Feb 5, 2023
|
8
8
|
#
|
9
9
|
# https://crystal-lang.org/api/1.7.2/CSV.html (Crystal's CSV library)
|
10
10
|
# https://github.com/ruby/strscan/blob/master/ext/strscan/strscan.c
|
11
|
-
#
|
12
|
-
#
|
11
|
+
#
|
12
|
+
# Thanks to Sutou Kouhei (kou) for his excellent advice on using scan
|
13
13
|
# ============================================================================
|
14
14
|
# GOALS:
|
15
15
|
# 1. Faster than Ruby's default CSV library
|
@@ -19,7 +19,6 @@
|
|
19
19
|
# TODO: Support IO streaming
|
20
20
|
# ============================================================================
|
21
21
|
|
22
|
-
require "bundler/setup"
|
23
22
|
require "strscan"
|
24
23
|
|
25
24
|
class Censive < StringScanner
|
@@ -33,41 +32,44 @@ class Censive < StringScanner
|
|
33
32
|
end
|
34
33
|
|
35
34
|
def initialize(str=nil,
|
36
|
-
drop:
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
sep:
|
44
|
-
strip:
|
45
|
-
**opts
|
35
|
+
drop: false , # drop trailing empty fields?
|
36
|
+
excel: false , # literals ="01" formulas =A1 + B2 http://bit.ly/3Y7jIvc
|
37
|
+
mode: :compact, # export mode: compact or full
|
38
|
+
out: $stdout , # output stream, needs to respond to <<
|
39
|
+
quote: '"' , # quote character
|
40
|
+
relax: false , # relax quote parsing so ,"Fo"o, => ,"Fo""o",
|
41
|
+
rowsep: "\n" , # row separator for export
|
42
|
+
sep: "," , # column separator character
|
43
|
+
strip: false , # strip fields when reading
|
44
|
+
**opts # grab bag
|
46
45
|
)
|
47
46
|
super(str || "")
|
48
47
|
reset
|
49
48
|
|
50
|
-
|
51
|
-
@
|
52
|
-
@excel
|
53
|
-
@mode
|
54
|
-
@out
|
55
|
-
@quote
|
56
|
-
@relax
|
57
|
-
@
|
58
|
-
@
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
@
|
63
|
-
@
|
64
|
-
@
|
49
|
+
# options
|
50
|
+
@drop = drop
|
51
|
+
@excel = excel
|
52
|
+
@mode = mode
|
53
|
+
@out = out
|
54
|
+
@quote = quote
|
55
|
+
@relax = relax
|
56
|
+
@rowsep = rowsep
|
57
|
+
@sep = sep
|
58
|
+
@strip = strip
|
59
|
+
|
60
|
+
# determined
|
61
|
+
@cr = "\r"
|
62
|
+
@lf = "\n"
|
63
|
+
@es = ""
|
64
|
+
@eq = "="
|
65
|
+
@esc = (@quote * 2)
|
66
|
+
@eol = /#{@cr}#{@lf}?|#{@lf}|\z/o # end of line
|
67
|
+
@eoc = /(?=#{"\\" + @sep}|#{@cr}|#{@lf}|\z)/o # end of cell
|
65
68
|
end
|
66
69
|
|
67
70
|
def reset(str=nil)
|
68
71
|
self.string = str if str
|
69
72
|
super()
|
70
|
-
@char = currchar
|
71
73
|
@rows = nil
|
72
74
|
@cols = @cells = 0
|
73
75
|
end
|
@@ -75,36 +77,26 @@ class Censive < StringScanner
|
|
75
77
|
# ==[ Lexer ]==
|
76
78
|
|
77
79
|
def next_token
|
78
|
-
if @excel && @
|
79
|
-
excel = true
|
80
|
-
@char = nextchar
|
81
|
-
end
|
80
|
+
excel = true if @excel && scan(@eq)
|
82
81
|
|
83
|
-
if @
|
82
|
+
if scan(@quote) # consume quoted cell
|
84
83
|
token = ""
|
85
84
|
while true
|
86
|
-
@
|
87
|
-
token <<
|
88
|
-
|
89
|
-
break if [@sep,@cr,@lf,@es,nil].include?(@char)
|
85
|
+
token << (scan_until(/#{@quote}/o) or bomb "unclosed quote")[0..-2]
|
86
|
+
token << @quote and next if scan(@quote)
|
87
|
+
break if scan(@eoc)
|
90
88
|
@relax or bomb "invalid character after quote"
|
91
|
-
|
92
|
-
|
93
|
-
@char = nextchar if @char == @sep
|
94
|
-
@strip ? token.strip : token
|
95
|
-
elsif [@sep,@cr,@lf,@es,nil].include?(@char)
|
96
|
-
case @char
|
97
|
-
when @sep then @char = nextchar ; @es
|
98
|
-
when @cr then (@char = nextchar) == @lf and @char = nextchar; nil
|
99
|
-
when @lf then @char = nextchar ; nil
|
100
|
-
else nil
|
89
|
+
quoted = scan_until(/#{@quote}/o) or bomb "invalid inline quote"
|
90
|
+
token << @quote + quoted + @quote
|
101
91
|
end
|
92
|
+
elsif scan(@sep) then return @es
|
93
|
+
elsif scan(@eol) then return nil
|
102
94
|
else # consume unquoted cell
|
103
|
-
token = scan_until(
|
95
|
+
token = scan_until(@eoc) or bomb "unexpected character"
|
104
96
|
token.prepend(@eq) if excel
|
105
|
-
@char = nextchar if (@char = currchar) == @sep
|
106
|
-
@strip ? token.strip : token
|
107
97
|
end
|
98
|
+
scan(@sep)
|
99
|
+
@strip ? token.strip : token
|
108
100
|
end
|
109
101
|
|
110
102
|
def bomb(msg)
|
@@ -178,7 +170,7 @@ class Censive < StringScanner
|
|
178
170
|
end
|
179
171
|
end.join(s)
|
180
172
|
|
181
|
-
@out << out + @
|
173
|
+
@out << out + @rowsep
|
182
174
|
end
|
183
175
|
|
184
176
|
def each
|
@@ -201,11 +193,10 @@ class Censive < StringScanner
|
|
201
193
|
end
|
202
194
|
|
203
195
|
if __FILE__ == $0
|
204
|
-
raw = DATA.
|
205
|
-
# raw = DATA.gets("\n\n").chomp
|
196
|
+
raw = DATA.gets("\n\n").chomp
|
206
197
|
# raw = File.read(ARGV.first || "lc-2023.csv")
|
207
|
-
csv = Censive.new(raw, excel: true, relax: true
|
208
|
-
csv.export(sep: "
|
198
|
+
csv = Censive.new(raw, excel: true, relax: true)
|
199
|
+
csv.export # (sep: ",", excel: true)
|
209
200
|
end
|
210
201
|
|
211
202
|
__END__
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: censive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.14'
|
4
|
+
version: '0.15'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Steve Shreeve
|
@@ -9,21 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
date: 2023-02-05 00:00:00.000000000 Z
|
12
|
-
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: strscan
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ">="
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: 3.0.6
|
20
|
-
type: :runtime
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - ">="
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: 3.0.6
|
12
|
+
dependencies: []
|
27
13
|
description: A quick and lightweight CSV handling library for Ruby
|
28
14
|
email: steve.shreeve@gmail.com
|
29
15
|
executables: []
|