censive 0.14 → 0.16
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/censive.gemspec +1 -2
- data/lib/censive.rb +49 -57
- metadata +2 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7abd1490426ab80b6b1cc3c7b28a590eb22c5dd565405662dd82bd1c427b013b
|
4
|
+
data.tar.gz: adb90312a493b92ccb36e7354153a30f530f33d2b205fd6ebbea49ca1e80efe5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f603db91e71cce8d72322d9a248f81f625238aa2a7d5107a6849d18ecdb00cf94865f2ad716afde1df79e4d0dd126765968822e3e9d53f3bea09886f2ae08d89
|
7
|
+
data.tar.gz: b5fc1fa9b2a309677091334d50200ada240ea2490d23d427cc32774a828a672ce1be1c22e156d21a7ef0ebdfc604f7b003386f46d4bc7fb2b6fdb50dc6d2a857
|
data/censive.gemspec
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = "censive"
|
5
|
-
s.version = "0.14"
|
5
|
+
s.version = "0.16"
|
6
6
|
s.author = "Steve Shreeve"
|
7
7
|
s.email = "steve.shreeve@gmail.com"
|
8
8
|
s.summary =
|
@@ -11,5 +11,4 @@ Gem::Specification.new do |s|
|
|
11
11
|
s.license = "MIT"
|
12
12
|
s.files = `git ls-files`.split("\n") - %w[.gitignore]
|
13
13
|
s.executables = `(cd bin 2>&1) > /dev/null && git ls-files .`.split("\n")
|
14
|
-
s.add_dependency "strscan", ">= 3.0.6"
|
15
14
|
end
|
data/lib/censive.rb
CHANGED
@@ -4,12 +4,12 @@
|
|
4
4
|
# censive - A quick and lightweight CSV handling library for Ruby
|
5
5
|
#
|
6
6
|
# Author: Steve Shreeve (steve.shreeve@gmail.com)
|
7
|
-
# Date: Feb
|
7
|
+
# Date: Feb 5, 2023
|
8
8
|
#
|
9
9
|
# https://crystal-lang.org/api/1.7.2/CSV.html (Crystal's CSV library)
|
10
10
|
# https://github.com/ruby/strscan/blob/master/ext/strscan/strscan.c
|
11
|
-
#
|
12
|
-
#
|
11
|
+
#
|
12
|
+
# Thanks to Sutou Kouhei (kou) for his excellent advice on using scan
|
13
13
|
# ============================================================================
|
14
14
|
# GOALS:
|
15
15
|
# 1. Faster than Ruby's default CSV library
|
@@ -19,7 +19,6 @@
|
|
19
19
|
# TODO: Support IO streaming
|
20
20
|
# ============================================================================
|
21
21
|
|
22
|
-
require "bundler/setup"
|
23
22
|
require "strscan"
|
24
23
|
|
25
24
|
class Censive < StringScanner
|
@@ -33,41 +32,44 @@ class Censive < StringScanner
|
|
33
32
|
end
|
34
33
|
|
35
34
|
def initialize(str=nil,
|
36
|
-
drop:
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
sep:
|
44
|
-
strip:
|
45
|
-
**opts
|
35
|
+
drop: false , # drop trailing empty fields?
|
36
|
+
excel: false , # literals ="01" formulas =A1 + B2 http://bit.ly/3Y7jIvc
|
37
|
+
mode: :compact, # export mode: compact or full
|
38
|
+
out: $stdout , # output stream, needs to respond to <<
|
39
|
+
quote: '"' , # quote character
|
40
|
+
relax: false , # relax quote parsing so ,"Fo"o, => ,"Fo""o",
|
41
|
+
rowsep: "\n" , # row separator for export
|
42
|
+
sep: "," , # column separator character
|
43
|
+
strip: false , # strip fields when reading
|
44
|
+
**opts # grab bag
|
46
45
|
)
|
47
46
|
super(str || "")
|
48
47
|
reset
|
49
48
|
|
50
|
-
|
51
|
-
@
|
52
|
-
@excel
|
53
|
-
@mode
|
54
|
-
@out
|
55
|
-
@quote
|
56
|
-
@relax
|
57
|
-
@
|
58
|
-
@
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
@
|
63
|
-
@
|
64
|
-
@
|
49
|
+
# options
|
50
|
+
@drop = drop
|
51
|
+
@excel = excel
|
52
|
+
@mode = mode
|
53
|
+
@out = out
|
54
|
+
@quote = quote
|
55
|
+
@relax = relax
|
56
|
+
@rowsep = rowsep
|
57
|
+
@sep = sep
|
58
|
+
@strip = strip
|
59
|
+
|
60
|
+
# determined
|
61
|
+
@cr = "\r"
|
62
|
+
@lf = "\n"
|
63
|
+
@es = ""
|
64
|
+
@eq = "="
|
65
|
+
@esc = (@quote * 2)
|
66
|
+
@eol = /#{@cr}#{@lf}?|#{@lf}|\z/o # end of line
|
67
|
+
@eoc = /(?=#{"\\" + @sep}|#{@cr}|#{@lf}|\z)/o # end of cell
|
65
68
|
end
|
66
69
|
|
67
70
|
def reset(str=nil)
|
68
71
|
self.string = str if str
|
69
72
|
super()
|
70
|
-
@char = currchar
|
71
73
|
@rows = nil
|
72
74
|
@cols = @cells = 0
|
73
75
|
end
|
@@ -75,36 +77,25 @@ class Censive < StringScanner
|
|
75
77
|
# ==[ Lexer ]==
|
76
78
|
|
77
79
|
def next_token
|
78
|
-
if @excel && @
|
79
|
-
excel = true
|
80
|
-
@char = nextchar
|
81
|
-
end
|
80
|
+
excel = true if @excel && scan(@eq)
|
82
81
|
|
83
|
-
if @
|
82
|
+
if scan(@quote) # consume quoted cell
|
84
83
|
token = ""
|
85
84
|
while true
|
86
|
-
@
|
87
|
-
token <<
|
88
|
-
|
89
|
-
break if [@sep,@cr,@lf,@es,nil].include?(@char)
|
85
|
+
token << (scan_until(/#{@quote}/o) or bomb "unclosed quote")[0..-2]
|
86
|
+
token << @quote and next if scan(@quote)
|
87
|
+
break if scan(@eoc)
|
90
88
|
@relax or bomb "invalid character after quote"
|
91
|
-
token << @quote + scan_until(
|
92
|
-
end
|
93
|
-
@char = nextchar if @char == @sep
|
94
|
-
@strip ? token.strip : token
|
95
|
-
elsif [@sep,@cr,@lf,@es,nil].include?(@char)
|
96
|
-
case @char
|
97
|
-
when @sep then @char = nextchar ; @es
|
98
|
-
when @cr then (@char = nextchar) == @lf and @char = nextchar; nil
|
99
|
-
when @lf then @char = nextchar ; nil
|
100
|
-
else nil
|
89
|
+
token << @quote + (scan_until(/#{@quote}/o) or bomb "bad inline quote")
|
101
90
|
end
|
91
|
+
elsif scan(@sep) then return @es
|
92
|
+
elsif scan(@eol) then return nil
|
102
93
|
else # consume unquoted cell
|
103
|
-
token = scan_until(
|
94
|
+
token = scan_until(@eoc) or bomb "unexpected character"
|
104
95
|
token.prepend(@eq) if excel
|
105
|
-
@char = nextchar if (@char = currchar) == @sep
|
106
|
-
@strip ? token.strip : token
|
107
96
|
end
|
97
|
+
scan(@sep)
|
98
|
+
@strip ? token.strip : token
|
108
99
|
end
|
109
100
|
|
110
101
|
def bomb(msg)
|
@@ -178,7 +169,7 @@ class Censive < StringScanner
|
|
178
169
|
end
|
179
170
|
end.join(s)
|
180
171
|
|
181
|
-
@out << out + @
|
172
|
+
@out << out + @rowsep
|
182
173
|
end
|
183
174
|
|
184
175
|
def each
|
@@ -201,14 +192,15 @@ class Censive < StringScanner
|
|
201
192
|
end
|
202
193
|
|
203
194
|
if __FILE__ == $0
|
204
|
-
raw = DATA.
|
205
|
-
# raw = DATA.gets("\n\n").chomp
|
195
|
+
raw = DATA.gets("\n\n").chomp
|
206
196
|
# raw = File.read(ARGV.first || "lc-2023.csv")
|
207
|
-
csv = Censive.new(raw, excel: true, relax: true
|
208
|
-
csv.export(sep: "
|
197
|
+
csv = Censive.new(raw, excel: true, relax: true)
|
198
|
+
csv.export # (sep: ",", excel: true)
|
209
199
|
end
|
210
200
|
|
211
201
|
__END__
|
202
|
+
123,"CHO, JOELLE "JOJO"",456
|
203
|
+
|
212
204
|
Name,Age,Shoe
|
213
205
|
Alice,27,5
|
214
206
|
Bob,33,10 1/2
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: censive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.14'
|
4
|
+
version: '0.16'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Steve Shreeve
|
@@ -9,21 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
date: 2023-02-05 00:00:00.000000000 Z
|
12
|
-
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: strscan
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ">="
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: 3.0.6
|
20
|
-
type: :runtime
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - ">="
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: 3.0.6
|
12
|
+
dependencies: []
|
27
13
|
description: A quick and lightweight CSV handling library for Ruby
|
28
14
|
email: steve.shreeve@gmail.com
|
29
15
|
executables: []
|