censive 0.14 → 0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/censive.gemspec +1 -2
- data/lib/censive.rb +49 -57
- metadata +2 -16
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 7abd1490426ab80b6b1cc3c7b28a590eb22c5dd565405662dd82bd1c427b013b
|
|
4
|
+
data.tar.gz: adb90312a493b92ccb36e7354153a30f530f33d2b205fd6ebbea49ca1e80efe5
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f603db91e71cce8d72322d9a248f81f625238aa2a7d5107a6849d18ecdb00cf94865f2ad716afde1df79e4d0dd126765968822e3e9d53f3bea09886f2ae08d89
|
|
7
|
+
data.tar.gz: b5fc1fa9b2a309677091334d50200ada240ea2490d23d427cc32774a828a672ce1be1c22e156d21a7ef0ebdfc604f7b003386f46d4bc7fb2b6fdb50dc6d2a857
|
data/censive.gemspec
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
Gem::Specification.new do |s|
|
|
4
4
|
s.name = "censive"
|
|
5
|
-
s.version = "0.14"
|
|
5
|
+
s.version = "0.16"
|
|
6
6
|
s.author = "Steve Shreeve"
|
|
7
7
|
s.email = "steve.shreeve@gmail.com"
|
|
8
8
|
s.summary =
|
|
@@ -11,5 +11,4 @@ Gem::Specification.new do |s|
|
|
|
11
11
|
s.license = "MIT"
|
|
12
12
|
s.files = `git ls-files`.split("\n") - %w[.gitignore]
|
|
13
13
|
s.executables = `(cd bin 2>&1) > /dev/null && git ls-files .`.split("\n")
|
|
14
|
-
s.add_dependency "strscan", ">= 3.0.6"
|
|
15
14
|
end
|
data/lib/censive.rb
CHANGED
|
@@ -4,12 +4,12 @@
|
|
|
4
4
|
# censive - A quick and lightweight CSV handling library for Ruby
|
|
5
5
|
#
|
|
6
6
|
# Author: Steve Shreeve (steve.shreeve@gmail.com)
|
|
7
|
-
# Date: Feb
|
|
7
|
+
# Date: Feb 5, 2023
|
|
8
8
|
#
|
|
9
9
|
# https://crystal-lang.org/api/1.7.2/CSV.html (Crystal's CSV library)
|
|
10
10
|
# https://github.com/ruby/strscan/blob/master/ext/strscan/strscan.c
|
|
11
|
-
#
|
|
12
|
-
#
|
|
11
|
+
#
|
|
12
|
+
# Thanks to Sutou Kouhei (kou) for his excellent advice on using scan
|
|
13
13
|
# ============================================================================
|
|
14
14
|
# GOALS:
|
|
15
15
|
# 1. Faster than Ruby's default CSV library
|
|
@@ -19,7 +19,6 @@
|
|
|
19
19
|
# TODO: Support IO streaming
|
|
20
20
|
# ============================================================================
|
|
21
21
|
|
|
22
|
-
require "bundler/setup"
|
|
23
22
|
require "strscan"
|
|
24
23
|
|
|
25
24
|
class Censive < StringScanner
|
|
@@ -33,41 +32,44 @@ class Censive < StringScanner
|
|
|
33
32
|
end
|
|
34
33
|
|
|
35
34
|
def initialize(str=nil,
|
|
36
|
-
drop:
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
sep:
|
|
44
|
-
strip:
|
|
45
|
-
**opts
|
|
35
|
+
drop: false , # drop trailing empty fields?
|
|
36
|
+
excel: false , # literals ="01" formulas =A1 + B2 http://bit.ly/3Y7jIvc
|
|
37
|
+
mode: :compact, # export mode: compact or full
|
|
38
|
+
out: $stdout , # output stream, needs to respond to <<
|
|
39
|
+
quote: '"' , # quote character
|
|
40
|
+
relax: false , # relax quote parsing so ,"Fo"o, => ,"Fo""o",
|
|
41
|
+
rowsep: "\n" , # row separator for export
|
|
42
|
+
sep: "," , # column separator character
|
|
43
|
+
strip: false , # strip fields when reading
|
|
44
|
+
**opts # grab bag
|
|
46
45
|
)
|
|
47
46
|
super(str || "")
|
|
48
47
|
reset
|
|
49
48
|
|
|
50
|
-
|
|
51
|
-
@
|
|
52
|
-
@excel
|
|
53
|
-
@mode
|
|
54
|
-
@out
|
|
55
|
-
@quote
|
|
56
|
-
@relax
|
|
57
|
-
@
|
|
58
|
-
@
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
@
|
|
63
|
-
@
|
|
64
|
-
@
|
|
49
|
+
# options
|
|
50
|
+
@drop = drop
|
|
51
|
+
@excel = excel
|
|
52
|
+
@mode = mode
|
|
53
|
+
@out = out
|
|
54
|
+
@quote = quote
|
|
55
|
+
@relax = relax
|
|
56
|
+
@rowsep = rowsep
|
|
57
|
+
@sep = sep
|
|
58
|
+
@strip = strip
|
|
59
|
+
|
|
60
|
+
# determined
|
|
61
|
+
@cr = "\r"
|
|
62
|
+
@lf = "\n"
|
|
63
|
+
@es = ""
|
|
64
|
+
@eq = "="
|
|
65
|
+
@esc = (@quote * 2)
|
|
66
|
+
@eol = /#{@cr}#{@lf}?|#{@lf}|\z/o # end of line
|
|
67
|
+
@eoc = /(?=#{"\\" + @sep}|#{@cr}|#{@lf}|\z)/o # end of cell
|
|
65
68
|
end
|
|
66
69
|
|
|
67
70
|
def reset(str=nil)
|
|
68
71
|
self.string = str if str
|
|
69
72
|
super()
|
|
70
|
-
@char = currchar
|
|
71
73
|
@rows = nil
|
|
72
74
|
@cols = @cells = 0
|
|
73
75
|
end
|
|
@@ -75,36 +77,25 @@ class Censive < StringScanner
|
|
|
75
77
|
# ==[ Lexer ]==
|
|
76
78
|
|
|
77
79
|
def next_token
|
|
78
|
-
if @excel && @
|
|
79
|
-
excel = true
|
|
80
|
-
@char = nextchar
|
|
81
|
-
end
|
|
80
|
+
excel = true if @excel && scan(@eq)
|
|
82
81
|
|
|
83
|
-
if @
|
|
82
|
+
if scan(@quote) # consume quoted cell
|
|
84
83
|
token = ""
|
|
85
84
|
while true
|
|
86
|
-
@
|
|
87
|
-
token <<
|
|
88
|
-
|
|
89
|
-
break if [@sep,@cr,@lf,@es,nil].include?(@char)
|
|
85
|
+
token << (scan_until(/#{@quote}/o) or bomb "unclosed quote")[0..-2]
|
|
86
|
+
token << @quote and next if scan(@quote)
|
|
87
|
+
break if scan(@eoc)
|
|
90
88
|
@relax or bomb "invalid character after quote"
|
|
91
|
-
token << @quote + scan_until(
|
|
92
|
-
end
|
|
93
|
-
@char = nextchar if @char == @sep
|
|
94
|
-
@strip ? token.strip : token
|
|
95
|
-
elsif [@sep,@cr,@lf,@es,nil].include?(@char)
|
|
96
|
-
case @char
|
|
97
|
-
when @sep then @char = nextchar ; @es
|
|
98
|
-
when @cr then (@char = nextchar) == @lf and @char = nextchar; nil
|
|
99
|
-
when @lf then @char = nextchar ; nil
|
|
100
|
-
else nil
|
|
89
|
+
token << @quote + (scan_until(/#{@quote}/o) or bomb "bad inline quote")
|
|
101
90
|
end
|
|
91
|
+
elsif scan(@sep) then return @es
|
|
92
|
+
elsif scan(@eol) then return nil
|
|
102
93
|
else # consume unquoted cell
|
|
103
|
-
token = scan_until(
|
|
94
|
+
token = scan_until(@eoc) or bomb "unexpected character"
|
|
104
95
|
token.prepend(@eq) if excel
|
|
105
|
-
@char = nextchar if (@char = currchar) == @sep
|
|
106
|
-
@strip ? token.strip : token
|
|
107
96
|
end
|
|
97
|
+
scan(@sep)
|
|
98
|
+
@strip ? token.strip : token
|
|
108
99
|
end
|
|
109
100
|
|
|
110
101
|
def bomb(msg)
|
|
@@ -178,7 +169,7 @@ class Censive < StringScanner
|
|
|
178
169
|
end
|
|
179
170
|
end.join(s)
|
|
180
171
|
|
|
181
|
-
@out << out + @
|
|
172
|
+
@out << out + @rowsep
|
|
182
173
|
end
|
|
183
174
|
|
|
184
175
|
def each
|
|
@@ -201,14 +192,15 @@ class Censive < StringScanner
|
|
|
201
192
|
end
|
|
202
193
|
|
|
203
194
|
if __FILE__ == $0
|
|
204
|
-
raw = DATA.
|
|
205
|
-
# raw = DATA.gets("\n\n").chomp
|
|
195
|
+
raw = DATA.gets("\n\n").chomp
|
|
206
196
|
# raw = File.read(ARGV.first || "lc-2023.csv")
|
|
207
|
-
csv = Censive.new(raw, excel: true, relax: true
|
|
208
|
-
csv.export(sep: "
|
|
197
|
+
csv = Censive.new(raw, excel: true, relax: true)
|
|
198
|
+
csv.export # (sep: ",", excel: true)
|
|
209
199
|
end
|
|
210
200
|
|
|
211
201
|
__END__
|
|
202
|
+
123,"CHO, JOELLE "JOJO"",456
|
|
203
|
+
|
|
212
204
|
Name,Age,Shoe
|
|
213
205
|
Alice,27,5
|
|
214
206
|
Bob,33,10 1/2
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: censive
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: '0.14'
|
|
4
|
+
version: '0.16'
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Steve Shreeve
|
|
@@ -9,21 +9,7 @@ autorequire:
|
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
11
|
date: 2023-02-05 00:00:00.000000000 Z
|
|
12
|
-
dependencies:
|
|
13
|
-
- !ruby/object:Gem::Dependency
|
|
14
|
-
name: strscan
|
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
|
16
|
-
requirements:
|
|
17
|
-
- - ">="
|
|
18
|
-
- !ruby/object:Gem::Version
|
|
19
|
-
version: 3.0.6
|
|
20
|
-
type: :runtime
|
|
21
|
-
prerelease: false
|
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
-
requirements:
|
|
24
|
-
- - ">="
|
|
25
|
-
- !ruby/object:Gem::Version
|
|
26
|
-
version: 3.0.6
|
|
12
|
+
dependencies: []
|
|
27
13
|
description: A quick and lightweight CSV handling library for Ruby
|
|
28
14
|
email: steve.shreeve@gmail.com
|
|
29
15
|
executables: []
|