censive 0.2 → 0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +16 -0
- data/censive.gemspec +1 -1
- data/lib/censive.rb +87 -15
- data/test/a-uses-tabs-and-single-quotes-and-no-trailing-newline.csv +3 -0
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 99c4cd0527e02e78f1fc12c95eda569b23849940d105a6e79442e7eb56623384
|
4
|
+
data.tar.gz: 173aa495f225aa40c1a75686eea8f98e91828cf03e92ab1fee43aba92c27ccfe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 10010211f2d2f50f075b6ffff16517bea7a9c821cfb9143ea38ca6cce4323a9aa5c59529f84d96f9ed7077aa1fd93418312fa084774f7f34027fd436e7dc9f49
|
7
|
+
data.tar.gz: 61fbef5d03f87a0f92f0c1456699d2489d9f13a1fc4ae1dbb7f1f7196b1e7f10b2f94429cc8e18d23e7edca458189bdb535da833ab4c1968bcb1ace92d9f409f
|
data/README.md
CHANGED
@@ -1,3 +1,19 @@
|
|
1
1
|
# censive
|
2
2
|
|
3
3
|
A quick and lightweight CSV handling library for Ruby
|
4
|
+
|
5
|
+
## Writing CSV
|
6
|
+
|
7
|
+
```ruby
|
8
|
+
require 'censive'
|
9
|
+
|
10
|
+
# read in a comma-separated csv file
|
11
|
+
data = File.read('data.csv')
|
12
|
+
|
13
|
+
# write out a tab-separated tsv file
|
14
|
+
Censive.writer('out.tsv', sep: "\t") do |out|
|
15
|
+
Censive.new(data).each do |row|
|
16
|
+
out << row
|
17
|
+
end
|
18
|
+
end
|
19
|
+
```
|
data/censive.gemspec
CHANGED
data/lib/censive.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
3
|
# ==============================================================================
|
4
|
-
# censive - A quick and lightweight
|
4
|
+
# censive - A quick and lightweight CSV handling library for Ruby
|
5
5
|
#
|
6
6
|
# Author: Steve Shreeve (steve.shreeve@gmail.com)
|
7
7
|
# Date: Jan 30, 2023
|
@@ -10,7 +10,15 @@
|
|
10
10
|
#
|
11
11
|
# 1. Faster than Ruby's default CSV library
|
12
12
|
# 2. Lightweight code base with streamlined method calls
|
13
|
-
#
|
13
|
+
#
|
14
|
+
# To consider:
|
15
|
+
#
|
16
|
+
# 1. Option to support IO streaming
|
17
|
+
# 2. Option to strip whitespace
|
18
|
+
# 3. Option to change output line endings
|
19
|
+
# 4. Option to force quotes in output
|
20
|
+
# 5. Option to allow reading excel CSV (="Text" for cells)
|
21
|
+
# 6. Confirm file encodings such as UTF-8, UTF-16, etc.
|
14
22
|
#
|
15
23
|
# NOTE: Only getch and scan_until advance strscan's position
|
16
24
|
# ==============================================================================
|
@@ -18,22 +26,34 @@
|
|
18
26
|
require 'strscan'
|
19
27
|
|
20
28
|
class Censive < StringScanner
|
21
|
-
def initialize(string, sep: ',', quote: '"')
|
22
|
-
super(string)
|
23
|
-
reset
|
24
29
|
|
30
|
+
def self.writer(path, **opts)
|
31
|
+
File.open(path, 'w') do |file|
|
32
|
+
yield new(out: file, **opts)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
def initialize(str=nil, sep: ',', quote: '"', out: nil, mode: :compact)
|
37
|
+
super(str || '')
|
38
|
+
reset
|
25
39
|
@sep = sep .freeze
|
26
40
|
@quote = quote.freeze
|
27
|
-
|
28
41
|
@es = "" .freeze
|
29
42
|
@cr = "\r" .freeze
|
30
43
|
@lf = "\n" .freeze
|
44
|
+
@out = out
|
45
|
+
@esc = (@quote * 2).freeze
|
46
|
+
@mode = mode
|
31
47
|
end
|
32
48
|
|
33
|
-
def reset
|
34
|
-
|
49
|
+
def reset(str=nil)
|
50
|
+
self.string = str if str
|
51
|
+
super()
|
35
52
|
@char = string[pos]
|
36
53
|
@flag = nil
|
54
|
+
|
55
|
+
@rows = nil
|
56
|
+
@cols = @cells = 0
|
37
57
|
end
|
38
58
|
|
39
59
|
# ==[ Lexer ]==
|
@@ -58,7 +78,7 @@ class Censive < StringScanner
|
|
58
78
|
getch # consume the quote (optimized by not calling next_char)
|
59
79
|
match << (scan_until(/(?=#{@quote})/o) or bomb "unclosed quote")
|
60
80
|
case next_char
|
61
|
-
when @sep then next_char; break
|
81
|
+
when @sep then @flag = @es; next_char; break
|
62
82
|
when @quote then match << @quote
|
63
83
|
when @cr,@lf,nil then break
|
64
84
|
else bomb "unexpected character after quote"
|
@@ -73,20 +93,19 @@ class Censive < StringScanner
|
|
73
93
|
else # consume_unquoted_cell
|
74
94
|
match = scan_until(/(?=#{@sep}|#{@cr}|#{@lf}|\z)/o) or bomb "unexpected character"
|
75
95
|
@char = string[pos]
|
76
|
-
@char == @sep and next_char
|
96
|
+
@char == @sep and @flag = @es and next_char
|
77
97
|
match
|
78
98
|
end
|
79
99
|
end
|
80
100
|
|
81
101
|
def bomb(msg)
|
82
|
-
abort "
|
102
|
+
abort "#{File.basename($0)}: #{msg} at character #{pos} near '#{string[pos-4,7]}'"
|
83
103
|
end
|
84
104
|
|
85
105
|
# ==[ Parser ]==
|
86
106
|
|
87
107
|
def parse
|
88
|
-
@rows
|
89
|
-
@cols = @cells = 0
|
108
|
+
@rows ||= []
|
90
109
|
while row = next_row
|
91
110
|
@rows << row
|
92
111
|
size = row.size
|
@@ -105,6 +124,50 @@ class Censive < StringScanner
|
|
105
124
|
|
106
125
|
# ==[ Helpers ]==
|
107
126
|
|
127
|
+
# grok returns 2 (seps and quotes), 1 (seps only), 0 (neither)
|
128
|
+
def grok(str)
|
129
|
+
if pos = str.index(/(#{@quote})|#{@sep}/o)
|
130
|
+
$1 ? 2 : str.index(/#{@quote}/o, pos) ? 2 : 1
|
131
|
+
else
|
132
|
+
0
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
136
|
+
def <<(row)
|
137
|
+
@out or return super
|
138
|
+
|
139
|
+
# most compact export format
|
140
|
+
s,q = @sep, @quote
|
141
|
+
out = case @mode
|
142
|
+
when :compact
|
143
|
+
case grok(row.join)
|
144
|
+
when 0 then row
|
145
|
+
when 1 then row.map {|col| col.include?(s) ? "#{q}#{col}#{q}" : col }
|
146
|
+
else
|
147
|
+
row.map do |col|
|
148
|
+
case grok(col)
|
149
|
+
when 0 then col
|
150
|
+
when 1 then "#{q}#{col}#{q}"
|
151
|
+
else "#{q}#{col.gsub(q, @esc)}#{q}"
|
152
|
+
end
|
153
|
+
end
|
154
|
+
end
|
155
|
+
when :full
|
156
|
+
row.map {|col| "#{q}#{col.gsub(q, @esc)}#{q}" }
|
157
|
+
end.join(s)
|
158
|
+
|
159
|
+
#!# TODO: allow an option to remove trailing seps in the output
|
160
|
+
# out.gsub!(/#{s}+\z/,'')
|
161
|
+
|
162
|
+
#!# TODO: allow these line endings to be configurable
|
163
|
+
@out << out + @lf
|
164
|
+
end
|
165
|
+
|
166
|
+
def each
|
167
|
+
@rows ||= parse
|
168
|
+
@rows.each {|row| yield row }
|
169
|
+
end
|
170
|
+
|
108
171
|
def stats
|
109
172
|
wide = string.size.to_s.size
|
110
173
|
puts "%#{wide}d rows" % @rows.size
|
@@ -113,8 +176,17 @@ class Censive < StringScanner
|
|
113
176
|
puts "%#{wide}d bytes" % string.size
|
114
177
|
end
|
115
178
|
end
|
116
|
-
|
117
|
-
#
|
179
|
+
|
180
|
+
# ==[ Playground... ]==
|
181
|
+
|
182
|
+
data = File.read('1.csv')
|
183
|
+
|
184
|
+
Censive.writer('out.csv', sep: ',', quote: "'", mode: :compact) do |out|
|
185
|
+
Censive.new(data).each do |row|
|
186
|
+
out << row
|
187
|
+
end
|
188
|
+
end
|
189
|
+
|
118
190
|
#
|
119
191
|
# ARGV << "z.csv" if ARGV.empty?
|
120
192
|
#
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: censive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.
|
4
|
+
version: '0.4'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Steve Shreeve
|
@@ -20,6 +20,7 @@ files:
|
|
20
20
|
- README.md
|
21
21
|
- censive.gemspec
|
22
22
|
- lib/censive.rb
|
23
|
+
- test/a-uses-tabs-and-single-quotes-and-no-trailing-newline.csv
|
23
24
|
homepage: https://github.com/shreeve/censive
|
24
25
|
licenses:
|
25
26
|
- MIT
|