censive 0.1 → 0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +16 -0
- data/censive.gemspec +1 -1
- data/lib/censive.rb +105 -31
- data/test/a-uses-tabs-and-single-quotes-and-no-trailing-newline.csv +3 -0
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 99c4cd0527e02e78f1fc12c95eda569b23849940d105a6e79442e7eb56623384
|
|
4
|
+
data.tar.gz: 173aa495f225aa40c1a75686eea8f98e91828cf03e92ab1fee43aba92c27ccfe
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 10010211f2d2f50f075b6ffff16517bea7a9c821cfb9143ea38ca6cce4323a9aa5c59529f84d96f9ed7077aa1fd93418312fa084774f7f34027fd436e7dc9f49
|
|
7
|
+
data.tar.gz: 61fbef5d03f87a0f92f0c1456699d2489d9f13a1fc4ae1dbb7f1f7196b1e7f10b2f94429cc8e18d23e7edca458189bdb535da833ab4c1968bcb1ace92d9f409f
|
data/README.md
CHANGED
|
@@ -1,3 +1,19 @@
|
|
|
1
1
|
# censive
|
|
2
2
|
|
|
3
3
|
A quick and lightweight CSV handling library for Ruby
|
|
4
|
+
|
|
5
|
+
## Writing CSV
|
|
6
|
+
|
|
7
|
+
```ruby
|
|
8
|
+
require 'censive'
|
|
9
|
+
|
|
10
|
+
# read in a comma-separated csv file
|
|
11
|
+
data = File.read('data.csv')
|
|
12
|
+
|
|
13
|
+
# write out a tab-separated tsv file
|
|
14
|
+
Censive.writer('out.tsv', sep: "\t") do |out|
|
|
15
|
+
Censive.new(data).each do |row|
|
|
16
|
+
out << row
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
```
|
data/censive.gemspec
CHANGED
data/lib/censive.rb
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env ruby
|
|
2
2
|
|
|
3
3
|
# ==============================================================================
|
|
4
|
-
# censive - A quick and lightweight
|
|
4
|
+
# censive - A quick and lightweight CSV handling library for Ruby
|
|
5
5
|
#
|
|
6
6
|
# Author: Steve Shreeve (steve.shreeve@gmail.com)
|
|
7
7
|
# Date: Jan 30, 2023
|
|
@@ -10,7 +10,15 @@
|
|
|
10
10
|
#
|
|
11
11
|
# 1. Faster than Ruby's default CSV library
|
|
12
12
|
# 2. Lightweight code base with streamlined method calls
|
|
13
|
-
#
|
|
13
|
+
#
|
|
14
|
+
# To consider:
|
|
15
|
+
#
|
|
16
|
+
# 1. Option to support IO streaming
|
|
17
|
+
# 2. Option to strip whitespace
|
|
18
|
+
# 3. Option to change output line endings
|
|
19
|
+
# 4. Option to force quotes in output
|
|
20
|
+
# 5. Option to allow reading excel CSV (="Text" for cells)
|
|
21
|
+
# 6. Confirm file encodings such as UTF-8, UTF-16, etc.
|
|
14
22
|
#
|
|
15
23
|
# NOTE: Only getch and scan_until advance strscan's position
|
|
16
24
|
# ==============================================================================
|
|
@@ -18,20 +26,34 @@
|
|
|
18
26
|
require 'strscan'
|
|
19
27
|
|
|
20
28
|
class Censive < StringScanner
|
|
21
|
-
|
|
22
|
-
|
|
29
|
+
|
|
30
|
+
def self.writer(path, **opts)
|
|
31
|
+
File.open(path, 'w') do |file|
|
|
32
|
+
yield new(out: file, **opts)
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
def initialize(str=nil, sep: ',', quote: '"', out: nil, mode: :compact)
|
|
37
|
+
super(str || '')
|
|
23
38
|
reset
|
|
24
|
-
@sep =
|
|
25
|
-
@quote =
|
|
26
|
-
@es = ""
|
|
27
|
-
@cr = "\r".freeze
|
|
28
|
-
@lf = "\n".freeze
|
|
39
|
+
@sep = sep .freeze
|
|
40
|
+
@quote = quote.freeze
|
|
41
|
+
@es = "" .freeze
|
|
42
|
+
@cr = "\r" .freeze
|
|
43
|
+
@lf = "\n" .freeze
|
|
44
|
+
@out = out
|
|
45
|
+
@esc = (@quote * 2).freeze
|
|
46
|
+
@mode = mode
|
|
29
47
|
end
|
|
30
48
|
|
|
31
|
-
def reset
|
|
32
|
-
|
|
49
|
+
def reset(str=nil)
|
|
50
|
+
self.string = str if str
|
|
51
|
+
super()
|
|
33
52
|
@char = string[pos]
|
|
34
53
|
@flag = nil
|
|
54
|
+
|
|
55
|
+
@rows = nil
|
|
56
|
+
@cols = @cells = 0
|
|
35
57
|
end
|
|
36
58
|
|
|
37
59
|
# ==[ Lexer ]==
|
|
@@ -56,7 +78,7 @@ class Censive < StringScanner
|
|
|
56
78
|
getch # consume the quote (optimized by not calling next_char)
|
|
57
79
|
match << (scan_until(/(?=#{@quote})/o) or bomb "unclosed quote")
|
|
58
80
|
case next_char
|
|
59
|
-
when @sep then next_char; break
|
|
81
|
+
when @sep then @flag = @es; next_char; break
|
|
60
82
|
when @quote then match << @quote
|
|
61
83
|
when @cr,@lf,nil then break
|
|
62
84
|
else bomb "unexpected character after quote"
|
|
@@ -71,20 +93,19 @@ class Censive < StringScanner
|
|
|
71
93
|
else # consume_unquoted_cell
|
|
72
94
|
match = scan_until(/(?=#{@sep}|#{@cr}|#{@lf}|\z)/o) or bomb "unexpected character"
|
|
73
95
|
@char = string[pos]
|
|
74
|
-
@char == @sep and next_char
|
|
96
|
+
@char == @sep and @flag = @es and next_char
|
|
75
97
|
match
|
|
76
98
|
end
|
|
77
99
|
end
|
|
78
100
|
|
|
79
101
|
def bomb(msg)
|
|
80
|
-
abort "
|
|
102
|
+
abort "#{File.basename($0)}: #{msg} at character #{pos} near '#{string[pos-4,7]}'"
|
|
81
103
|
end
|
|
82
104
|
|
|
83
105
|
# ==[ Parser ]==
|
|
84
106
|
|
|
85
107
|
def parse
|
|
86
|
-
@rows
|
|
87
|
-
@cols = @cells = 0
|
|
108
|
+
@rows ||= []
|
|
88
109
|
while row = next_row
|
|
89
110
|
@rows << row
|
|
90
111
|
size = row.size
|
|
@@ -103,6 +124,50 @@ class Censive < StringScanner
|
|
|
103
124
|
|
|
104
125
|
# ==[ Helpers ]==
|
|
105
126
|
|
|
127
|
+
# grok returns 2 (contains a quote, with or without seps), 1 (seps only), 0 (neither)
|
|
128
|
+
def grok(str)
|
|
129
|
+
if pos = str.index(/(#{@quote})|#{@sep}/o)
|
|
130
|
+
$1 ? 2 : str.index(/#{@quote}/o, pos) ? 2 : 1
|
|
131
|
+
else
|
|
132
|
+
0
|
|
133
|
+
end
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
def <<(row)
|
|
137
|
+
@out or return super
|
|
138
|
+
|
|
139
|
+
# most compact export format
|
|
140
|
+
s,q = @sep, @quote
|
|
141
|
+
out = case @mode
|
|
142
|
+
when :compact
|
|
143
|
+
case grok(row.join)
|
|
144
|
+
when 0 then row
|
|
145
|
+
when 1 then row.map {|col| col.include?(s) ? "#{q}#{col}#{q}" : col }
|
|
146
|
+
else
|
|
147
|
+
row.map do |col|
|
|
148
|
+
case grok(col)
|
|
149
|
+
when 0 then col
|
|
150
|
+
when 1 then "#{q}#{col}#{q}"
|
|
151
|
+
else "#{q}#{col.gsub(q, @esc)}#{q}"
|
|
152
|
+
end
|
|
153
|
+
end
|
|
154
|
+
end
|
|
155
|
+
when :full
|
|
156
|
+
row.map {|col| "#{q}#{col.gsub(q, @esc)}#{q}" }
|
|
157
|
+
end.join(s)
|
|
158
|
+
|
|
159
|
+
#!# TODO: allow an option to remove trailing seps in the output
|
|
160
|
+
# out.gsub!(/#{s}+\z/,'')
|
|
161
|
+
|
|
162
|
+
#!# TODO: allow these line endings to be configurable
|
|
163
|
+
@out << out + @lf
|
|
164
|
+
end
|
|
165
|
+
|
|
166
|
+
def each
|
|
167
|
+
@rows ||= parse
|
|
168
|
+
@rows.each {|row| yield row }
|
|
169
|
+
end
|
|
170
|
+
|
|
106
171
|
def stats
|
|
107
172
|
wide = string.size.to_s.size
|
|
108
173
|
puts "%#{wide}d rows" % @rows.size
|
|
@@ -112,22 +177,31 @@ class Censive < StringScanner
|
|
|
112
177
|
end
|
|
113
178
|
end
|
|
114
179
|
|
|
115
|
-
# ==[
|
|
180
|
+
# ==[ Playground... ]==
|
|
116
181
|
|
|
117
|
-
|
|
182
|
+
data = File.read('1.csv')
|
|
118
183
|
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
when 2
|
|
124
|
-
data = DATA.gets("\n\n").rstrip
|
|
184
|
+
Censive.writer('out.csv', sep: ',', quote: "'", mode: :compact) do |out|
|
|
185
|
+
Censive.new(data).each do |row|
|
|
186
|
+
out << row
|
|
187
|
+
end
|
|
125
188
|
end
|
|
126
189
|
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
190
|
+
#
|
|
191
|
+
# ARGV << "z.csv" if ARGV.empty?
|
|
192
|
+
#
|
|
193
|
+
# case 1
|
|
194
|
+
# when 1
|
|
195
|
+
# path = ARGV.first
|
|
196
|
+
# data = File.read(path)
|
|
197
|
+
# when 2
|
|
198
|
+
# data = DATA.gets("\n\n").rstrip
|
|
199
|
+
# end
|
|
200
|
+
#
|
|
201
|
+
# STDOUT.sync = true
|
|
202
|
+
#
|
|
203
|
+
# csv = Censive.new(data)
|
|
204
|
+
#
|
|
205
|
+
# data.size > 1e6 ? csv.parse : csv.parse.each {|cols| p cols }
|
|
206
|
+
#
|
|
207
|
+
# csv.stats
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: censive
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: '0.
|
|
4
|
+
version: '0.4'
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Steve Shreeve
|
|
@@ -20,6 +20,7 @@ files:
|
|
|
20
20
|
- README.md
|
|
21
21
|
- censive.gemspec
|
|
22
22
|
- lib/censive.rb
|
|
23
|
+
- test/a-uses-tabs-and-single-quotes-and-no-trailing-newline.csv
|
|
23
24
|
homepage: https://github.com/shreeve/censive
|
|
24
25
|
licenses:
|
|
25
26
|
- MIT
|