censive 0.2 → 0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +16 -0
- data/censive.gemspec +1 -1
- data/lib/censive.rb +87 -15
- data/test/a-uses-tabs-and-single-quotes-and-no-trailing-newline.csv +3 -0
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 99c4cd0527e02e78f1fc12c95eda569b23849940d105a6e79442e7eb56623384
|
4
|
+
data.tar.gz: 173aa495f225aa40c1a75686eea8f98e91828cf03e92ab1fee43aba92c27ccfe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 10010211f2d2f50f075b6ffff16517bea7a9c821cfb9143ea38ca6cce4323a9aa5c59529f84d96f9ed7077aa1fd93418312fa084774f7f34027fd436e7dc9f49
|
7
|
+
data.tar.gz: 61fbef5d03f87a0f92f0c1456699d2489d9f13a1fc4ae1dbb7f1f7196b1e7f10b2f94429cc8e18d23e7edca458189bdb535da833ab4c1968bcb1ace92d9f409f
|
data/README.md
CHANGED
@@ -1,3 +1,19 @@
|
|
1
1
|
# censive
|
2
2
|
|
3
3
|
A quick and lightweight CSV handling library for Ruby
|
4
|
+
|
5
|
+
## Writing CSV
|
6
|
+
|
7
|
+
```ruby
|
8
|
+
require 'censive'
|
9
|
+
|
10
|
+
# read in a comma-separated csv file
|
11
|
+
data = File.read('data.csv')
|
12
|
+
|
13
|
+
# write out a tab-separated tsv file
|
14
|
+
Censive.writer('out.tsv', sep: "\t") do |out|
|
15
|
+
Censive.new(data).each do |row|
|
16
|
+
out << row
|
17
|
+
end
|
18
|
+
end
|
19
|
+
```
|
data/censive.gemspec
CHANGED
data/lib/censive.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
3
|
# ==============================================================================
|
4
|
-
# censive - A quick and lightweight
|
4
|
+
# censive - A quick and lightweight CSV handling library for Ruby
|
5
5
|
#
|
6
6
|
# Author: Steve Shreeve (steve.shreeve@gmail.com)
|
7
7
|
# Date: Jan 30, 2023
|
@@ -10,7 +10,15 @@
|
|
10
10
|
#
|
11
11
|
# 1. Faster than Ruby's default CSV library
|
12
12
|
# 2. Lightweight code base with streamlined method calls
|
13
|
-
#
|
13
|
+
#
|
14
|
+
# To consider:
|
15
|
+
#
|
16
|
+
# 1. Option to support IO streaming
|
17
|
+
# 2. Option to strip whitespace
|
18
|
+
# 3. Option to change output line endings
|
19
|
+
# 4. Option to force quotes in output
|
20
|
+
# 5. Option to allow reading excel CSV (="Text" for cells)
|
21
|
+
# 6. Confirm file encodings such as UTF-8, UTF-16, etc.
|
14
22
|
#
|
15
23
|
# NOTE: Only getch and scan_until advance strscan's position
|
16
24
|
# ==============================================================================
|
@@ -18,22 +26,34 @@
|
|
18
26
|
require 'strscan'
|
19
27
|
|
20
28
|
class Censive < StringScanner
|
21
|
-
def initialize(string, sep: ',', quote: '"')
|
22
|
-
super(string)
|
23
|
-
reset
|
24
29
|
|
30
|
+
def self.writer(path, **opts)
|
31
|
+
File.open(path, 'w') do |file|
|
32
|
+
yield new(out: file, **opts)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
def initialize(str=nil, sep: ',', quote: '"', out: nil, mode: :compact)
|
37
|
+
super(str || '')
|
38
|
+
reset
|
25
39
|
@sep = sep .freeze
|
26
40
|
@quote = quote.freeze
|
27
|
-
|
28
41
|
@es = "" .freeze
|
29
42
|
@cr = "\r" .freeze
|
30
43
|
@lf = "\n" .freeze
|
44
|
+
@out = out
|
45
|
+
@esc = (@quote * 2).freeze
|
46
|
+
@mode = mode
|
31
47
|
end
|
32
48
|
|
33
|
-
def reset
|
34
|
-
|
49
|
+
def reset(str=nil)
|
50
|
+
self.string = str if str
|
51
|
+
super()
|
35
52
|
@char = string[pos]
|
36
53
|
@flag = nil
|
54
|
+
|
55
|
+
@rows = nil
|
56
|
+
@cols = @cells = 0
|
37
57
|
end
|
38
58
|
|
39
59
|
# ==[ Lexer ]==
|
@@ -58,7 +78,7 @@ class Censive < StringScanner
|
|
58
78
|
getch # consume the quote (optimized by not calling next_char)
|
59
79
|
match << (scan_until(/(?=#{@quote})/o) or bomb "unclosed quote")
|
60
80
|
case next_char
|
61
|
-
when @sep then next_char; break
|
81
|
+
when @sep then @flag = @es; next_char; break
|
62
82
|
when @quote then match << @quote
|
63
83
|
when @cr,@lf,nil then break
|
64
84
|
else bomb "unexpected character after quote"
|
@@ -73,20 +93,19 @@ class Censive < StringScanner
|
|
73
93
|
else # consume_unquoted_cell
|
74
94
|
match = scan_until(/(?=#{@sep}|#{@cr}|#{@lf}|\z)/o) or bomb "unexpected character"
|
75
95
|
@char = string[pos]
|
76
|
-
@char == @sep and next_char
|
96
|
+
@char == @sep and @flag = @es and next_char
|
77
97
|
match
|
78
98
|
end
|
79
99
|
end
|
80
100
|
|
81
101
|
def bomb(msg)
|
82
|
-
abort "
|
102
|
+
abort "#{File.basename($0)}: #{msg} at character #{pos} near '#{string[pos-4,7]}'"
|
83
103
|
end
|
84
104
|
|
85
105
|
# ==[ Parser ]==
|
86
106
|
|
87
107
|
def parse
|
88
|
-
@rows
|
89
|
-
@cols = @cells = 0
|
108
|
+
@rows ||= []
|
90
109
|
while row = next_row
|
91
110
|
@rows << row
|
92
111
|
size = row.size
|
@@ -105,6 +124,50 @@ class Censive < StringScanner
|
|
105
124
|
|
106
125
|
# ==[ Helpers ]==
|
107
126
|
|
127
|
+
# grok returns 2 (contains a quote, with or without seps), 1 (seps only), 0 (neither)
|
128
|
+
def grok(str)
|
129
|
+
if pos = str.index(/(#{@quote})|#{@sep}/o)
|
130
|
+
$1 ? 2 : str.index(/#{@quote}/o, pos) ? 2 : 1
|
131
|
+
else
|
132
|
+
0
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
136
|
+
def <<(row)
|
137
|
+
@out or return super
|
138
|
+
|
139
|
+
# most compact export format
|
140
|
+
s,q = @sep, @quote
|
141
|
+
out = case @mode
|
142
|
+
when :compact
|
143
|
+
case grok(row.join)
|
144
|
+
when 0 then row
|
145
|
+
when 1 then row.map {|col| col.include?(s) ? "#{q}#{col}#{q}" : col }
|
146
|
+
else
|
147
|
+
row.map do |col|
|
148
|
+
case grok(col)
|
149
|
+
when 0 then col
|
150
|
+
when 1 then "#{q}#{col}#{q}"
|
151
|
+
else "#{q}#{col.gsub(q, @esc)}#{q}"
|
152
|
+
end
|
153
|
+
end
|
154
|
+
end
|
155
|
+
when :full
|
156
|
+
row.map {|col| "#{q}#{col.gsub(q, @esc)}#{q}" }
|
157
|
+
end.join(s)
|
158
|
+
|
159
|
+
#!# TODO: allow an option to remove trailing seps in the output
|
160
|
+
# out.gsub!(/#{s}+\z/,'')
|
161
|
+
|
162
|
+
#!# TODO: allow these line endings to be configurable
|
163
|
+
@out << out + @lf
|
164
|
+
end
|
165
|
+
|
166
|
+
def each
|
167
|
+
@rows ||= parse
|
168
|
+
@rows.each {|row| yield row }
|
169
|
+
end
|
170
|
+
|
108
171
|
def stats
|
109
172
|
wide = string.size.to_s.size
|
110
173
|
puts "%#{wide}d rows" % @rows.size
|
@@ -113,8 +176,17 @@ class Censive < StringScanner
|
|
113
176
|
puts "%#{wide}d bytes" % string.size
|
114
177
|
end
|
115
178
|
end
|
116
|
-
|
117
|
-
#
|
179
|
+
|
180
|
+
# ==[ Playground... ]==
|
181
|
+
|
182
|
+
data = File.read('1.csv')
|
183
|
+
|
184
|
+
Censive.writer('out.csv', sep: ',', quote: "'", mode: :compact) do |out|
|
185
|
+
Censive.new(data).each do |row|
|
186
|
+
out << row
|
187
|
+
end
|
188
|
+
end
|
189
|
+
|
118
190
|
#
|
119
191
|
# ARGV << "z.csv" if ARGV.empty?
|
120
192
|
#
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: censive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.
|
4
|
+
version: '0.4'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Steve Shreeve
|
@@ -20,6 +20,7 @@ files:
|
|
20
20
|
- README.md
|
21
21
|
- censive.gemspec
|
22
22
|
- lib/censive.rb
|
23
|
+
- test/a-uses-tabs-and-single-quotes-and-no-trailing-newline.csv
|
23
24
|
homepage: https://github.com/shreeve/censive
|
24
25
|
licenses:
|
25
26
|
- MIT
|