censive 0.1 → 0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 78a04712f2c737b263765b117a0614bddeda6e4c233409827ed900983b061a64
4
- data.tar.gz: e42610ad444e4b4ed9db374f1ee1764c97619fa54235c3bb18e537513804f940
3
+ metadata.gz: 99c4cd0527e02e78f1fc12c95eda569b23849940d105a6e79442e7eb56623384
4
+ data.tar.gz: 173aa495f225aa40c1a75686eea8f98e91828cf03e92ab1fee43aba92c27ccfe
5
5
  SHA512:
6
- metadata.gz: 294e8879052426037bbaed072171ecb8ca0abff47156a4a1d51a3f4531229a6f755ae7db4f8e5dfb7422ad25edf1e145d497bd39d99e46d895cdf6adb4988edc
7
- data.tar.gz: d087b32edd1e467143a1e785bd1aa89d2e096920027185ff2dc07a275317b034521b1143f962774270040ae57b079c3a61fa045d10e79f194312787125527fa0
6
+ metadata.gz: 10010211f2d2f50f075b6ffff16517bea7a9c821cfb9143ea38ca6cce4323a9aa5c59529f84d96f9ed7077aa1fd93418312fa084774f7f34027fd436e7dc9f49
7
+ data.tar.gz: 61fbef5d03f87a0f92f0c1456699d2489d9f13a1fc4ae1dbb7f1f7196b1e7f10b2f94429cc8e18d23e7edca458189bdb535da833ab4c1968bcb1ace92d9f409f
data/README.md CHANGED
@@ -1,3 +1,19 @@
1
1
  # censive
2
2
 
3
3
  A quick and lightweight CSV handling library for Ruby
4
+
5
+ ## Writing CSV
6
+
7
+ ```ruby
8
+ require 'censive'
9
+
10
+ # read in a comma-separated csv file
11
+ data = File.read('data.csv')
12
+
13
+ # write out a tab-separated tsv file
14
+ Censive.writer('out.tsv', sep: "\t") do |out|
15
+ Censive.new(data).each do |row|
16
+ out << row
17
+ end
18
+ end
19
+ ```
data/censive.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = "censive"
5
- s.version = "0.1"
5
+ s.version = "0.4"
6
6
  s.author = "Steve Shreeve"
7
7
  s.email = "steve.shreeve@gmail.com"
8
8
  s.summary = "A quick and lightweight CVS handling library for Ruby"
data/lib/censive.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  # ==============================================================================
4
- # censive - A quick and lightweight CVS handling library for Ruby
4
+ # censive - A quick and lightweight CSV handling library for Ruby
5
5
  #
6
6
  # Author: Steve Shreeve (steve.shreeve@gmail.com)
7
7
  # Date: Jan 30, 2023
@@ -10,7 +10,15 @@
10
10
  #
11
11
  # 1. Faster than Ruby's default CSV library
12
12
  # 2. Lightweight code base with streamlined method calls
13
- # 3. Eventually support IO streaming
13
+ #
14
+ # To consider:
15
+ #
16
+ # 1. Option to support IO streaming
17
+ # 2. Option to strip whitespace
18
+ # 3. Option to change output line endings
19
+ # 4. Option to force quotes in output
20
+ # 5. Option to allow reading excel CSV (="Text" for cells)
21
+ # 6. Confirm file encodings such as UTF-8, UTF-16, etc.
14
22
  #
15
23
  # NOTE: Only getch and scan_until advance strscan's position
16
24
  # ==============================================================================
@@ -18,20 +26,34 @@
18
26
  require 'strscan'
19
27
 
20
28
  class Censive < StringScanner
21
- def initialize(...)
22
- super
29
+
30
+ def self.writer(path, **opts)
31
+ File.open(path, 'w') do |file|
32
+ yield new(out: file, **opts)
33
+ end
34
+ end
35
+
36
+ def initialize(str=nil, sep: ',', quote: '"', out: nil, mode: :compact)
37
+ super(str || '')
23
38
  reset
24
- @sep = ',' .freeze # make this a param
25
- @quote = '"' .freeze # make this a param
26
- @es = "" .freeze
27
- @cr = "\r".freeze
28
- @lf = "\n".freeze
39
+ @sep = sep .freeze
40
+ @quote = quote.freeze
41
+ @es = "" .freeze
42
+ @cr = "\r" .freeze
43
+ @lf = "\n" .freeze
44
+ @out = out
45
+ @esc = (@quote * 2).freeze
46
+ @mode = mode
29
47
  end
30
48
 
31
- def reset
32
- super
49
+ def reset(str=nil)
50
+ self.string = str if str
51
+ super()
33
52
  @char = string[pos]
34
53
  @flag = nil
54
+
55
+ @rows = nil
56
+ @cols = @cells = 0
35
57
  end
36
58
 
37
59
  # ==[ Lexer ]==
@@ -56,7 +78,7 @@ class Censive < StringScanner
56
78
  getch # consume the quote (optimized by not calling next_char)
57
79
  match << (scan_until(/(?=#{@quote})/o) or bomb "unclosed quote")
58
80
  case next_char
59
- when @sep then next_char; break
81
+ when @sep then @flag = @es; next_char; break
60
82
  when @quote then match << @quote
61
83
  when @cr,@lf,nil then break
62
84
  else bomb "unexpected character after quote"
@@ -71,20 +93,19 @@ class Censive < StringScanner
71
93
  else # consume_unquoted_cell
72
94
  match = scan_until(/(?=#{@sep}|#{@cr}|#{@lf}|\z)/o) or bomb "unexpected character"
73
95
  @char = string[pos]
74
- @char == @sep and next_char
96
+ @char == @sep and @flag = @es and next_char
75
97
  match
76
98
  end
77
99
  end
78
100
 
79
101
  def bomb(msg)
80
- abort "censive: #{msg} at character #{pos} near '#{string[pos-4,7]}'"
102
+ abort "#{File.basename($0)}: #{msg} at character #{pos} near '#{string[pos-4,7]}'"
81
103
  end
82
104
 
83
105
  # ==[ Parser ]==
84
106
 
85
107
  def parse
86
- @rows = []
87
- @cols = @cells = 0
108
+ @rows ||= []
88
109
  while row = next_row
89
110
  @rows << row
90
111
  size = row.size
@@ -103,6 +124,50 @@ class Censive < StringScanner
103
124
 
104
125
  # ==[ Helpers ]==
105
126
 
127
+ # grok returns 2 (quotes present, with or without seps), 1 (seps only), 0 (neither)
128
+ def grok(str)
129
+ if pos = str.index(/(#{@quote})|#{@sep}/o)
130
+ $1 ? 2 : str.index(/#{@quote}/o, pos) ? 2 : 1
131
+ else
132
+ 0
133
+ end
134
+ end
135
+
136
+ def <<(row)
137
+ @out or return super
138
+
139
+ # most compact export format
140
+ s,q = @sep, @quote
141
+ out = case @mode
142
+ when :compact
143
+ case grok(row.join)
144
+ when 0 then row
145
+ when 1 then row.map {|col| col.include?(s) ? "#{q}#{col}#{q}" : col }
146
+ else
147
+ row.map do |col|
148
+ case grok(col)
149
+ when 0 then col
150
+ when 1 then "#{q}#{col}#{q}"
151
+ else "#{q}#{col.gsub(q, @esc)}#{q}"
152
+ end
153
+ end
154
+ end
155
+ when :full
156
+ row.map {|col| "#{q}#{col.gsub(q, @esc)}#{q}" }
157
+ end.join(s)
158
+
159
+ #!# TODO: allow an option to remove trailing seps in the output
160
+ # out.gsub!(/#{s}+\z/,'')
161
+
162
+ #!# TODO: allow these line endings to be configurable
163
+ @out << out + @lf
164
+ end
165
+
166
+ def each
167
+ @rows ||= parse
168
+ @rows.each {|row| yield row }
169
+ end
170
+
106
171
  def stats
107
172
  wide = string.size.to_s.size
108
173
  puts "%#{wide}d rows" % @rows.size
@@ -112,22 +177,31 @@ class Censive < StringScanner
112
177
  end
113
178
  end
114
179
 
115
- # ==[ Test it out... ]==
180
+ # ==[ Playground... ]==
116
181
 
117
- ARGV << "z.csv" if ARGV.empty?
182
+ data = File.read('1.csv')
118
183
 
119
- case 1
120
- when 1
121
- path = ARGV.first
122
- data = File.read(path)
123
- when 2
124
- data = DATA.gets("\n\n").rstrip
184
+ Censive.writer('out.csv', sep: ',', quote: "'", mode: :compact) do |out|
185
+ Censive.new(data).each do |row|
186
+ out << row
187
+ end
125
188
  end
126
189
 
127
- STDOUT.sync = true
128
-
129
- csv = Censive.new(data)
130
-
131
- data.size > 1e6 ? csv.parse : csv.parse.each {|cols| p cols }
132
-
133
- csv.stats
190
+ #
191
+ # ARGV << "z.csv" if ARGV.empty?
192
+ #
193
+ # case 1
194
+ # when 1
195
+ # path = ARGV.first
196
+ # data = File.read(path)
197
+ # when 2
198
+ # data = DATA.gets("\n\n").rstrip
199
+ # end
200
+ #
201
+ # STDOUT.sync = true
202
+ #
203
+ # csv = Censive.new(data)
204
+ #
205
+ # data.size > 1e6 ? csv.parse : csv.parse.each {|cols| p cols }
206
+ #
207
+ # csv.stats
@@ -0,0 +1,3 @@
1
+ age name
2
+ 5 'Mike the ''man!'''
3
+ 10 Tommy
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: censive
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.1'
4
+ version: '0.4'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steve Shreeve
@@ -20,6 +20,7 @@ files:
20
20
  - README.md
21
21
  - censive.gemspec
22
22
  - lib/censive.rb
23
+ - test/a-uses-tabs-and-single-quotes-and-no-trailing-newline.csv
23
24
  homepage: https://github.com/shreeve/censive
24
25
  licenses:
25
26
  - MIT