censive 0.9 → 0.10

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cbca33c415269ae1fafea5297f2b409879a46c37c884a0a7017bca322bcff2a6
4
- data.tar.gz: ac021ddf3d7503aebc5791b0912c6409a0888627060b532e65f6eb72b94965a3
3
+ metadata.gz: f4b06c1c42b5f813f8901c4e7240cdd43df1ccc22cf87327dc3ed7d850720eb4
4
+ data.tar.gz: 97ab27b79eead81517fa28a4c51923fa02ec2fa95922f6f61dc509c7a4890b2e
5
5
  SHA512:
6
- metadata.gz: 8095c0c7704e3a6ee66930b36f0131b38d52a68cdd066d9677e8ceb58c4ecd7ce7eed496c78b1841cabe845b8c82624ca808b33a7cf7ec4c8fd504b287b3ffb5
7
- data.tar.gz: 2e363b63b37977784a38c06e091f3201a1cd7a13138e8101e0e41ca49c47b3c4b433e7e6f2843a6816ddcbf9c1c8293da0d858f6be38bd0d3d82ed5dbd904bfe
6
+ metadata.gz: a2f297ac516f5e01510a9ceb90cdb2cc1e782ff97a4f67515d73f6d56d8512cd4d9cbb5d04425bcdb8a7a5cdb63aeb2835e7bed2a76dcc149dae0bd63c4cc17b
7
+ data.tar.gz: 85762c69bc669db5a48f0e3b58e4319afdc9f1765cc18cd2b6c9501aaaccf3e41dddb58d3654bc4c76242632eaf8988f348bb652c6c40105e0373b8afdf463d3
data/README.md CHANGED
@@ -2,7 +2,58 @@
2
2
 
3
3
  A quick and lightweight CSV handling library for Ruby
4
4
 
5
- ## Writing CSV
5
+ ## Example
6
+
7
+ ```ruby
8
#!/usr/bin/env ruby

# Rewrites each CSV file named on the command line in place: every row is read
# with Censive (excel and relax modes on) and written back through a Censive
# writer into a temporary sibling file, which then replaces the original.

STDOUT.sync = true

require 'censive'
require 'fileutils'
require 'securerandom'

abort "usage: #{File.basename($0)} <files>" if ARGV.empty?

# 12 random alphanumerics for the temporary file names; SecureRandom avoids
# shelling out to tr/head and depending on /dev/random being present
suffix = SecureRandom.alphanumeric(12)

count = 0 # files processed
total = 0 # data rows written across all files

ARGV.each do |path|
  next unless File.file?(path)

  print "Processing #{path.inspect}"

  seen = 0
  count += 1
  dest = "#{path}-#{suffix}"

  begin
    Censive.writer(dest) do |file|
      Censive.reader(path, excel: true, relax: true).each do |row|
        file << row
        seen += 1
        print "." if (seen % 100_000).zero? # give a status update every so often
      end
    end
    FileUtils.mv(dest, path)

    # one row is not counted as data (presumably the header row); clamp at
    # zero so an empty file is not reported as -1 rows
    data = [seen - 1, 0].max
    total += data
    puts " (#{data} rows of data)"
  rescue => e
    puts " - unable to process (#{e})"
    FileUtils.rm_f(dest) # never leave a half-written temporary file behind
  end
end

puts "Processed #{count} files with a total of #{total} rows of data" if count > 1
54
+ ```
55
+
56
+ ## Convert a CSV file to a TSV file
6
57
 
7
58
  ```ruby
8
59
  require 'censive'
data/censive.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = "censive"
5
- s.version = "0.9"
5
+ s.version = "0.10"
6
6
  s.author = "Steve Shreeve"
7
7
  s.email = "steve.shreeve@gmail.com"
8
8
  s.summary = "A quick and lightweight CSV handling library for Ruby"
data/lib/censive.rb CHANGED
@@ -28,37 +28,36 @@ require 'strscan'
28
28
 
29
29
  class Censive < StringScanner
30
30
 
31
- def self.writer(path, **opts)
32
- File.open(path, 'w') do |file|
33
- yield new(out: file, **opts)
31
# Build a writer that sends its rows to +obj+.
#
# obj  - a String path or an IO (defaults to $stdout).
#        String: the file is opened for writing, a new instance wrapping the
#                open file is yielded to the caller's block, and the file is
#                closed when the block returns.
#        IO:     a new instance wrapping the stream is returned.
# opts - forwarded unchanged to +new+.
# code - the caller's block; also forwarded to +new+.
#
# Aborts the program when +obj+ is neither a String nor an IO.
def self.writer(obj=$stdout, **opts, &code)
  case obj
  # open the path we were given and hand the *file handle* (not the path
  # string) to the new instance as its output stream
  when String then File.open(obj, 'w') {|file| yield new(out: file, **opts, &code) }
  when IO     then new(out: obj, **opts, &code)
  else abort "#{File.basename($0)}: invalid #{obj.class} object in writer"
  end
end
36
38
 
37
39
  def initialize(str=nil,
38
- sep: ',' , # column separator character
39
- quote: '"' , # quote character
40
-
41
- drop: false , # enable to drop trailing separators
42
- eol: "\n" , # desired line endings for exports
43
- excel: false , # literals (="01"), formulas (=A1 + B2), see http://bit.ly/3Y7jIvc
40
+ drop: false , # drop trailing empty fields?
41
+ eol: "\n" , # line endings for exports
42
+ excel: false , # literals(="01") formulas(=A1 + B2); http://bit.ly/3Y7jIvc
44
43
  mode: :compact, # export mode: compact or full
45
- out: nil , # output IO/file
46
- relax: false , # relax parsing of quotes
47
-
44
+ out: nil , # output stream, needs to respond to <<
45
+ quote: '"' , # quote character
46
+ relax: false , # relax quote parsing so ,"Fo"o, => ,"Fo""o",
47
+ sep: ',' , # column separator character
48
48
  **opts # grab bag
49
49
  )
50
50
  super(str || '')
51
51
  reset
52
52
 
53
- @sep = sep .freeze
54
- @quote = quote.freeze
55
-
56
53
  @drop = drop
57
- @eol = eol.freeze
54
+ @eol = eol .freeze #!# TODO: are the '.freeze' statements helpful?
58
55
  @excel = excel
59
56
  @mode = mode
60
57
  @out = out
58
+ @quote = quote.freeze
61
59
  @relax = relax
60
+ @sep = sep .freeze
62
61
 
63
62
  @es = "" .freeze
64
63
  @cr = "\r" .freeze
@@ -96,7 +95,7 @@ class Censive < StringScanner
96
95
  else @flag = nil
97
96
  end if @flag
98
97
 
99
- # See http://bit.ly/3Y7jIvc
98
+ # Excel literals ="0123" and formulas =A1 + B2 (see http://bit.ly/3Y7jIvc)
100
99
  if @excel && @char == @eq
101
100
  @flag = @eq
102
101
  next_char
@@ -107,18 +106,13 @@ class Censive < StringScanner
107
106
  when @quote # consume quoted cell
108
107
  match = ""
109
108
  while true
110
- getch # consume the quote that got us here
109
+ getch # move past the quote that got us here
111
110
  match << (scan_until(/(?=#{@quote})/o) or bomb "unclosed quote")
112
111
  case next_char
113
112
  when @sep then @flag = @es; next_char; break
114
113
  when @quote then match << @quote
115
114
  when @cr,@lf,@es,nil then break
116
- else
117
- if @relax
118
- match << @quote + @char
119
- else
120
- bomb "invalid character after quote"
121
- end
115
+ else @relax ? match << (@quote + @char) : bomb("invalid character after quote")
122
116
  end
123
117
  end
124
118
  match
@@ -129,7 +123,7 @@ class Censive < StringScanner
129
123
  end
130
124
  else # consume unquoted cell
131
125
  match = scan_until(/(?=#{@sep}|#{@cr}|#{@lf}|\z)/o) or bomb "unexpected character"
132
- match = @eq + match if @flag == @eq # preserve @eq for excel formulas
126
+ match = @eq + match and @flag = nil if @flag == @eq
133
127
  @char = peek(1)
134
128
  @char == @sep and @flag = @es and next_char
135
129
  match
@@ -143,12 +137,12 @@ class Censive < StringScanner
143
137
  # ==[ Parser ]==
144
138
 
145
139
  def parse
146
- @rows ||= []
140
+ @rows = []
147
141
  while row = next_row
148
142
  @rows << row
149
- size = row.size
150
- @cols = size if size > @cols
151
- @cells += size
143
+ count = row.size
144
+ @cols = count if count > @cols
145
+ @cells += count
152
146
  end
153
147
  @rows
154
148
  end
@@ -171,13 +165,15 @@ class Censive < StringScanner
171
165
  end
172
166
  end
173
167
 
168
+ # output a row
174
169
  def <<(row)
175
170
  @out or return super
176
171
 
177
- # drop trailing seps, if specified
172
+ # drop trailing empty columns
178
173
  row.pop while row.last.empty? if @drop
179
174
 
180
- # most compact export format
175
+ #!# FIXME: Excel output needs to protect 0-leading numbers
176
+
181
177
  s,q = @sep, @quote
182
178
  out = case @mode
183
179
  when :compact
@@ -201,7 +197,7 @@ class Censive < StringScanner
201
197
  row.map {|col| "#{q}#{col.gsub(q, @esc)}#{q}" }
202
198
  end.join(s)
203
199
 
204
- # write output, using desired line endings
200
+ # add line ending
205
201
  @out << out + @eol
206
202
  end
207
203
 
@@ -210,6 +206,11 @@ class Censive < StringScanner
210
206
  @rows.each {|row| yield row }
211
207
  end
212
208
 
209
# Send every row (as produced by +each+) to a writer built by the class-level
# +writer+ method; all arguments are forwarded to +writer+ untouched.
def export(...)
  sink = self.class.writer(...)
  each do |row|
    sink << row
  end
end
213
+
213
214
  def stats
214
215
  wide = string.size.to_s.size
215
216
  puts "%#{wide}d rows" % @rows.size
@@ -219,9 +220,35 @@ class Censive < StringScanner
219
220
  end
220
221
  end
221
222
 
223
+ # ==[ Command line ]==
224
+
225
# When this file is executed directly (rather than required), read the embedded
# DATA section up to its first blank line, parse it with excel mode on, and
# export it using the export defaults.
if $PROGRAM_NAME == __FILE__
  sample = DATA.gets("\n\n").chomp
  Censive.new(sample, excel: true).export # (sep: "\t", excel: true)
end
230
+
222
231
  __END__
232
+ Name,Age,Shoe
233
+ Alice,27,5
234
+ Bob,33,10 1/2
235
+ Charlie or "Chuck",=B2 + B3,9
236
+ "Doug E Fresh",="007",10
237
+ Subtotal,=sum(B2:B5),="01234"
238
+
223
239
 
224
- # ==[ Playground... ]==
240
+
241
+
242
+ path = '../test/a-uses-tabs-and-single-quotes-and-no-trailing-newline.tsv'
243
+ data = File.read(path)
244
+
245
+ out = Censive.writer
246
+
247
+ Censive.new(data, sep: "\t", quote: "'").each do |row|
248
+ p row
249
+ end
250
+
251
+ Censive.reader(path, sep: "\t", quote: "'").each {|r| p r}
225
252
 
226
253
  # STDOUT.sync = true
227
254
  #
@@ -234,14 +261,14 @@ __END__
234
261
  # end
235
262
  #
236
263
  # __END__
237
-
238
- ARGV << "z.csv" if ARGV.empty?
239
-
240
- path = ARGV.first
241
- data = File.read(path)
242
-
243
- csv = Censive.new(data)
244
-
245
- data.size > 1e6 ? csv.parse : csv.parse.each {|cols| p cols }
246
-
247
- csv.stats
264
+ #
265
+ # ARGV << "z.csv" if ARGV.empty?
266
+ #
267
+ # path = ARGV.first
268
+ # data = File.read(path)
269
+ #
270
+ # csv = Censive.new(data)
271
+ #
272
+ # data.size > 1e6 ? csv.parse : csv.parse.each {|cols| p cols }
273
+ #
274
+ # csv.stats
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: censive
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.9'
4
+ version: '0.10'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steve Shreeve
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-31 00:00:00.000000000 Z
11
+ date: 2023-02-02 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: A quick and lightweight CSV handling library for Ruby
14
14
  email: steve.shreeve@gmail.com
@@ -20,7 +20,7 @@ files:
20
20
  - README.md
21
21
  - censive.gemspec
22
22
  - lib/censive.rb
23
- - test/a-uses-tabs-and-single-quotes-and-no-trailing-newline.csv
23
+ - test/a-uses-tabs-and-single-quotes-and-no-trailing-newline.tsv
24
24
  homepage: https://github.com/shreeve/censive
25
25
  licenses:
26
26
  - MIT