censive 0.9 → 0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cbca33c415269ae1fafea5297f2b409879a46c37c884a0a7017bca322bcff2a6
4
- data.tar.gz: ac021ddf3d7503aebc5791b0912c6409a0888627060b532e65f6eb72b94965a3
3
+ metadata.gz: f4b06c1c42b5f813f8901c4e7240cdd43df1ccc22cf87327dc3ed7d850720eb4
4
+ data.tar.gz: 97ab27b79eead81517fa28a4c51923fa02ec2fa95922f6f61dc509c7a4890b2e
5
5
  SHA512:
6
- metadata.gz: 8095c0c7704e3a6ee66930b36f0131b38d52a68cdd066d9677e8ceb58c4ecd7ce7eed496c78b1841cabe845b8c82624ca808b33a7cf7ec4c8fd504b287b3ffb5
7
- data.tar.gz: 2e363b63b37977784a38c06e091f3201a1cd7a13138e8101e0e41ca49c47b3c4b433e7e6f2843a6816ddcbf9c1c8293da0d858f6be38bd0d3d82ed5dbd904bfe
6
+ metadata.gz: a2f297ac516f5e01510a9ceb90cdb2cc1e782ff97a4f67515d73f6d56d8512cd4d9cbb5d04425bcdb8a7a5cdb63aeb2835e7bed2a76dcc149dae0bd63c4cc17b
7
+ data.tar.gz: 85762c69bc669db5a48f0e3b58e4319afdc9f1765cc18cd2b6c9501aaaccf3e41dddb58d3654bc4c76242632eaf8988f348bb652c6c40105e0373b8afdf463d3
data/README.md CHANGED
@@ -2,7 +2,58 @@
2
2
 
3
3
  A quick and lightweight CSV handling library for Ruby
4
4
 
5
- ## Writing CSV
5
+ ## Example
6
+
7
+ ```ruby
8
+ #!/usr/bin/env ruby
9
+
10
+ STDOUT.sync = true
11
+
12
+ require 'censive'
13
+ require 'fileutils'
14
+
15
+ abort "usage: #{File.basename($0)} <files>" if ARGV.empty?
16
+
17
+ rand = `LC_ALL=C tr -dc a-zA-Z0-9 < /dev/random | head -c12`
18
+
19
+ rows = []
20
+ cols = []
21
+ coun = 0
22
+ full = 0
23
+
24
+ ARGV.each do |path|
25
+ File.file?(path) or next
26
+
27
+ print "Processing #{path.inspect}"
28
+
29
+ rows.clear
30
+ cols.clear
31
+ seen = 0
32
+ coun += 1
33
+
34
+ dest = "#{path}-#{rand}"
35
+
36
+ begin
37
+ Censive.writer(dest) do |file|
38
+ Censive.reader(path, excel: true, relax: true).each do |cols|
39
+ file << cols
40
+ seen += 1
41
+ print "." if (seen % 1e5) == 0 # give a status update every so often
42
+ end
43
+ end
44
+ FileUtils.mv(dest, path)
45
+ full += (seen - 1)
46
+ puts " (#{seen - 1} rows of data)"
47
+ rescue
48
+ puts " - unable to process (#{$!})"
49
+ FileUtils.rm_f(dest)
50
+ end
51
+ end
52
+
53
+ puts "Processed #{coun} files with a total of #{full} rows of data" if coun > 1
54
+ ```
55
+
56
+ ## Convert a CSV file to a TSV file
6
57
 
7
58
  ```ruby
8
59
  require 'censive'
data/censive.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = "censive"
5
- s.version = "0.9"
5
+ s.version = "0.10"
6
6
  s.author = "Steve Shreeve"
7
7
  s.email = "steve.shreeve@gmail.com"
8
8
  s.summary = "A quick and lightweight CSV handling library for Ruby"
data/lib/censive.rb CHANGED
@@ -28,37 +28,36 @@ require 'strscan'
28
28
 
29
29
  class Censive < StringScanner
30
30
 
31
- def self.writer(path, **opts)
32
- File.open(path, 'w') do |file|
33
- yield new(out: file, **opts)
31
+ def self.writer(obj=$stdout, **opts, &code)
32
+ case obj
33
+ when String then File.open(path, 'w') {|file| yield new(out: obj, **opts, &code) }
34
+ when IO then new(out: obj, **opts, &code)
35
+ else abort "#{File.basename($0)}: invalid #{obj.class} object in writer"
34
36
  end
35
37
  end
36
38
 
37
39
  def initialize(str=nil,
38
- sep: ',' , # column separator character
39
- quote: '"' , # quote character
40
-
41
- drop: false , # enable to drop trailing separators
42
- eol: "\n" , # desired line endings for exports
43
- excel: false , # literals (="01"), formulas (=A1 + B2), see http://bit.ly/3Y7jIvc
40
+ drop: false , # drop trailing empty fields?
41
+ eol: "\n" , # line endings for exports
42
+ excel: false , # literals(="01") formulas(=A1 + B2); http://bit.ly/3Y7jIvc
44
43
  mode: :compact, # export mode: compact or full
45
- out: nil , # output IO/file
46
- relax: false , # relax parsing of quotes
47
-
44
+ out: nil , # output stream, needs to respond to <<
45
+ quote: '"' , # quote character
46
+ relax: false , # relax quote parsing so ,"Fo"o, => ,"Fo""o",
47
+ sep: ',' , # column separator character
48
48
  **opts # grab bag
49
49
  )
50
50
  super(str || '')
51
51
  reset
52
52
 
53
- @sep = sep .freeze
54
- @quote = quote.freeze
55
-
56
53
  @drop = drop
57
- @eol = eol.freeze
54
+ @eol = eol .freeze #!# TODO: are the '.freeze' statements helpful?
58
55
  @excel = excel
59
56
  @mode = mode
60
57
  @out = out
58
+ @quote = quote.freeze
61
59
  @relax = relax
60
+ @sep = sep .freeze
62
61
 
63
62
  @es = "" .freeze
64
63
  @cr = "\r" .freeze
@@ -96,7 +95,7 @@ class Censive < StringScanner
96
95
  else @flag = nil
97
96
  end if @flag
98
97
 
99
- # See http://bit.ly/3Y7jIvc
98
+ # Excel literals ="0123" and formulas =A1 + B2 (see http://bit.ly/3Y7jIvc)
100
99
  if @excel && @char == @eq
101
100
  @flag = @eq
102
101
  next_char
@@ -107,18 +106,13 @@ class Censive < StringScanner
107
106
  when @quote # consume quoted cell
108
107
  match = ""
109
108
  while true
110
- getch # consume the quote that got us here
109
+ getch # move past the quote that got us here
111
110
  match << (scan_until(/(?=#{@quote})/o) or bomb "unclosed quote")
112
111
  case next_char
113
112
  when @sep then @flag = @es; next_char; break
114
113
  when @quote then match << @quote
115
114
  when @cr,@lf,@es,nil then break
116
- else
117
- if @relax
118
- match << @quote + @char
119
- else
120
- bomb "invalid character after quote"
121
- end
115
+ else @relax ? match << (@quote + @char) : bomb("invalid character after quote")
122
116
  end
123
117
  end
124
118
  match
@@ -129,7 +123,7 @@ class Censive < StringScanner
129
123
  end
130
124
  else # consume unquoted cell
131
125
  match = scan_until(/(?=#{@sep}|#{@cr}|#{@lf}|\z)/o) or bomb "unexpected character"
132
- match = @eq + match if @flag == @eq # preserve @eq for excel formulas
126
+ match = @eq + match and @flag = nil if @flag == @eq
133
127
  @char = peek(1)
134
128
  @char == @sep and @flag = @es and next_char
135
129
  match
@@ -143,12 +137,12 @@ class Censive < StringScanner
143
137
  # ==[ Parser ]==
144
138
 
145
139
  def parse
146
- @rows ||= []
140
+ @rows = []
147
141
  while row = next_row
148
142
  @rows << row
149
- size = row.size
150
- @cols = size if size > @cols
151
- @cells += size
143
+ count = row.size
144
+ @cols = count if count > @cols
145
+ @cells += count
152
146
  end
153
147
  @rows
154
148
  end
@@ -171,13 +165,15 @@ class Censive < StringScanner
171
165
  end
172
166
  end
173
167
 
168
+ # output a row
174
169
  def <<(row)
175
170
  @out or return super
176
171
 
177
- # drop trailing seps, if specified
172
+ # drop trailing empty columns
178
173
  row.pop while row.last.empty? if @drop
179
174
 
180
- # most compact export format
175
+ #!# FIXME: Excel output needs to protect 0-leading numbers
176
+
181
177
  s,q = @sep, @quote
182
178
  out = case @mode
183
179
  when :compact
@@ -201,7 +197,7 @@ class Censive < StringScanner
201
197
  row.map {|col| "#{q}#{col.gsub(q, @esc)}#{q}" }
202
198
  end.join(s)
203
199
 
204
- # write output, using desired line endings
200
+ # add line ending
205
201
  @out << out + @eol
206
202
  end
207
203
 
@@ -210,6 +206,11 @@ class Censive < StringScanner
210
206
  @rows.each {|row| yield row }
211
207
  end
212
208
 
209
+ def export(...)
210
+ out = self.class.writer(...)
211
+ each {|row| out << row }
212
+ end
213
+
213
214
  def stats
214
215
  wide = string.size.to_s.size
215
216
  puts "%#{wide}d rows" % @rows.size
@@ -219,9 +220,35 @@ class Censive < StringScanner
219
220
  end
220
221
  end
221
222
 
223
+ # ==[ Command line ]==
224
+
225
+ if __FILE__ == $0
226
+ raw = DATA.gets("\n\n").chomp
227
+ csv = Censive.new(raw, excel: true)
228
+ csv.export # (sep: "\t", excel: true)
229
+ end
230
+
222
231
  __END__
232
+ Name,Age,Shoe
233
+ Alice,27,5
234
+ Bob,33,10 1/2
235
+ Charlie or "Chuck",=B2 + B3,9
236
+ "Doug E Fresh",="007",10
237
+ Subtotal,=sum(B2:B5),="01234"
238
+
223
239
 
224
- # ==[ Playground... ]==
240
+
241
+
242
+ path = '../test/a-uses-tabs-and-single-quotes-and-no-trailing-newline.tsv'
243
+ data = File.read(path)
244
+
245
+ out = Censive.writer
246
+
247
+ Censive.new(data, sep: "\t", quote: "'").each do |row|
248
+ p row
249
+ end
250
+
251
+ Censive.reader(path, sep: "\t", quote: "'").each {|r| p r}
225
252
 
226
253
  # STDOUT.sync = true
227
254
  #
@@ -234,14 +261,14 @@ __END__
234
261
  # end
235
262
  #
236
263
  # __END__
237
-
238
- ARGV << "z.csv" if ARGV.empty?
239
-
240
- path = ARGV.first
241
- data = File.read(path)
242
-
243
- csv = Censive.new(data)
244
-
245
- data.size > 1e6 ? csv.parse : csv.parse.each {|cols| p cols }
246
-
247
- csv.stats
264
+ #
265
+ # ARGV << "z.csv" if ARGV.empty?
266
+ #
267
+ # path = ARGV.first
268
+ # data = File.read(path)
269
+ #
270
+ # csv = Censive.new(data)
271
+ #
272
+ # data.size > 1e6 ? csv.parse : csv.parse.each {|cols| p cols }
273
+ #
274
+ # csv.stats
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: censive
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.9'
4
+ version: '0.10'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steve Shreeve
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-31 00:00:00.000000000 Z
11
+ date: 2023-02-02 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: A quick and lightweight CSV handling library for Ruby
14
14
  email: steve.shreeve@gmail.com
@@ -20,7 +20,7 @@ files:
20
20
  - README.md
21
21
  - censive.gemspec
22
22
  - lib/censive.rb
23
- - test/a-uses-tabs-and-single-quotes-and-no-trailing-newline.csv
23
+ - test/a-uses-tabs-and-single-quotes-and-no-trailing-newline.tsv
24
24
  homepage: https://github.com/shreeve/censive
25
25
  licenses:
26
26
  - MIT