censive 0.2 → 0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 654d5b8603cb9d50881c9b4289a27722f0bbc3e3c7607b86f99a5b2e3014de38
4
- data.tar.gz: 18ce8e5b1c8596c314aa90110ac541218ea7382daa5b863ab96925bb8b0dd26d
3
+ metadata.gz: 99c4cd0527e02e78f1fc12c95eda569b23849940d105a6e79442e7eb56623384
4
+ data.tar.gz: 173aa495f225aa40c1a75686eea8f98e91828cf03e92ab1fee43aba92c27ccfe
5
5
  SHA512:
6
- metadata.gz: e94f1c8147463ab51d0aa4bda19792f255a6458ecd0abd4ebcb2a3f2d286a1cc41771af4be85285b901f8b2a856914f482230595d82b735b5fca076d0d8d498e
7
- data.tar.gz: 2949df7cadab7b0e7a091eddeb057bb73288e32f7493d420556fb42886531fbd77e59143d4c2e0119a0f4994245fa726d70bff39c8dd8f3e17195369827ff041
6
+ metadata.gz: 10010211f2d2f50f075b6ffff16517bea7a9c821cfb9143ea38ca6cce4323a9aa5c59529f84d96f9ed7077aa1fd93418312fa084774f7f34027fd436e7dc9f49
7
+ data.tar.gz: 61fbef5d03f87a0f92f0c1456699d2489d9f13a1fc4ae1dbb7f1f7196b1e7f10b2f94429cc8e18d23e7edca458189bdb535da833ab4c1968bcb1ace92d9f409f
data/README.md CHANGED
@@ -1,3 +1,19 @@
1
1
  # censive
2
2
 
3
3
  A quick and lightweight CSV handling library for Ruby
4
+
5
+ ## Writing CSV
6
+
7
+ ```ruby
8
+ require 'censive'
9
+
10
+ # read in a comma-separated csv file
11
+ data = File.read('data.csv')
12
+
13
+ # write out a tab-separated tsv file
14
+ Censive.writer('out.tsv', sep: "\t") do |out|
15
+ Censive.new(data).each do |row|
16
+ out << row
17
+ end
18
+ end
19
+ ```
data/censive.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = "censive"
5
- s.version = "0.2"
5
+ s.version = "0.4"
6
6
  s.author = "Steve Shreeve"
7
7
  s.email = "steve.shreeve@gmail.com"
8
8
  s.summary = "A quick and lightweight CSV handling library for Ruby"
data/lib/censive.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  # ==============================================================================
4
- # censive - A quick and lightweight CVS handling library for Ruby
4
+ # censive - A quick and lightweight CSV handling library for Ruby
5
5
  #
6
6
  # Author: Steve Shreeve (steve.shreeve@gmail.com)
7
7
  # Date: Jan 30, 2023
@@ -10,7 +10,15 @@
10
10
  #
11
11
  # 1. Faster than Ruby's default CSV library
12
12
  # 2. Lightweight code base with streamlined method calls
13
- # 3. Eventually support IO streaming
13
+ #
14
+ # To consider:
15
+ #
16
+ # 1. Option to support IO streaming
17
+ # 2. Option to strip whitespace
18
+ # 3. Option to change output line endings
19
+ # 4. Option to force quotes in output
20
+ # 5. Option to allow reading Excel CSV (="Text" for cells)
21
+ # 6. Confirm file encodings such as UTF-8, UTF-16, etc.
14
22
  #
15
23
  # NOTE: Only getch and scan_until advance strscan's position
16
24
  # ==============================================================================
@@ -18,22 +26,34 @@
18
26
  require 'strscan'
19
27
 
20
28
  class Censive < StringScanner
21
- def initialize(string, sep: ',', quote: '"')
22
- super(string)
23
- reset
24
29
 
30
+ def self.writer(path, **opts)
31
+ File.open(path, 'w') do |file|
32
+ yield new(out: file, **opts)
33
+ end
34
+ end
35
+
36
+ def initialize(str=nil, sep: ',', quote: '"', out: nil, mode: :compact)
37
+ super(str || '')
38
+ reset
25
39
  @sep = sep .freeze
26
40
  @quote = quote.freeze
27
-
28
41
  @es = "" .freeze
29
42
  @cr = "\r" .freeze
30
43
  @lf = "\n" .freeze
44
+ @out = out
45
+ @esc = (@quote * 2).freeze
46
+ @mode = mode
31
47
  end
32
48
 
33
- def reset
34
- super
49
+ def reset(str=nil)
50
+ self.string = str if str
51
+ super()
35
52
  @char = string[pos]
36
53
  @flag = nil
54
+
55
+ @rows = nil
56
+ @cols = @cells = 0
37
57
  end
38
58
 
39
59
  # ==[ Lexer ]==
@@ -58,7 +78,7 @@ class Censive < StringScanner
58
78
  getch # consume the quote (optimized by not calling next_char)
59
79
  match << (scan_until(/(?=#{@quote})/o) or bomb "unclosed quote")
60
80
  case next_char
61
- when @sep then next_char; break
81
+ when @sep then @flag = @es; next_char; break
62
82
  when @quote then match << @quote
63
83
  when @cr,@lf,nil then break
64
84
  else bomb "unexpected character after quote"
@@ -73,20 +93,19 @@ class Censive < StringScanner
73
93
  else # consume_unquoted_cell
74
94
  match = scan_until(/(?=#{@sep}|#{@cr}|#{@lf}|\z)/o) or bomb "unexpected character"
75
95
  @char = string[pos]
76
- @char == @sep and next_char
96
+ @char == @sep and @flag = @es and next_char
77
97
  match
78
98
  end
79
99
  end
80
100
 
81
101
  def bomb(msg)
82
- abort "censive: #{msg} at character #{pos} near '#{string[pos-4,7]}'"
102
+ abort "#{File.basename($0)}: #{msg} at character #{pos} near '#{string[pos-4,7]}'"
83
103
  end
84
104
 
85
105
  # ==[ Parser ]==
86
106
 
87
107
  def parse
88
- @rows = []
89
- @cols = @cells = 0
108
+ @rows ||= []
90
109
  while row = next_row
91
110
  @rows << row
92
111
  size = row.size
@@ -105,6 +124,50 @@ class Censive < StringScanner
105
124
 
106
125
  # ==[ Helpers ]==
107
126
 
127
+ # grok returns 2 (contains a quote, with or without seps), 1 (seps only), 0 (neither)
128
+ def grok(str)
129
+ if pos = str.index(/(#{@quote})|#{@sep}/o)
130
+ $1 ? 2 : str.index(/#{@quote}/o, pos) ? 2 : 1
131
+ else
132
+ 0
133
+ end
134
+ end
135
+
136
+ def <<(row)
137
+ @out or return super
138
+
139
+ # most compact export format
140
+ s,q = @sep, @quote
141
+ out = case @mode
142
+ when :compact
143
+ case grok(row.join)
144
+ when 0 then row
145
+ when 1 then row.map {|col| col.include?(s) ? "#{q}#{col}#{q}" : col }
146
+ else
147
+ row.map do |col|
148
+ case grok(col)
149
+ when 0 then col
150
+ when 1 then "#{q}#{col}#{q}"
151
+ else "#{q}#{col.gsub(q, @esc)}#{q}"
152
+ end
153
+ end
154
+ end
155
+ when :full
156
+ row.map {|col| "#{q}#{col.gsub(q, @esc)}#{q}" }
157
+ end.join(s)
158
+
159
+ #!# TODO: allow an option to remove trailing seps in the output
160
+ # out.gsub!(/#{s}+\z/,'')
161
+
162
+ #!# TODO: allow these line endings to be configurable
163
+ @out << out + @lf
164
+ end
165
+
166
+ def each
167
+ @rows ||= parse
168
+ @rows.each {|row| yield row }
169
+ end
170
+
108
171
  def stats
109
172
  wide = string.size.to_s.size
110
173
  puts "%#{wide}d rows" % @rows.size
@@ -113,8 +176,17 @@ class Censive < StringScanner
113
176
  puts "%#{wide}d bytes" % string.size
114
177
  end
115
178
  end
116
- #
117
- # # ==[ Test it out... ]==
179
+
180
+ # ==[ Playground... ]==
181
+
182
+ data = File.read('1.csv')
183
+
184
+ Censive.writer('out.csv', sep: ',', quote: "'", mode: :compact) do |out|
185
+ Censive.new(data).each do |row|
186
+ out << row
187
+ end
188
+ end
189
+
118
190
  #
119
191
  # ARGV << "z.csv" if ARGV.empty?
120
192
  #
@@ -0,0 +1,3 @@
1
+ age name
2
+ 5 'Mike the ''man!'''
3
+ 10 Tommy
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: censive
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.2'
4
+ version: '0.4'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steve Shreeve
@@ -20,6 +20,7 @@ files:
20
20
  - README.md
21
21
  - censive.gemspec
22
22
  - lib/censive.rb
23
+ - test/a-uses-tabs-and-single-quotes-and-no-trailing-newline.csv
23
24
  homepage: https://github.com/shreeve/censive
24
25
  licenses:
25
26
  - MIT