censive 0.2 → 0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 654d5b8603cb9d50881c9b4289a27722f0bbc3e3c7607b86f99a5b2e3014de38
4
- data.tar.gz: 18ce8e5b1c8596c314aa90110ac541218ea7382daa5b863ab96925bb8b0dd26d
3
+ metadata.gz: 99c4cd0527e02e78f1fc12c95eda569b23849940d105a6e79442e7eb56623384
4
+ data.tar.gz: 173aa495f225aa40c1a75686eea8f98e91828cf03e92ab1fee43aba92c27ccfe
5
5
  SHA512:
6
- metadata.gz: e94f1c8147463ab51d0aa4bda19792f255a6458ecd0abd4ebcb2a3f2d286a1cc41771af4be85285b901f8b2a856914f482230595d82b735b5fca076d0d8d498e
7
- data.tar.gz: 2949df7cadab7b0e7a091eddeb057bb73288e32f7493d420556fb42886531fbd77e59143d4c2e0119a0f4994245fa726d70bff39c8dd8f3e17195369827ff041
6
+ metadata.gz: 10010211f2d2f50f075b6ffff16517bea7a9c821cfb9143ea38ca6cce4323a9aa5c59529f84d96f9ed7077aa1fd93418312fa084774f7f34027fd436e7dc9f49
7
+ data.tar.gz: 61fbef5d03f87a0f92f0c1456699d2489d9f13a1fc4ae1dbb7f1f7196b1e7f10b2f94429cc8e18d23e7edca458189bdb535da833ab4c1968bcb1ace92d9f409f
data/README.md CHANGED
@@ -1,3 +1,19 @@
1
1
  # censive
2
2
 
3
3
  A quick and lightweight CSV handling library for Ruby
4
+
5
+ ## Writing CSV
6
+
7
+ ```ruby
8
+ require 'censive'
9
+
10
+ # read in a comma-separated csv file
11
+ data = File.read('data.csv')
12
+
13
+ # write out a tab-separated tsv file
14
+ Censive.writer('out.tsv', sep: "\t") do |out|
15
+ Censive.new(data).each do |row|
16
+ out << row
17
+ end
18
+ end
19
+ ```
data/censive.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = "censive"
5
- s.version = "0.2"
5
+ s.version = "0.4"
6
6
  s.author = "Steve Shreeve"
7
7
  s.email = "steve.shreeve@gmail.com"
8
8
  s.summary = "A quick and lightweight CSV handling library for Ruby"
data/lib/censive.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  # ==============================================================================
4
- # censive - A quick and lightweight CVS handling library for Ruby
4
+ # censive - A quick and lightweight CSV handling library for Ruby
5
5
  #
6
6
  # Author: Steve Shreeve (steve.shreeve@gmail.com)
7
7
  # Date: Jan 30, 2023
@@ -10,7 +10,15 @@
10
10
  #
11
11
  # 1. Faster than Ruby's default CSV library
12
12
  # 2. Lightweight code base with streamlined method calls
13
- # 3. Eventually support IO streaming
13
+ #
14
+ # To consider:
15
+ #
16
+ # 1. Option to support IO streaming
17
+ # 2. Option to strip whitespace
18
+ # 3. Option to change output line endings
19
+ # 4. Option to force quotes in output
20
+ # 5. Option to allow reading excel CSV (="Text" for cells)
21
+ # 6. Confirm file encodings such as UTF-8, UTF-16, etc.
14
22
  #
15
23
  # NOTE: Only getch and scan_until advance strscan's position
16
24
  # ==============================================================================
@@ -18,22 +26,34 @@
18
26
  require 'strscan'
19
27
 
20
28
  class Censive < StringScanner
21
- def initialize(string, sep: ',', quote: '"')
22
- super(string)
23
- reset
24
29
 
30
+ def self.writer(path, **opts)
31
+ File.open(path, 'w') do |file|
32
+ yield new(out: file, **opts)
33
+ end
34
+ end
35
+
36
+ def initialize(str=nil, sep: ',', quote: '"', out: nil, mode: :compact)
37
+ super(str || '')
38
+ reset
25
39
  @sep = sep .freeze
26
40
  @quote = quote.freeze
27
-
28
41
  @es = "" .freeze
29
42
  @cr = "\r" .freeze
30
43
  @lf = "\n" .freeze
44
+ @out = out
45
+ @esc = (@quote * 2).freeze
46
+ @mode = mode
31
47
  end
32
48
 
33
- def reset
34
- super
49
+ def reset(str=nil)
50
+ self.string = str if str
51
+ super()
35
52
  @char = string[pos]
36
53
  @flag = nil
54
+
55
+ @rows = nil
56
+ @cols = @cells = 0
37
57
  end
38
58
 
39
59
  # ==[ Lexer ]==
@@ -58,7 +78,7 @@ class Censive < StringScanner
58
78
  getch # consume the quote (optimized by not calling next_char)
59
79
  match << (scan_until(/(?=#{@quote})/o) or bomb "unclosed quote")
60
80
  case next_char
61
- when @sep then next_char; break
81
+ when @sep then @flag = @es; next_char; break
62
82
  when @quote then match << @quote
63
83
  when @cr,@lf,nil then break
64
84
  else bomb "unexpected character after quote"
@@ -73,20 +93,19 @@ class Censive < StringScanner
73
93
  else # consume_unquoted_cell
74
94
  match = scan_until(/(?=#{@sep}|#{@cr}|#{@lf}|\z)/o) or bomb "unexpected character"
75
95
  @char = string[pos]
76
- @char == @sep and next_char
96
+ @char == @sep and @flag = @es and next_char
77
97
  match
78
98
  end
79
99
  end
80
100
 
81
101
  def bomb(msg)
82
- abort "censive: #{msg} at character #{pos} near '#{string[pos-4,7]}'"
102
+ abort "#{File.basename($0)}: #{msg} at character #{pos} near '#{string[pos-4,7]}'"
83
103
  end
84
104
 
85
105
  # ==[ Parser ]==
86
106
 
87
107
  def parse
88
- @rows = []
89
- @cols = @cells = 0
108
+ @rows ||= []
90
109
  while row = next_row
91
110
  @rows << row
92
111
  size = row.size
@@ -105,6 +124,50 @@ class Censive < StringScanner
105
124
 
106
125
  # ==[ Helpers ]==
107
126
 
127
+ # grok returns 2 (seps and quotes), 1 (seps only), 0 (neither)
128
+ def grok(str)
129
+ if pos = str.index(/(#{@quote})|#{@sep}/o)
130
+ $1 ? 2 : str.index(/#{@quote}/o, pos) ? 2 : 1
131
+ else
132
+ 0
133
+ end
134
+ end
135
+
136
+ def <<(row)
137
+ @out or return super
138
+
139
+ # most compact export format
140
+ s,q = @sep, @quote
141
+ out = case @mode
142
+ when :compact
143
+ case grok(row.join)
144
+ when 0 then row
145
+ when 1 then row.map {|col| col.include?(s) ? "#{q}#{col}#{q}" : col }
146
+ else
147
+ row.map do |col|
148
+ case grok(col)
149
+ when 0 then col
150
+ when 1 then "#{q}#{col}#{q}"
151
+ else "#{q}#{col.gsub(q, @esc)}#{q}"
152
+ end
153
+ end
154
+ end
155
+ when :full
156
+ row.map {|col| "#{q}#{col.gsub(q, @esc)}#{q}" }
157
+ end.join(s)
158
+
159
+ #!# TODO: allow an option to remove trailing seps in the output
160
+ # out.gsub!(/#{s}+\z/,'')
161
+
162
+ #!# TODO: allow these line endings to be configurable
163
+ @out << out + @lf
164
+ end
165
+
166
+ def each
167
+ @rows ||= parse
168
+ @rows.each {|row| yield row }
169
+ end
170
+
108
171
  def stats
109
172
  wide = string.size.to_s.size
110
173
  puts "%#{wide}d rows" % @rows.size
@@ -113,8 +176,17 @@ class Censive < StringScanner
113
176
  puts "%#{wide}d bytes" % string.size
114
177
  end
115
178
  end
116
- #
117
- # # ==[ Test it out... ]==
179
+
180
+ # ==[ Playground... ]==
181
+
182
+ data = File.read('1.csv')
183
+
184
+ Censive.writer('out.csv', sep: ',', quote: "'", mode: :compact) do |out|
185
+ Censive.new(data).each do |row|
186
+ out << row
187
+ end
188
+ end
189
+
118
190
  #
119
191
  # ARGV << "z.csv" if ARGV.empty?
120
192
  #
@@ -0,0 +1,3 @@
1
+ age name
2
+ 5 'Mike the ''man!'''
3
+ 10 Tommy
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: censive
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.2'
4
+ version: '0.4'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steve Shreeve
@@ -20,6 +20,7 @@ files:
20
20
  - README.md
21
21
  - censive.gemspec
22
22
  - lib/censive.rb
23
+ - test/a-uses-tabs-and-single-quotes-and-no-trailing-newline.csv
23
24
  homepage: https://github.com/shreeve/censive
24
25
  licenses:
25
26
  - MIT