censive 0.1 → 0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 78a04712f2c737b263765b117a0614bddeda6e4c233409827ed900983b061a64
4
- data.tar.gz: e42610ad444e4b4ed9db374f1ee1764c97619fa54235c3bb18e537513804f940
3
+ metadata.gz: 99c4cd0527e02e78f1fc12c95eda569b23849940d105a6e79442e7eb56623384
4
+ data.tar.gz: 173aa495f225aa40c1a75686eea8f98e91828cf03e92ab1fee43aba92c27ccfe
5
5
  SHA512:
6
- metadata.gz: 294e8879052426037bbaed072171ecb8ca0abff47156a4a1d51a3f4531229a6f755ae7db4f8e5dfb7422ad25edf1e145d497bd39d99e46d895cdf6adb4988edc
7
- data.tar.gz: d087b32edd1e467143a1e785bd1aa89d2e096920027185ff2dc07a275317b034521b1143f962774270040ae57b079c3a61fa045d10e79f194312787125527fa0
6
+ metadata.gz: 10010211f2d2f50f075b6ffff16517bea7a9c821cfb9143ea38ca6cce4323a9aa5c59529f84d96f9ed7077aa1fd93418312fa084774f7f34027fd436e7dc9f49
7
+ data.tar.gz: 61fbef5d03f87a0f92f0c1456699d2489d9f13a1fc4ae1dbb7f1f7196b1e7f10b2f94429cc8e18d23e7edca458189bdb535da833ab4c1968bcb1ace92d9f409f
data/README.md CHANGED
@@ -1,3 +1,19 @@
1
1
  # censive
2
2
 
3
3
  A quick and lightweight CSV handling library for Ruby
4
+
5
+ ## Writing CSV
6
+
7
+ ```ruby
8
+ require 'censive'
9
+
10
+ # read in a comma-separated csv file
11
+ data = File.read('data.csv')
12
+
13
+ # write out a tab-separated tsv file
14
+ Censive.writer('out.tsv', sep: "\t") do |out|
15
+ Censive.new(data).each do |row|
16
+ out << row
17
+ end
18
+ end
19
+ ```
data/censive.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = "censive"
5
- s.version = "0.1"
5
+ s.version = "0.4"
6
6
  s.author = "Steve Shreeve"
7
7
  s.email = "steve.shreeve@gmail.com"
8
8
  s.summary = "A quick and lightweight CVS handling library for Ruby"
data/lib/censive.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  # ==============================================================================
4
- # censive - A quick and lightweight CVS handling library for Ruby
4
+ # censive - A quick and lightweight CSV handling library for Ruby
5
5
  #
6
6
  # Author: Steve Shreeve (steve.shreeve@gmail.com)
7
7
  # Date: Jan 30, 2023
@@ -10,7 +10,15 @@
10
10
  #
11
11
  # 1. Faster than Ruby's default CSV library
12
12
  # 2. Lightweight code base with streamlined method calls
13
- # 3. Eventually support IO streaming
13
+ #
14
+ # To consider:
15
+ #
16
+ # 1. Option to support IO streaming
17
+ # 2. Option to strip whitespace
18
+ # 3. Option to change output line endings
19
+ # 4. Option to force quotes in output
20
+ # 5. Option to allow reading excel CSV (="Text" for cells)
21
+ # 6. Confirm file encodings such as UTF-8, UTF-16, etc.
14
22
  #
15
23
  # NOTE: Only getch and scan_until advance strscan's position
16
24
  # ==============================================================================
@@ -18,20 +26,34 @@
18
26
  require 'strscan'
19
27
 
20
28
  class Censive < StringScanner
21
- def initialize(...)
22
- super
29
+
30
+ def self.writer(path, **opts)
31
+ File.open(path, 'w') do |file|
32
+ yield new(out: file, **opts)
33
+ end
34
+ end
35
+
36
+ def initialize(str=nil, sep: ',', quote: '"', out: nil, mode: :compact)
37
+ super(str || '')
23
38
  reset
24
- @sep = ',' .freeze # make this a param
25
- @quote = '"' .freeze # make this a param
26
- @es = "" .freeze
27
- @cr = "\r".freeze
28
- @lf = "\n".freeze
39
+ @sep = sep .freeze
40
+ @quote = quote.freeze
41
+ @es = "" .freeze
42
+ @cr = "\r" .freeze
43
+ @lf = "\n" .freeze
44
+ @out = out
45
+ @esc = (@quote * 2).freeze
46
+ @mode = mode
29
47
  end
30
48
 
31
- def reset
32
- super
49
+ def reset(str=nil)
50
+ self.string = str if str
51
+ super()
33
52
  @char = string[pos]
34
53
  @flag = nil
54
+
55
+ @rows = nil
56
+ @cols = @cells = 0
35
57
  end
36
58
 
37
59
  # ==[ Lexer ]==
@@ -56,7 +78,7 @@ class Censive < StringScanner
56
78
  getch # consume the quote (optimized by not calling next_char)
57
79
  match << (scan_until(/(?=#{@quote})/o) or bomb "unclosed quote")
58
80
  case next_char
59
- when @sep then next_char; break
81
+ when @sep then @flag = @es; next_char; break
60
82
  when @quote then match << @quote
61
83
  when @cr,@lf,nil then break
62
84
  else bomb "unexpected character after quote"
@@ -71,20 +93,19 @@ class Censive < StringScanner
71
93
  else # consume_unquoted_cell
72
94
  match = scan_until(/(?=#{@sep}|#{@cr}|#{@lf}|\z)/o) or bomb "unexpected character"
73
95
  @char = string[pos]
74
- @char == @sep and next_char
96
+ @char == @sep and @flag = @es and next_char
75
97
  match
76
98
  end
77
99
  end
78
100
 
79
101
  def bomb(msg)
80
- abort "censive: #{msg} at character #{pos} near '#{string[pos-4,7]}'"
102
+ abort "#{File.basename($0)}: #{msg} at character #{pos} near '#{string[pos-4,7]}'"
81
103
  end
82
104
 
83
105
  # ==[ Parser ]==
84
106
 
85
107
  def parse
86
- @rows = []
87
- @cols = @cells = 0
108
+ @rows ||= []
88
109
  while row = next_row
89
110
  @rows << row
90
111
  size = row.size
@@ -103,6 +124,50 @@ class Censive < StringScanner
103
124
 
104
125
  # ==[ Helpers ]==
105
126
 
127
+ # grok returns 2 (seps and quotes), 1 (seps only), 0 (neither)
128
+ def grok(str)
129
+ if pos = str.index(/(#{@quote})|#{@sep}/o)
130
+ $1 ? 2 : str.index(/#{@quote}/o, pos) ? 2 : 1
131
+ else
132
+ 0
133
+ end
134
+ end
135
+
136
+ def <<(row)
137
+ @out or return super
138
+
139
+ # most compact export format
140
+ s,q = @sep, @quote
141
+ out = case @mode
142
+ when :compact
143
+ case grok(row.join)
144
+ when 0 then row
145
+ when 1 then row.map {|col| col.include?(s) ? "#{q}#{col}#{q}" : col }
146
+ else
147
+ row.map do |col|
148
+ case grok(col)
149
+ when 0 then col
150
+ when 1 then "#{q}#{col}#{q}"
151
+ else "#{q}#{col.gsub(q, @esc)}#{q}"
152
+ end
153
+ end
154
+ end
155
+ when :full
156
+ row.map {|col| "#{q}#{col.gsub(q, @esc)}#{q}" }
157
+ end.join(s)
158
+
159
+ #!# TODO: allow an option to remove trailing seps in the output
160
+ # out.gsub!(/#{s}+\z/,'')
161
+
162
+ #!# TODO: allow these line endings to be configurable
163
+ @out << out + @lf
164
+ end
165
+
166
+ def each
167
+ @rows ||= parse
168
+ @rows.each {|row| yield row }
169
+ end
170
+
106
171
  def stats
107
172
  wide = string.size.to_s.size
108
173
  puts "%#{wide}d rows" % @rows.size
@@ -112,22 +177,31 @@ class Censive < StringScanner
112
177
  end
113
178
  end
114
179
 
115
- # ==[ Test it out... ]==
180
+ # ==[ Playground... ]==
116
181
 
117
- ARGV << "z.csv" if ARGV.empty?
182
+ data = File.read('1.csv')
118
183
 
119
- case 1
120
- when 1
121
- path = ARGV.first
122
- data = File.read(path)
123
- when 2
124
- data = DATA.gets("\n\n").rstrip
184
+ Censive.writer('out.csv', sep: ',', quote: "'", mode: :compact) do |out|
185
+ Censive.new(data).each do |row|
186
+ out << row
187
+ end
125
188
  end
126
189
 
127
- STDOUT.sync = true
128
-
129
- csv = Censive.new(data)
130
-
131
- data.size > 1e6 ? csv.parse : csv.parse.each {|cols| p cols }
132
-
133
- csv.stats
190
+ #
191
+ # ARGV << "z.csv" if ARGV.empty?
192
+ #
193
+ # case 1
194
+ # when 1
195
+ # path = ARGV.first
196
+ # data = File.read(path)
197
+ # when 2
198
+ # data = DATA.gets("\n\n").rstrip
199
+ # end
200
+ #
201
+ # STDOUT.sync = true
202
+ #
203
+ # csv = Censive.new(data)
204
+ #
205
+ # data.size > 1e6 ? csv.parse : csv.parse.each {|cols| p cols }
206
+ #
207
+ # csv.stats
@@ -0,0 +1,3 @@
1
+ age name
2
+ 5 'Mike the ''man!'''
3
+ 10 Tommy
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: censive
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.1'
4
+ version: '0.4'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steve Shreeve
@@ -20,6 +20,7 @@ files:
20
20
  - README.md
21
21
  - censive.gemspec
22
22
  - lib/censive.rb
23
+ - test/a-uses-tabs-and-single-quotes-and-no-trailing-newline.csv
23
24
  homepage: https://github.com/shreeve/censive
24
25
  licenses:
25
26
  - MIT