censive 0.13 → 0.14

Sign up to get free protection for your applications and to get access to all the features.
Files changed (5)
  1. checksums.yaml +4 -4
  2. data/README.md +21 -2
  3. data/censive.gemspec +1 -1
  4. data/lib/censive.rb +29 -36
  5. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f3d6b5bd88adf5c40972cd5a978b49e57a7a8724780eeaaf80b0cb749323dc1d
4
- data.tar.gz: c24b0efa60901ca6d8de7e8314a3b137c981601b866d837123a96722acf826db
3
+ metadata.gz: '072390db5028d3fc9f2bf9c22f95616dea749a502c6f010c25f222f0f8bd86ec'
4
+ data.tar.gz: 012b5549ca2454f22db2b5bdc225e4a703c622d6a636ddea2f31bd6380556164
5
5
  SHA512:
6
- metadata.gz: 3584864597a1631179212563d29ad188a55c2ed90dd1d36c6464e641ee6b5a85c01437516cdf2865c39e9f4f9bfc96605ae318ca456d722afc544e31d87e1801
7
- data.tar.gz: a7c59e427ca7014e0207dc8c3bb8eb1647c41f9193de021dc4a0ee98b913cfa54105f5bbf5301c2ec852feace0ed7d69622008acc2c09697b5ab1e9560fd89d3
6
+ metadata.gz: 285efce01bbdee5e2f2505757342de56918025a2f2a146e1ec5ba9677a2c27cb8698ef0e68170fdeb74cf7e7a82665ea7e533732b813a8c39680bff7c3c22674
7
+ data.tar.gz: 0f36dcba5cac1f608db296fa1728dc87fc50134d1d356bc1d699533c75b02c8eb4dcfa5b33326b68d78f4bb9edaff39d7da975c04eaa6549b1536e2e113bbb61
data/README.md CHANGED
@@ -74,6 +74,25 @@ Or, you can be more succinct with:
74
74
  ```ruby
75
75
  require 'censive'
76
76
 
77
- csv = Censive.new(File.read('data.csv'))
78
- csv.export(sep: "\t")
77
+ csv = Censive.new(DATA.read, excel: true, relax: true, strip: true)
78
+ csv.export(sep: "|", excel: true) # pipe separated, protect leading zeroes
79
+
80
+ __END__
81
+ Name,Age, Shoe
82
+ Alice, 27,5
83
+ Bob, 33,10 1/2
84
+ Charlie or "Chuck",=B2 + B3,9
85
+ "Doug E Fresh",="007",001122
86
+ Subtotal,=sum(B2:B5),="01234"
87
+ ```
88
+
89
+ Which returns:
90
+
91
+ ```
92
+ Name|Age|Shoe
93
+ Alice|27|5
94
+ Bob|33|10 1/2
95
+ "Charlie or ""Chuck"""|=B2 + B3|9
96
+ Doug E Fresh|="007"|="001122"
97
+ Subtotal|=sum(B2:B5)|="01234"
79
98
  ```
data/censive.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = "censive"
5
- s.version = "0.13"
5
+ s.version = "0.14"
6
6
  s.author = "Steve Shreeve"
7
7
  s.email = "steve.shreeve@gmail.com"
8
8
  s.summary =
data/lib/censive.rb CHANGED
@@ -4,7 +4,7 @@
4
4
  # censive - A quick and lightweight CSV handling library for Ruby
5
5
  #
6
6
  # Author: Steve Shreeve (steve.shreeve@gmail.com)
7
- # Date: Feb 3, 2023
7
+ # Date: Feb 4, 2023
8
8
  #
9
9
  # https://crystal-lang.org/api/1.7.2/CSV.html (Crystal's CSV library)
10
10
  # https://github.com/ruby/strscan/blob/master/ext/strscan/strscan.c
@@ -13,13 +13,10 @@
13
13
  # ============================================================================
14
14
  # GOALS:
15
15
  # 1. Faster than Ruby's default CSV library
16
- # 2. Lightweight code base with streamlined logic
17
- # 3. Support for most non-compliant CSV variations (eg - @relax, @excel)
16
+ # 2. Lightweight code with streamlined and optimized logic
17
+ # 3. Support most non-compliant CSV variations (eg - @excel, @relax, @strip)
18
18
  #
19
- # TODO:
20
- # 1. Support IO streaming
21
- # 2. Add option to strip whitespace
22
- # 3. Support CSV headers in first row
19
+ # TODO: Support IO streaming
23
20
  # ============================================================================
24
21
 
25
22
  require "bundler/setup"
@@ -44,6 +41,7 @@ class Censive < StringScanner
44
41
  quote: '"' , # quote character
45
42
  relax: false , # relax quote parsing so ,"Fo"o, => ,"Fo""o",
46
43
  sep: "," , # column separator character
44
+ strip: false , # strip fields when reading
47
45
  **opts # grab bag
48
46
  )
49
47
  super(str || "")
@@ -57,6 +55,7 @@ class Censive < StringScanner
57
55
  @quote = quote
58
56
  @relax = relax
59
57
  @sep = sep
58
+ @strip = strip
60
59
 
61
60
  @cr = "\r"
62
61
  @lf = "\n"
@@ -68,50 +67,43 @@ class Censive < StringScanner
68
67
  def reset(str=nil)
69
68
  self.string = str if str
70
69
  super()
71
- @char = curr_char
70
+ @char = currchar
72
71
  @rows = nil
73
72
  @cols = @cells = 0
74
73
  end
75
74
 
76
75
  # ==[ Lexer ]==
77
76
 
78
- # pure ruby versions for debugging
79
- # def curr_char; @char = string[pos]; end
80
- # def next_char; scan(/./m); @char = string[pos]; end
81
-
82
- def curr_char; @char = currchar; end
83
- def next_char; @char = nextchar; end
84
-
85
77
  def next_token
86
78
  if @excel && @char == @eq
87
79
  excel = true
88
- next_char
80
+ @char = nextchar
89
81
  end
90
82
 
91
83
  if @char == @quote # consume quoted cell
92
- match = ""
84
+ token = ""
93
85
  while true
94
- next_char
95
- match << (scan_until(/(?=#{@quote})/o) or bomb "unclosed quote")
96
- match << @quote and next if next_char == @quote
86
+ @char = nextchar
87
+ token << (scan_until(/(?=#{@quote})/o) or bomb "unclosed quote")
88
+ token << @quote and next if (@char = nextchar) == @quote
97
89
  break if [@sep,@cr,@lf,@es,nil].include?(@char)
98
90
  @relax or bomb "invalid character after quote"
99
- match << @quote + scan_until(/(?=#{@quote})/o) + @quote
91
+ token << @quote + scan_until(/(?=#{@quote})/o) + @quote
100
92
  end
101
- next_char if @char == @sep
102
- match
93
+ @char = nextchar if @char == @sep
94
+ @strip ? token.strip : token
103
95
  elsif [@sep,@cr,@lf,@es,nil].include?(@char)
104
96
  case @char
105
- when @sep then next_char; @es
106
- when @cr then next_char == @lf and next_char; nil
107
- when @lf then next_char; nil
108
- else nil
97
+ when @sep then @char = nextchar ; @es
98
+ when @cr then (@char = nextchar) == @lf and @char = nextchar; nil
99
+ when @lf then @char = nextchar ; nil
100
+ else nil
109
101
  end
110
102
  else # consume unquoted cell
111
- match = scan_until(/(?=#{@sep}|#{@cr}|#{@lf}|\z)/o) or bomb "unexpected character"
112
- match.prepend(@eq) if excel
113
- next_char if curr_char == @sep
114
- match
103
+ token = scan_until(/(?=#{"\\"+@sep}|#{@cr}|#{@lf}|\z)/o) or bomb "unexpected character"
104
+ token.prepend(@eq) if excel
105
+ @char = nextchar if (@char = currchar) == @sep
106
+ @strip ? token.strip : token
115
107
  end
116
108
  end
117
109
 
@@ -143,7 +135,7 @@ class Censive < StringScanner
143
135
 
144
136
  # returns 2 (must be quoted and escaped), 1 (must be quoted), 0 (neither)
145
137
  def grok(str)
146
- if idx = str.index(/(#{@quote})|#{@sep}|#{@cr}|#{@lf}/o) #!# FIXME: regex injection?
138
+ if idx = str.index(/(#{@quote})|#{"\\"+@sep}|#{@cr}|#{@lf}/o)
147
139
  $1 ? 2 : str.index(/#{@quote}/o, idx) ? 2 : 1
148
140
  else
149
141
  0
@@ -164,7 +156,7 @@ class Censive < StringScanner
164
156
  row
165
157
  when 1
166
158
  row.map do |col|
167
- col.match?(/#{@sep}|#{@cr}|#{@lf}/o) ? "#{q}#{col}#{q}" : col
159
+ col.match?(/#{"\\"+@sep}|#{@cr}|#{@lf}/o) ? "#{q}#{col}#{q}" : col
168
160
  end
169
161
  else
170
162
  row.map do |col|
@@ -209,10 +201,11 @@ class Censive < StringScanner
209
201
  end
210
202
 
211
203
  if __FILE__ == $0
212
- raw = DATA.gets("\n\n").chomp
204
+ raw = DATA.read
205
+ # raw = DATA.gets("\n\n").chomp
213
206
  # raw = File.read(ARGV.first || "lc-2023.csv")
214
- csv = Censive.new(raw, excel: true, relax: true)
215
- csv.export # (sep: ",", excel: true)
207
+ csv = Censive.new(raw, excel: true, relax: true, strip: true)
208
+ csv.export(sep: "|", excel: true)
216
209
  end
217
210
 
218
211
  __END__
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: censive
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.13'
4
+ version: '0.14'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steve Shreeve
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-02-04 00:00:00.000000000 Z
11
+ date: 2023-02-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: strscan