censive 0.13 → 0.14

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (5):
  1. checksums.yaml +4 -4
  2. data/README.md +21 -2
  3. data/censive.gemspec +1 -1
  4. data/lib/censive.rb +29 -36
  5. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f3d6b5bd88adf5c40972cd5a978b49e57a7a8724780eeaaf80b0cb749323dc1d
4
- data.tar.gz: c24b0efa60901ca6d8de7e8314a3b137c981601b866d837123a96722acf826db
3
+ metadata.gz: '072390db5028d3fc9f2bf9c22f95616dea749a502c6f010c25f222f0f8bd86ec'
4
+ data.tar.gz: 012b5549ca2454f22db2b5bdc225e4a703c622d6a636ddea2f31bd6380556164
5
5
  SHA512:
6
- metadata.gz: 3584864597a1631179212563d29ad188a55c2ed90dd1d36c6464e641ee6b5a85c01437516cdf2865c39e9f4f9bfc96605ae318ca456d722afc544e31d87e1801
7
- data.tar.gz: a7c59e427ca7014e0207dc8c3bb8eb1647c41f9193de021dc4a0ee98b913cfa54105f5bbf5301c2ec852feace0ed7d69622008acc2c09697b5ab1e9560fd89d3
6
+ metadata.gz: 285efce01bbdee5e2f2505757342de56918025a2f2a146e1ec5ba9677a2c27cb8698ef0e68170fdeb74cf7e7a82665ea7e533732b813a8c39680bff7c3c22674
7
+ data.tar.gz: 0f36dcba5cac1f608db296fa1728dc87fc50134d1d356bc1d699533c75b02c8eb4dcfa5b33326b68d78f4bb9edaff39d7da975c04eaa6549b1536e2e113bbb61
data/README.md CHANGED
@@ -74,6 +74,25 @@ Or, you can be more succinct with:
74
74
  ```ruby
75
75
  require 'censive'
76
76
 
77
- csv = Censive.new(File.read('data.csv'))
78
- csv.export(sep: "\t")
77
+ csv = Censive.new(DATA.read, excel: true, relax: true, strip: true)
78
+ csv.export(sep: "|", excel: true) # pipe separated, protect leading zeroes
79
+
80
+ __END__
81
+ Name,Age, Shoe
82
+ Alice, 27,5
83
+ Bob, 33,10 1/2
84
+ Charlie or "Chuck",=B2 + B3,9
85
+ "Doug E Fresh",="007",001122
86
+ Subtotal,=sum(B2:B5),="01234"
87
+ ```
88
+
89
+ Which returns:
90
+
91
+ ```
92
+ Name|Age|Shoe
93
+ Alice|27|5
94
+ Bob|33|10 1/2
95
+ "Charlie or ""Chuck"""|=B2 + B3|9
96
+ Doug E Fresh|="007"|="001122"
97
+ Subtotal|=sum(B2:B5)|="01234"
79
98
  ```
data/censive.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = "censive"
5
- s.version = "0.13"
5
+ s.version = "0.14"
6
6
  s.author = "Steve Shreeve"
7
7
  s.email = "steve.shreeve@gmail.com"
8
8
  s.summary =
data/lib/censive.rb CHANGED
@@ -4,7 +4,7 @@
4
4
  # censive - A quick and lightweight CSV handling library for Ruby
5
5
  #
6
6
  # Author: Steve Shreeve (steve.shreeve@gmail.com)
7
- # Date: Feb 3, 2023
7
+ # Date: Feb 4, 2023
8
8
  #
9
9
  # https://crystal-lang.org/api/1.7.2/CSV.html (Crystal's CSV library)
10
10
  # https://github.com/ruby/strscan/blob/master/ext/strscan/strscan.c
@@ -13,13 +13,10 @@
13
13
  # ============================================================================
14
14
  # GOALS:
15
15
  # 1. Faster than Ruby's default CSV library
16
- # 2. Lightweight code base with streamlined logic
17
- # 3. Support for most non-compliant CSV variations (eg - @relax, @excel)
16
+ # 2. Lightweight code with streamlined and optimized logic
17
+ # 3. Support most non-compliant CSV variations (eg - @excel, @relax, @strip)
18
18
  #
19
- # TODO:
20
- # 1. Support IO streaming
21
- # 2. Add option to strip whitespace
22
- # 3. Support CSV headers in first row
19
+ # TODO: Support IO streaming
23
20
  # ============================================================================
24
21
 
25
22
  require "bundler/setup"
@@ -44,6 +41,7 @@ class Censive < StringScanner
44
41
  quote: '"' , # quote character
45
42
  relax: false , # relax quote parsing so ,"Fo"o, => ,"Fo""o",
46
43
  sep: "," , # column separator character
44
+ strip: false , # strip fields when reading
47
45
  **opts # grab bag
48
46
  )
49
47
  super(str || "")
@@ -57,6 +55,7 @@ class Censive < StringScanner
57
55
  @quote = quote
58
56
  @relax = relax
59
57
  @sep = sep
58
+ @strip = strip
60
59
 
61
60
  @cr = "\r"
62
61
  @lf = "\n"
@@ -68,50 +67,43 @@ class Censive < StringScanner
68
67
  def reset(str=nil)
69
68
  self.string = str if str
70
69
  super()
71
- @char = curr_char
70
+ @char = currchar
72
71
  @rows = nil
73
72
  @cols = @cells = 0
74
73
  end
75
74
 
76
75
  # ==[ Lexer ]==
77
76
 
78
- # pure ruby versions for debugging
79
- # def curr_char; @char = string[pos]; end
80
- # def next_char; scan(/./m); @char = string[pos]; end
81
-
82
- def curr_char; @char = currchar; end
83
- def next_char; @char = nextchar; end
84
-
85
77
  def next_token
86
78
  if @excel && @char == @eq
87
79
  excel = true
88
- next_char
80
+ @char = nextchar
89
81
  end
90
82
 
91
83
  if @char == @quote # consume quoted cell
92
- match = ""
84
+ token = ""
93
85
  while true
94
- next_char
95
- match << (scan_until(/(?=#{@quote})/o) or bomb "unclosed quote")
96
- match << @quote and next if next_char == @quote
86
+ @char = nextchar
87
+ token << (scan_until(/(?=#{@quote})/o) or bomb "unclosed quote")
88
+ token << @quote and next if (@char = nextchar) == @quote
97
89
  break if [@sep,@cr,@lf,@es,nil].include?(@char)
98
90
  @relax or bomb "invalid character after quote"
99
- match << @quote + scan_until(/(?=#{@quote})/o) + @quote
91
+ token << @quote + scan_until(/(?=#{@quote})/o) + @quote
100
92
  end
101
- next_char if @char == @sep
102
- match
93
+ @char = nextchar if @char == @sep
94
+ @strip ? token.strip : token
103
95
  elsif [@sep,@cr,@lf,@es,nil].include?(@char)
104
96
  case @char
105
- when @sep then next_char; @es
106
- when @cr then next_char == @lf and next_char; nil
107
- when @lf then next_char; nil
108
- else nil
97
+ when @sep then @char = nextchar ; @es
98
+ when @cr then (@char = nextchar) == @lf and @char = nextchar; nil
99
+ when @lf then @char = nextchar ; nil
100
+ else nil
109
101
  end
110
102
  else # consume unquoted cell
111
- match = scan_until(/(?=#{@sep}|#{@cr}|#{@lf}|\z)/o) or bomb "unexpected character"
112
- match.prepend(@eq) if excel
113
- next_char if curr_char == @sep
114
- match
103
+ token = scan_until(/(?=#{"\\"+@sep}|#{@cr}|#{@lf}|\z)/o) or bomb "unexpected character"
104
+ token.prepend(@eq) if excel
105
+ @char = nextchar if (@char = currchar) == @sep
106
+ @strip ? token.strip : token
115
107
  end
116
108
  end
117
109
 
@@ -143,7 +135,7 @@ class Censive < StringScanner
143
135
 
144
136
  # returns 2 (must be quoted and escaped), 1 (must be quoted), 0 (neither)
145
137
  def grok(str)
146
- if idx = str.index(/(#{@quote})|#{@sep}|#{@cr}|#{@lf}/o) #!# FIXME: regex injection?
138
+ if idx = str.index(/(#{@quote})|#{"\\"+@sep}|#{@cr}|#{@lf}/o)
147
139
  $1 ? 2 : str.index(/#{@quote}/o, idx) ? 2 : 1
148
140
  else
149
141
  0
@@ -164,7 +156,7 @@ class Censive < StringScanner
164
156
  row
165
157
  when 1
166
158
  row.map do |col|
167
- col.match?(/#{@sep}|#{@cr}|#{@lf}/o) ? "#{q}#{col}#{q}" : col
159
+ col.match?(/#{"\\"+@sep}|#{@cr}|#{@lf}/o) ? "#{q}#{col}#{q}" : col
168
160
  end
169
161
  else
170
162
  row.map do |col|
@@ -209,10 +201,11 @@ class Censive < StringScanner
209
201
  end
210
202
 
211
203
  if __FILE__ == $0
212
- raw = DATA.gets("\n\n").chomp
204
+ raw = DATA.read
205
+ # raw = DATA.gets("\n\n").chomp
213
206
  # raw = File.read(ARGV.first || "lc-2023.csv")
214
- csv = Censive.new(raw, excel: true, relax: true)
215
- csv.export # (sep: ",", excel: true)
207
+ csv = Censive.new(raw, excel: true, relax: true, strip: true)
208
+ csv.export(sep: "|", excel: true)
216
209
  end
217
210
 
218
211
  __END__
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: censive
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.13'
4
+ version: '0.14'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steve Shreeve
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-02-04 00:00:00.000000000 Z
11
+ date: 2023-02-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: strscan