censive 0.13 → 0.15

Sign up to get free protection for your applications and to get access to all the features.
Files changed (5)
  1. checksums.yaml +4 -4
  2. data/README.md +21 -2
  3. data/censive.gemspec +1 -2
  4. data/lib/censive.rb +52 -68
  5. metadata +3 -17
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f3d6b5bd88adf5c40972cd5a978b49e57a7a8724780eeaaf80b0cb749323dc1d
4
- data.tar.gz: c24b0efa60901ca6d8de7e8314a3b137c981601b866d837123a96722acf826db
3
+ metadata.gz: 4211eb0036bcbadf9d1683f10084bb8cbb109d26845d6990778e159b634b8d00
4
+ data.tar.gz: 1f97a3e5343958a86d54b7f1f87ada5d05a8f091d597253c3079c1a15eeb4dfa
5
5
  SHA512:
6
- metadata.gz: 3584864597a1631179212563d29ad188a55c2ed90dd1d36c6464e641ee6b5a85c01437516cdf2865c39e9f4f9bfc96605ae318ca456d722afc544e31d87e1801
7
- data.tar.gz: a7c59e427ca7014e0207dc8c3bb8eb1647c41f9193de021dc4a0ee98b913cfa54105f5bbf5301c2ec852feace0ed7d69622008acc2c09697b5ab1e9560fd89d3
6
+ metadata.gz: f3b8e0ee3cf2d8eadbb26d6be5b590418a844561062779f47eec0863b87cfbc65868a8c89624865eb74ee7f6825ee3fb1d2d26611353a30f805d40d567c1f1ac
7
+ data.tar.gz: 949e07f3f2208cbecc868a85c7bde5e67f38bf570ae1708496fd7745703c3fc133e8d48fc03c5c18c85264c0bff5b394d5ce2c2b2894daa8b7ca9033f9d68828
data/README.md CHANGED
@@ -74,6 +74,25 @@ Or, you can be more succinct with:
74
74
  ```ruby
75
75
  require 'censive'
76
76
 
77
- csv = Censive.new(File.read('data.csv'))
78
- csv.export(sep: "\t")
77
+ csv = Censive.new(DATA.read, excel: true, relax: true, strip: true)
78
+ csv.export(sep: "|", excel: true) # pipe separated, protect leading zeroes
79
+
80
+ __END__
81
+ Name,Age, Shoe
82
+ Alice, 27,5
83
+ Bob, 33,10 1/2
84
+ Charlie or "Chuck",=B2 + B3,9
85
+ "Doug E Fresh",="007",001122
86
+ Subtotal,=sum(B2:B5),="01234"
87
+ ```
88
+
89
+ Which returns:
90
+
91
+ ```
92
+ Name|Age|Shoe
93
+ Alice|27|5
94
+ Bob|33|10 1/2
95
+ "Charlie or ""Chuck"""|=B2 + B3|9
96
+ Doug E Fresh|="007"|="001122"
97
+ Subtotal|=sum(B2:B5)|="01234"
79
98
  ```
data/censive.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = "censive"
5
- s.version = "0.13"
5
+ s.version = "0.15"
6
6
  s.author = "Steve Shreeve"
7
7
  s.email = "steve.shreeve@gmail.com"
8
8
  s.summary =
@@ -11,5 +11,4 @@ Gem::Specification.new do |s|
11
11
  s.license = "MIT"
12
12
  s.files = `git ls-files`.split("\n") - %w[.gitignore]
13
13
  s.executables = `(cd bin 2>&1) > /dev/null && git ls-files .`.split("\n")
14
- s.add_dependency "strscan", ">= 3.0.6"
15
14
  end
data/lib/censive.rb CHANGED
@@ -4,25 +4,21 @@
4
4
  # censive - A quick and lightweight CSV handling library for Ruby
5
5
  #
6
6
  # Author: Steve Shreeve (steve.shreeve@gmail.com)
7
- # Date: Feb 3, 2023
7
+ # Date: Feb 5, 2023
8
8
  #
9
9
  # https://crystal-lang.org/api/1.7.2/CSV.html (Crystal's CSV library)
10
10
  # https://github.com/ruby/strscan/blob/master/ext/strscan/strscan.c
11
- # https://github.com/ruby/strscan/issues/53 for details
12
- # https://github.com/ruby/strscan/pull/54 for code
11
+ #
12
+ # Thanks to Sutou Kouhei (kou) for his excellent advice on using scan
13
13
  # ============================================================================
14
14
  # GOALS:
15
15
  # 1. Faster than Ruby's default CSV library
16
- # 2. Lightweight code base with streamlined logic
17
- # 3. Support for most non-compliant CSV variations (eg - @relax, @excel)
16
+ # 2. Lightweight code with streamlined and optimized logic
17
+ # 3. Support most non-compliant CSV variations (eg - @excel, @relax, @strip)
18
18
  #
19
- # TODO:
20
- # 1. Support IO streaming
21
- # 2. Add option to strip whitespace
22
- # 3. Support CSV headers in first row
19
+ # TODO: Support IO streaming
23
20
  # ============================================================================
24
21
 
25
- require "bundler/setup"
26
22
  require "strscan"
27
23
 
28
24
  class Censive < StringScanner
@@ -36,83 +32,71 @@ class Censive < StringScanner
36
32
  end
37
33
 
38
34
  def initialize(str=nil,
39
- drop: false , # drop trailing empty fields?
40
- eol: "\n" , # line endings for exports
41
- excel: false , # literals ="01" formulas =A1 + B2 http://bit.ly/3Y7jIvc
42
- mode: :compact, # export mode: compact or full
43
- out: nil , # output stream, needs to respond to <<
44
- quote: '"' , # quote character
45
- relax: false , # relax quote parsing so ,"Fo"o, => ,"Fo""o",
46
- sep: "," , # column separator character
47
- **opts # grab bag
35
+ drop: false , # drop trailing empty fields?
36
+ excel: false , # literals ="01" formulas =A1 + B2 http://bit.ly/3Y7jIvc
37
+ mode: :compact, # export mode: compact or full
38
+ out: $stdout , # output stream, needs to respond to <<
39
+ quote: '"' , # quote character
40
+ relax: false , # relax quote parsing so ,"Fo"o, => ,"Fo""o",
41
+ rowsep: "\n" , # row separator for export
42
+ sep: "," , # column separator character
43
+ strip: false , # strip fields when reading
44
+ **opts # grab bag
48
45
  )
49
46
  super(str || "")
50
47
  reset
51
48
 
52
- @drop = drop
53
- @eol = eol
54
- @excel = excel
55
- @mode = mode
56
- @out = out || $stdout
57
- @quote = quote
58
- @relax = relax
59
- @sep = sep
60
-
61
- @cr = "\r"
62
- @lf = "\n"
63
- @es = ""
64
- @eq = "="
65
- @esc = (@quote * 2)
49
+ # options
50
+ @drop = drop
51
+ @excel = excel
52
+ @mode = mode
53
+ @out = out
54
+ @quote = quote
55
+ @relax = relax
56
+ @rowsep = rowsep
57
+ @sep = sep
58
+ @strip = strip
59
+
60
+ # determined
61
+ @cr = "\r"
62
+ @lf = "\n"
63
+ @es = ""
64
+ @eq = "="
65
+ @esc = (@quote * 2)
66
+ @eol = /#{@cr}#{@lf}?|#{@lf}|\z/o # end of line
67
+ @eoc = /(?=#{"\\" + @sep}|#{@cr}|#{@lf}|\z)/o # end of cell
66
68
  end
67
69
 
68
70
  def reset(str=nil)
69
71
  self.string = str if str
70
72
  super()
71
- @char = curr_char
72
73
  @rows = nil
73
74
  @cols = @cells = 0
74
75
  end
75
76
 
76
77
  # ==[ Lexer ]==
77
78
 
78
- # pure ruby versions for debugging
79
- # def curr_char; @char = string[pos]; end
80
- # def next_char; scan(/./m); @char = string[pos]; end
81
-
82
- def curr_char; @char = currchar; end
83
- def next_char; @char = nextchar; end
84
-
85
79
  def next_token
86
- if @excel && @char == @eq
87
- excel = true
88
- next_char
89
- end
80
+ excel = true if @excel && scan(@eq)
90
81
 
91
- if @char == @quote # consume quoted cell
92
- match = ""
82
+ if scan(@quote) # consume quoted cell
83
+ token = ""
93
84
  while true
94
- next_char
95
- match << (scan_until(/(?=#{@quote})/o) or bomb "unclosed quote")
96
- match << @quote and next if next_char == @quote
97
- break if [@sep,@cr,@lf,@es,nil].include?(@char)
85
+ token << (scan_until(/#{@quote}/o) or bomb "unclosed quote")[0..-2]
86
+ token << @quote and next if scan(@quote)
87
+ break if scan(@eoc)
98
88
  @relax or bomb "invalid character after quote"
99
- match << @quote + scan_until(/(?=#{@quote})/o) + @quote
100
- end
101
- next_char if @char == @sep
102
- match
103
- elsif [@sep,@cr,@lf,@es,nil].include?(@char)
104
- case @char
105
- when @sep then next_char; @es
106
- when @cr then next_char == @lf and next_char; nil
107
- when @lf then next_char; nil
108
- else nil
89
+ quoted = scan_until(/#{@quote}/o) or bomb "invalid inline quote"
90
+ token << @quote + quoted + @quote
109
91
  end
92
+ elsif scan(@sep) then return @es
93
+ elsif scan(@eol) then return nil
110
94
  else # consume unquoted cell
111
- match = scan_until(/(?=#{@sep}|#{@cr}|#{@lf}|\z)/o) or bomb "unexpected character"
112
- match.prepend(@eq) if excel
113
- next_char if curr_char == @sep
114
- match
95
+ token = scan_until(@eoc) or bomb "unexpected character"
96
+ token.prepend(@eq) if excel
115
97
  end
98
+ scan(@sep)
99
+ @strip ? token.strip : token
116
100
  end
117
101
 
118
102
  def bomb(msg)
@@ -143,7 +127,7 @@ class Censive < StringScanner
143
127
 
144
128
  # returns 2 (must be quoted and escaped), 1 (must be quoted), 0 (neither)
145
129
  def grok(str)
146
- if idx = str.index(/(#{@quote})|#{@sep}|#{@cr}|#{@lf}/o) #!# FIXME: regex injection?
130
+ if idx = str.index(/(#{@quote})|#{"\\"+@sep}|#{@cr}|#{@lf}/o)
147
131
  $1 ? 2 : str.index(/#{@quote}/o, idx) ? 2 : 1
148
132
  else
149
133
  0
@@ -164,7 +148,7 @@ class Censive < StringScanner
164
148
  row
165
149
  when 1
166
150
  row.map do |col|
167
- col.match?(/#{@sep}|#{@cr}|#{@lf}/o) ? "#{q}#{col}#{q}" : col
151
+ col.match?(/#{"\\"+@sep}|#{@cr}|#{@lf}/o) ? "#{q}#{col}#{q}" : col
168
152
  end
169
153
  else
170
154
  row.map do |col|
@@ -186,7 +170,7 @@ class Censive < StringScanner
186
170
  end
187
171
  end.join(s)
188
172
 
189
- @out << out + @eol
173
+ @out << out + @rowsep
190
174
  end
191
175
 
192
176
  def each
metadata CHANGED
@@ -1,29 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: censive
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.13'
4
+ version: '0.15'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steve Shreeve
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-02-04 00:00:00.000000000 Z
12
- dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: strscan
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ">="
18
- - !ruby/object:Gem::Version
19
- version: 3.0.6
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - ">="
25
- - !ruby/object:Gem::Version
26
- version: 3.0.6
11
+ date: 2023-02-05 00:00:00.000000000 Z
12
+ dependencies: []
27
13
  description: A quick and lightweight CSV handling library for Ruby
28
14
  email: steve.shreeve@gmail.com
29
15
  executables: []