censive 0.13 → 0.15

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (5) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +21 -2
  3. data/censive.gemspec +1 -2
  4. data/lib/censive.rb +52 -68
  5. metadata +3 -17
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f3d6b5bd88adf5c40972cd5a978b49e57a7a8724780eeaaf80b0cb749323dc1d
4
- data.tar.gz: c24b0efa60901ca6d8de7e8314a3b137c981601b866d837123a96722acf826db
3
+ metadata.gz: 4211eb0036bcbadf9d1683f10084bb8cbb109d26845d6990778e159b634b8d00
4
+ data.tar.gz: 1f97a3e5343958a86d54b7f1f87ada5d05a8f091d597253c3079c1a15eeb4dfa
5
5
  SHA512:
6
- metadata.gz: 3584864597a1631179212563d29ad188a55c2ed90dd1d36c6464e641ee6b5a85c01437516cdf2865c39e9f4f9bfc96605ae318ca456d722afc544e31d87e1801
7
- data.tar.gz: a7c59e427ca7014e0207dc8c3bb8eb1647c41f9193de021dc4a0ee98b913cfa54105f5bbf5301c2ec852feace0ed7d69622008acc2c09697b5ab1e9560fd89d3
6
+ metadata.gz: f3b8e0ee3cf2d8eadbb26d6be5b590418a844561062779f47eec0863b87cfbc65868a8c89624865eb74ee7f6825ee3fb1d2d26611353a30f805d40d567c1f1ac
7
+ data.tar.gz: 949e07f3f2208cbecc868a85c7bde5e67f38bf570ae1708496fd7745703c3fc133e8d48fc03c5c18c85264c0bff5b394d5ce2c2b2894daa8b7ca9033f9d68828
data/README.md CHANGED
@@ -74,6 +74,25 @@ Or, you can be more succinct with:
74
74
  ```ruby
75
75
  require 'censive'
76
76
 
77
- csv = Censive.new(File.read('data.csv'))
78
- csv.export(sep: "\t")
77
+ csv = Censive.new(DATA.read, excel: true, relax: true, strip: true)
78
+ csv.export(sep: "|", excel: true) # pipe separated, protect leading zeroes
79
+
80
+ __END__
81
+ Name,Age, Shoe
82
+ Alice, 27,5
83
+ Bob, 33,10 1/2
84
+ Charlie or "Chuck",=B2 + B3,9
85
+ "Doug E Fresh",="007",001122
86
+ Subtotal,=sum(B2:B5),="01234"
87
+ ```
88
+
89
+ Which returns:
90
+
91
+ ```
92
+ Name|Age|Shoe
93
+ Alice|27|5
94
+ Bob|33|10 1/2
95
+ "Charlie or ""Chuck"""|=B2 + B3|9
96
+ Doug E Fresh|="007"|="001122"
97
+ Subtotal|=sum(B2:B5)|="01234"
79
98
  ```
data/censive.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = "censive"
5
- s.version = "0.13"
5
+ s.version = "0.15"
6
6
  s.author = "Steve Shreeve"
7
7
  s.email = "steve.shreeve@gmail.com"
8
8
  s.summary =
@@ -11,5 +11,4 @@ Gem::Specification.new do |s|
11
11
  s.license = "MIT"
12
12
  s.files = `git ls-files`.split("\n") - %w[.gitignore]
13
13
  s.executables = `(cd bin 2>&1) > /dev/null && git ls-files .`.split("\n")
14
- s.add_dependency "strscan", ">= 3.0.6"
15
14
  end
data/lib/censive.rb CHANGED
@@ -4,25 +4,21 @@
4
4
  # censive - A quick and lightweight CSV handling library for Ruby
5
5
  #
6
6
  # Author: Steve Shreeve (steve.shreeve@gmail.com)
7
- # Date: Feb 3, 2023
7
+ # Date: Feb 5, 2023
8
8
  #
9
9
  # https://crystal-lang.org/api/1.7.2/CSV.html (Crystal's CSV library)
10
10
  # https://github.com/ruby/strscan/blob/master/ext/strscan/strscan.c
11
- # https://github.com/ruby/strscan/issues/53 for details
12
- # https://github.com/ruby/strscan/pull/54 for code
11
+ #
12
+ # Thanks to Sutou Kouhei (kou) for his excellent advice on using scan
13
13
  # ============================================================================
14
14
  # GOALS:
15
15
  # 1. Faster than Ruby's default CSV library
16
- # 2. Lightweight code base with streamlined logic
17
- # 3. Support for most non-compliant CSV variations (eg - @relax, @excel)
16
+ # 2. Lightweight code with streamlined and optimized logic
17
+ # 3. Support most non-compliant CSV variations (eg - @excel, @relax, @strip)
18
18
  #
19
- # TODO:
20
- # 1. Support IO streaming
21
- # 2. Add option to strip whitespace
22
- # 3. Support CSV headers in first row
19
+ # TODO: Support IO streaming
23
20
  # ============================================================================
24
21
 
25
- require "bundler/setup"
26
22
  require "strscan"
27
23
 
28
24
  class Censive < StringScanner
@@ -36,83 +32,71 @@ class Censive < StringScanner
36
32
  end
37
33
 
38
34
  def initialize(str=nil,
39
- drop: false , # drop trailing empty fields?
40
- eol: "\n" , # line endings for exports
41
- excel: false , # literals ="01" formulas =A1 + B2 http://bit.ly/3Y7jIvc
42
- mode: :compact, # export mode: compact or full
43
- out: nil , # output stream, needs to respond to <<
44
- quote: '"' , # quote character
45
- relax: false , # relax quote parsing so ,"Fo"o, => ,"Fo""o",
46
- sep: "," , # column separator character
47
- **opts # grab bag
35
+ drop: false , # drop trailing empty fields?
36
+ excel: false , # literals ="01" formulas =A1 + B2 http://bit.ly/3Y7jIvc
37
+ mode: :compact, # export mode: compact or full
38
+ out: $stdout , # output stream, needs to respond to <<
39
+ quote: '"' , # quote character
40
+ relax: false , # relax quote parsing so ,"Fo"o, => ,"Fo""o",
41
+ rowsep: "\n" , # row separator for export
42
+ sep: "," , # column separator character
43
+ strip: false , # strip fields when reading
44
+ **opts # grab bag
48
45
  )
49
46
  super(str || "")
50
47
  reset
51
48
 
52
- @drop = drop
53
- @eol = eol
54
- @excel = excel
55
- @mode = mode
56
- @out = out || $stdout
57
- @quote = quote
58
- @relax = relax
59
- @sep = sep
60
-
61
- @cr = "\r"
62
- @lf = "\n"
63
- @es = ""
64
- @eq = "="
65
- @esc = (@quote * 2)
49
+ # options
50
+ @drop = drop
51
+ @excel = excel
52
+ @mode = mode
53
+ @out = out
54
+ @quote = quote
55
+ @relax = relax
56
+ @rowsep = rowsep
57
+ @sep = sep
58
+ @strip = strip
59
+
60
+ # determined
61
+ @cr = "\r"
62
+ @lf = "\n"
63
+ @es = ""
64
+ @eq = "="
65
+ @esc = (@quote * 2)
66
+ @eol = /#{@cr}#{@lf}?|#{@lf}|\z/o # end of line
67
+ @eoc = /(?=#{"\\" + @sep}|#{@cr}|#{@lf}|\z)/o # end of cell
66
68
  end
67
69
 
68
70
  def reset(str=nil)
69
71
  self.string = str if str
70
72
  super()
71
- @char = curr_char
72
73
  @rows = nil
73
74
  @cols = @cells = 0
74
75
  end
75
76
 
76
77
  # ==[ Lexer ]==
77
78
 
78
- # pure ruby versions for debugging
79
- # def curr_char; @char = string[pos]; end
80
- # def next_char; scan(/./m); @char = string[pos]; end
81
-
82
- def curr_char; @char = currchar; end
83
- def next_char; @char = nextchar; end
84
-
85
79
  def next_token
86
- if @excel && @char == @eq
87
- excel = true
88
- next_char
89
- end
80
+ excel = true if @excel && scan(@eq)
90
81
 
91
- if @char == @quote # consume quoted cell
92
- match = ""
82
+ if scan(@quote) # consume quoted cell
83
+ token = ""
93
84
  while true
94
- next_char
95
- match << (scan_until(/(?=#{@quote})/o) or bomb "unclosed quote")
96
- match << @quote and next if next_char == @quote
97
- break if [@sep,@cr,@lf,@es,nil].include?(@char)
85
+ token << (scan_until(/#{@quote}/o) or bomb "unclosed quote")[0..-2]
86
+ token << @quote and next if scan(@quote)
87
+ break if scan(@eoc)
98
88
  @relax or bomb "invalid character after quote"
99
- match << @quote + scan_until(/(?=#{@quote})/o) + @quote
100
- end
101
- next_char if @char == @sep
102
- match
103
- elsif [@sep,@cr,@lf,@es,nil].include?(@char)
104
- case @char
105
- when @sep then next_char; @es
106
- when @cr then next_char == @lf and next_char; nil
107
- when @lf then next_char; nil
108
- else nil
89
+ quoted = scan_until(/#{@quote}/o) or bomb "invalid inline quote"
90
+ token << @quote + quoted + @quote
109
91
  end
92
+ elsif scan(@sep) then return @es
93
+ elsif scan(@eol) then return nil
110
94
  else # consume unquoted cell
111
- match = scan_until(/(?=#{@sep}|#{@cr}|#{@lf}|\z)/o) or bomb "unexpected character"
112
- match.prepend(@eq) if excel
113
- next_char if curr_char == @sep
114
- match
95
+ token = scan_until(@eoc) or bomb "unexpected character"
96
+ token.prepend(@eq) if excel
115
97
  end
98
+ scan(@sep)
99
+ @strip ? token.strip : token
116
100
  end
117
101
 
118
102
  def bomb(msg)
@@ -143,7 +127,7 @@ class Censive < StringScanner
143
127
 
144
128
  # returns 2 (must be quoted and escaped), 1 (must be quoted), 0 (neither)
145
129
  def grok(str)
146
- if idx = str.index(/(#{@quote})|#{@sep}|#{@cr}|#{@lf}/o) #!# FIXME: regex injection?
130
+ if idx = str.index(/(#{@quote})|#{"\\"+@sep}|#{@cr}|#{@lf}/o)
147
131
  $1 ? 2 : str.index(/#{@quote}/o, idx) ? 2 : 1
148
132
  else
149
133
  0
@@ -164,7 +148,7 @@ class Censive < StringScanner
164
148
  row
165
149
  when 1
166
150
  row.map do |col|
167
- col.match?(/#{@sep}|#{@cr}|#{@lf}/o) ? "#{q}#{col}#{q}" : col
151
+ col.match?(/#{"\\"+@sep}|#{@cr}|#{@lf}/o) ? "#{q}#{col}#{q}" : col
168
152
  end
169
153
  else
170
154
  row.map do |col|
@@ -186,7 +170,7 @@ class Censive < StringScanner
186
170
  end
187
171
  end.join(s)
188
172
 
189
- @out << out + @eol
173
+ @out << out + @rowsep
190
174
  end
191
175
 
192
176
  def each
metadata CHANGED
@@ -1,29 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: censive
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.13'
4
+ version: '0.15'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steve Shreeve
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-02-04 00:00:00.000000000 Z
12
- dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: strscan
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ">="
18
- - !ruby/object:Gem::Version
19
- version: 3.0.6
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - ">="
25
- - !ruby/object:Gem::Version
26
- version: 3.0.6
11
+ date: 2023-02-05 00:00:00.000000000 Z
12
+ dependencies: []
27
13
  description: A quick and lightweight CSV handling library for Ruby
28
14
  email: steve.shreeve@gmail.com
29
15
  executables: []