censive 0.14 → 0.15

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/censive.gemspec +1 -2
  3. data/lib/censive.rb +48 -57
  4. metadata +2 -16
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '072390db5028d3fc9f2bf9c22f95616dea749a502c6f010c25f222f0f8bd86ec'
4
- data.tar.gz: 012b5549ca2454f22db2b5bdc225e4a703c622d6a636ddea2f31bd6380556164
3
+ metadata.gz: 4211eb0036bcbadf9d1683f10084bb8cbb109d26845d6990778e159b634b8d00
4
+ data.tar.gz: 1f97a3e5343958a86d54b7f1f87ada5d05a8f091d597253c3079c1a15eeb4dfa
5
5
  SHA512:
6
- metadata.gz: 285efce01bbdee5e2f2505757342de56918025a2f2a146e1ec5ba9677a2c27cb8698ef0e68170fdeb74cf7e7a82665ea7e533732b813a8c39680bff7c3c22674
7
- data.tar.gz: 0f36dcba5cac1f608db296fa1728dc87fc50134d1d356bc1d699533c75b02c8eb4dcfa5b33326b68d78f4bb9edaff39d7da975c04eaa6549b1536e2e113bbb61
6
+ metadata.gz: f3b8e0ee3cf2d8eadbb26d6be5b590418a844561062779f47eec0863b87cfbc65868a8c89624865eb74ee7f6825ee3fb1d2d26611353a30f805d40d567c1f1ac
7
+ data.tar.gz: 949e07f3f2208cbecc868a85c7bde5e67f38bf570ae1708496fd7745703c3fc133e8d48fc03c5c18c85264c0bff5b394d5ce2c2b2894daa8b7ca9033f9d68828
data/censive.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = "censive"
5
- s.version = "0.14"
5
+ s.version = "0.15"
6
6
  s.author = "Steve Shreeve"
7
7
  s.email = "steve.shreeve@gmail.com"
8
8
  s.summary =
@@ -11,5 +11,4 @@ Gem::Specification.new do |s|
11
11
  s.license = "MIT"
12
12
  s.files = `git ls-files`.split("\n") - %w[.gitignore]
13
13
  s.executables = `(cd bin 2>&1) > /dev/null && git ls-files .`.split("\n")
14
- s.add_dependency "strscan", ">= 3.0.6"
15
14
  end
data/lib/censive.rb CHANGED
@@ -4,12 +4,12 @@
4
4
  # censive - A quick and lightweight CSV handling library for Ruby
5
5
  #
6
6
  # Author: Steve Shreeve (steve.shreeve@gmail.com)
7
- # Date: Feb 4, 2023
7
+ # Date: Feb 5, 2023
8
8
  #
9
9
  # https://crystal-lang.org/api/1.7.2/CSV.html (Crystal's CSV library)
10
10
  # https://github.com/ruby/strscan/blob/master/ext/strscan/strscan.c
11
- # https://github.com/ruby/strscan/issues/53 for details
12
- # https://github.com/ruby/strscan/pull/54 for code
11
+ #
12
+ # Thanks to Sutou Kouhei (kou) for his excellent advice on using scan
13
13
  # ============================================================================
14
14
  # GOALS:
15
15
  # 1. Faster than Ruby's default CSV library
@@ -19,7 +19,6 @@
19
19
  # TODO: Support IO streaming
20
20
  # ============================================================================
21
21
 
22
- require "bundler/setup"
23
22
  require "strscan"
24
23
 
25
24
  class Censive < StringScanner
@@ -33,41 +32,44 @@ class Censive < StringScanner
33
32
  end
34
33
 
35
34
  def initialize(str=nil,
36
- drop: false , # drop trailing empty fields?
37
- eol: "\n" , # line endings for exports
38
- excel: false , # literals ="01" formulas =A1 + B2 http://bit.ly/3Y7jIvc
39
- mode: :compact, # export mode: compact or full
40
- out: nil , # output stream, needs to respond to <<
41
- quote: '"' , # quote character
42
- relax: false , # relax quote parsing so ,"Fo"o, => ,"Fo""o",
43
- sep: "," , # column separator character
44
- strip: false , # strip fields when reading
45
- **opts # grab bag
35
+ drop: false , # drop trailing empty fields?
36
+ excel: false , # literals ="01" formulas =A1 + B2 http://bit.ly/3Y7jIvc
37
+ mode: :compact, # export mode: compact or full
38
+ out: $stdout , # output stream, needs to respond to <<
39
+ quote: '"' , # quote character
40
+ relax: false , # relax quote parsing so ,"Fo"o, => ,"Fo""o",
41
+ rowsep: "\n" , # row separator for export
42
+ sep: "," , # column separator character
43
+ strip: false , # strip fields when reading
44
+ **opts # grab bag
46
45
  )
47
46
  super(str || "")
48
47
  reset
49
48
 
50
- @drop = drop
51
- @eol = eol
52
- @excel = excel
53
- @mode = mode
54
- @out = out || $stdout
55
- @quote = quote
56
- @relax = relax
57
- @sep = sep
58
- @strip = strip
59
-
60
- @cr = "\r"
61
- @lf = "\n"
62
- @es = ""
63
- @eq = "="
64
- @esc = (@quote * 2)
49
+ # options
50
+ @drop = drop
51
+ @excel = excel
52
+ @mode = mode
53
+ @out = out
54
+ @quote = quote
55
+ @relax = relax
56
+ @rowsep = rowsep
57
+ @sep = sep
58
+ @strip = strip
59
+
60
+ # determined
61
+ @cr = "\r"
62
+ @lf = "\n"
63
+ @es = ""
64
+ @eq = "="
65
+ @esc = (@quote * 2)
66
+ @eol = /#{@cr}#{@lf}?|#{@lf}|\z/o # end of line
67
+ @eoc = /(?=#{"\\" + @sep}|#{@cr}|#{@lf}|\z)/o # end of cell
65
68
  end
66
69
 
67
70
  def reset(str=nil)
68
71
  self.string = str if str
69
72
  super()
70
- @char = currchar
71
73
  @rows = nil
72
74
  @cols = @cells = 0
73
75
  end
@@ -75,36 +77,26 @@ class Censive < StringScanner
75
77
  # ==[ Lexer ]==
76
78
 
77
79
  def next_token
78
- if @excel && @char == @eq
79
- excel = true
80
- @char = nextchar
81
- end
80
+ excel = true if @excel && scan(@eq)
82
81
 
83
- if @char == @quote # consume quoted cell
82
+ if scan(@quote) # consume quoted cell
84
83
  token = ""
85
84
  while true
86
- @char = nextchar
87
- token << (scan_until(/(?=#{@quote})/o) or bomb "unclosed quote")
88
- token << @quote and next if (@char = nextchar) == @quote
89
- break if [@sep,@cr,@lf,@es,nil].include?(@char)
85
+ token << (scan_until(/#{@quote}/o) or bomb "unclosed quote")[0..-2]
86
+ token << @quote and next if scan(@quote)
87
+ break if scan(@eoc)
90
88
  @relax or bomb "invalid character after quote"
91
- token << @quote + scan_until(/(?=#{@quote})/o) + @quote
92
- end
93
- @char = nextchar if @char == @sep
94
- @strip ? token.strip : token
95
- elsif [@sep,@cr,@lf,@es,nil].include?(@char)
96
- case @char
97
- when @sep then @char = nextchar ; @es
98
- when @cr then (@char = nextchar) == @lf and @char = nextchar; nil
99
- when @lf then @char = nextchar ; nil
100
- else nil
89
+ quoted = scan_until(/#{@quote}/o) or bomb "invalid inline quote"
90
+ token << @quote + quoted + @quote
101
91
  end
92
+ elsif scan(@sep) then return @es
93
+ elsif scan(@eol) then return nil
102
94
  else # consume unquoted cell
103
- token = scan_until(/(?=#{"\\"+@sep}|#{@cr}|#{@lf}|\z)/o) or bomb "unexpected character"
95
+ token = scan_until(@eoc) or bomb "unexpected character"
104
96
  token.prepend(@eq) if excel
105
- @char = nextchar if (@char = currchar) == @sep
106
- @strip ? token.strip : token
107
97
  end
98
+ scan(@sep)
99
+ @strip ? token.strip : token
108
100
  end
109
101
 
110
102
  def bomb(msg)
@@ -178,7 +170,7 @@ class Censive < StringScanner
178
170
  end
179
171
  end.join(s)
180
172
 
181
- @out << out + @eol
173
+ @out << out + @rowsep
182
174
  end
183
175
 
184
176
  def each
@@ -201,11 +193,10 @@ class Censive < StringScanner
201
193
  end
202
194
 
203
195
  if __FILE__ == $0
204
- raw = DATA.read
205
- # raw = DATA.gets("\n\n").chomp
196
+ raw = DATA.gets("\n\n").chomp
206
197
  # raw = File.read(ARGV.first || "lc-2023.csv")
207
- csv = Censive.new(raw, excel: true, relax: true, strip: true)
208
- csv.export(sep: "|", excel: true)
198
+ csv = Censive.new(raw, excel: true, relax: true)
199
+ csv.export # (sep: ",", excel: true)
209
200
  end
210
201
 
211
202
  __END__
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: censive
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.14'
4
+ version: '0.15'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steve Shreeve
@@ -9,21 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
  date: 2023-02-05 00:00:00.000000000 Z
12
- dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: strscan
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ">="
18
- - !ruby/object:Gem::Version
19
- version: 3.0.6
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - ">="
25
- - !ruby/object:Gem::Version
26
- version: 3.0.6
12
+ dependencies: []
27
13
  description: A quick and lightweight CSV handling library for Ruby
28
14
  email: steve.shreeve@gmail.com
29
15
  executables: []