censive 0.14 → 0.16

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (4):
  1. checksums.yaml +4 -4
  2. data/censive.gemspec +1 -2
  3. data/lib/censive.rb +49 -57
  4. metadata +2 -16
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '072390db5028d3fc9f2bf9c22f95616dea749a502c6f010c25f222f0f8bd86ec'
4
- data.tar.gz: 012b5549ca2454f22db2b5bdc225e4a703c622d6a636ddea2f31bd6380556164
3
+ metadata.gz: 7abd1490426ab80b6b1cc3c7b28a590eb22c5dd565405662dd82bd1c427b013b
4
+ data.tar.gz: adb90312a493b92ccb36e7354153a30f530f33d2b205fd6ebbea49ca1e80efe5
5
5
  SHA512:
6
- metadata.gz: 285efce01bbdee5e2f2505757342de56918025a2f2a146e1ec5ba9677a2c27cb8698ef0e68170fdeb74cf7e7a82665ea7e533732b813a8c39680bff7c3c22674
7
- data.tar.gz: 0f36dcba5cac1f608db296fa1728dc87fc50134d1d356bc1d699533c75b02c8eb4dcfa5b33326b68d78f4bb9edaff39d7da975c04eaa6549b1536e2e113bbb61
6
+ metadata.gz: f603db91e71cce8d72322d9a248f81f625238aa2a7d5107a6849d18ecdb00cf94865f2ad716afde1df79e4d0dd126765968822e3e9d53f3bea09886f2ae08d89
7
+ data.tar.gz: b5fc1fa9b2a309677091334d50200ada240ea2490d23d427cc32774a828a672ce1be1c22e156d21a7ef0ebdfc604f7b003386f46d4bc7fb2b6fdb50dc6d2a857
data/censive.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = "censive"
5
- s.version = "0.14"
5
+ s.version = "0.16"
6
6
  s.author = "Steve Shreeve"
7
7
  s.email = "steve.shreeve@gmail.com"
8
8
  s.summary =
@@ -11,5 +11,4 @@ Gem::Specification.new do |s|
11
11
  s.license = "MIT"
12
12
  s.files = `git ls-files`.split("\n") - %w[.gitignore]
13
13
  s.executables = `(cd bin 2>&1) > /dev/null && git ls-files .`.split("\n")
14
- s.add_dependency "strscan", ">= 3.0.6"
15
14
  end
data/lib/censive.rb CHANGED
@@ -4,12 +4,12 @@
4
4
  # censive - A quick and lightweight CSV handling library for Ruby
5
5
  #
6
6
  # Author: Steve Shreeve (steve.shreeve@gmail.com)
7
- # Date: Feb 4, 2023
7
+ # Date: Feb 5, 2023
8
8
  #
9
9
  # https://crystal-lang.org/api/1.7.2/CSV.html (Crystal's CSV library)
10
10
  # https://github.com/ruby/strscan/blob/master/ext/strscan/strscan.c
11
- # https://github.com/ruby/strscan/issues/53 for details
12
- # https://github.com/ruby/strscan/pull/54 for code
11
+ #
12
+ # Thanks to Sutou Kouhei (kou) for his excellent advice on using scan
13
13
  # ============================================================================
14
14
  # GOALS:
15
15
  # 1. Faster than Ruby's default CSV library
@@ -19,7 +19,6 @@
19
19
  # TODO: Support IO streaming
20
20
  # ============================================================================
21
21
 
22
- require "bundler/setup"
23
22
  require "strscan"
24
23
 
25
24
  class Censive < StringScanner
@@ -33,41 +32,44 @@ class Censive < StringScanner
33
32
  end
34
33
 
35
34
  def initialize(str=nil,
36
- drop: false , # drop trailing empty fields?
37
- eol: "\n" , # line endings for exports
38
- excel: false , # literals ="01" formulas =A1 + B2 http://bit.ly/3Y7jIvc
39
- mode: :compact, # export mode: compact or full
40
- out: nil , # output stream, needs to respond to <<
41
- quote: '"' , # quote character
42
- relax: false , # relax quote parsing so ,"Fo"o, => ,"Fo""o",
43
- sep: "," , # column separator character
44
- strip: false , # strip fields when reading
45
- **opts # grab bag
35
+ drop: false , # drop trailing empty fields?
36
+ excel: false , # literals ="01" formulas =A1 + B2 http://bit.ly/3Y7jIvc
37
+ mode: :compact, # export mode: compact or full
38
+ out: $stdout , # output stream, needs to respond to <<
39
+ quote: '"' , # quote character
40
+ relax: false , # relax quote parsing so ,"Fo"o, => ,"Fo""o",
41
+ rowsep: "\n" , # row separator for export
42
+ sep: "," , # column separator character
43
+ strip: false , # strip fields when reading
44
+ **opts # grab bag
46
45
  )
47
46
  super(str || "")
48
47
  reset
49
48
 
50
- @drop = drop
51
- @eol = eol
52
- @excel = excel
53
- @mode = mode
54
- @out = out || $stdout
55
- @quote = quote
56
- @relax = relax
57
- @sep = sep
58
- @strip = strip
59
-
60
- @cr = "\r"
61
- @lf = "\n"
62
- @es = ""
63
- @eq = "="
64
- @esc = (@quote * 2)
49
+ # options
50
+ @drop = drop
51
+ @excel = excel
52
+ @mode = mode
53
+ @out = out
54
+ @quote = quote
55
+ @relax = relax
56
+ @rowsep = rowsep
57
+ @sep = sep
58
+ @strip = strip
59
+
60
+ # determined
61
+ @cr = "\r"
62
+ @lf = "\n"
63
+ @es = ""
64
+ @eq = "="
65
+ @esc = (@quote * 2)
66
+ @eol = /#{@cr}#{@lf}?|#{@lf}|\z/o # end of line
67
+ @eoc = /(?=#{"\\" + @sep}|#{@cr}|#{@lf}|\z)/o # end of cell
65
68
  end
66
69
 
67
70
  def reset(str=nil)
68
71
  self.string = str if str
69
72
  super()
70
- @char = currchar
71
73
  @rows = nil
72
74
  @cols = @cells = 0
73
75
  end
@@ -75,36 +77,25 @@ class Censive < StringScanner
75
77
  # ==[ Lexer ]==
76
78
 
77
79
  def next_token
78
- if @excel && @char == @eq
79
- excel = true
80
- @char = nextchar
81
- end
80
+ excel = true if @excel && scan(@eq)
82
81
 
83
- if @char == @quote # consume quoted cell
82
+ if scan(@quote) # consume quoted cell
84
83
  token = ""
85
84
  while true
86
- @char = nextchar
87
- token << (scan_until(/(?=#{@quote})/o) or bomb "unclosed quote")
88
- token << @quote and next if (@char = nextchar) == @quote
89
- break if [@sep,@cr,@lf,@es,nil].include?(@char)
85
+ token << (scan_until(/#{@quote}/o) or bomb "unclosed quote")[0..-2]
86
+ token << @quote and next if scan(@quote)
87
+ break if scan(@eoc)
90
88
  @relax or bomb "invalid character after quote"
91
- token << @quote + scan_until(/(?=#{@quote})/o) + @quote
92
- end
93
- @char = nextchar if @char == @sep
94
- @strip ? token.strip : token
95
- elsif [@sep,@cr,@lf,@es,nil].include?(@char)
96
- case @char
97
- when @sep then @char = nextchar ; @es
98
- when @cr then (@char = nextchar) == @lf and @char = nextchar; nil
99
- when @lf then @char = nextchar ; nil
100
- else nil
89
+ token << @quote + (scan_until(/#{@quote}/o) or bomb "bad inline quote")
101
90
  end
91
+ elsif scan(@sep) then return @es
92
+ elsif scan(@eol) then return nil
102
93
  else # consume unquoted cell
103
- token = scan_until(/(?=#{"\\"+@sep}|#{@cr}|#{@lf}|\z)/o) or bomb "unexpected character"
94
+ token = scan_until(@eoc) or bomb "unexpected character"
104
95
  token.prepend(@eq) if excel
105
- @char = nextchar if (@char = currchar) == @sep
106
- @strip ? token.strip : token
107
96
  end
97
+ scan(@sep)
98
+ @strip ? token.strip : token
108
99
  end
109
100
 
110
101
  def bomb(msg)
@@ -178,7 +169,7 @@ class Censive < StringScanner
178
169
  end
179
170
  end.join(s)
180
171
 
181
- @out << out + @eol
172
+ @out << out + @rowsep
182
173
  end
183
174
 
184
175
  def each
@@ -201,14 +192,15 @@ class Censive < StringScanner
201
192
  end
202
193
 
203
194
  if __FILE__ == $0
204
- raw = DATA.read
205
- # raw = DATA.gets("\n\n").chomp
195
+ raw = DATA.gets("\n\n").chomp
206
196
  # raw = File.read(ARGV.first || "lc-2023.csv")
207
- csv = Censive.new(raw, excel: true, relax: true, strip: true)
208
- csv.export(sep: "|", excel: true)
197
+ csv = Censive.new(raw, excel: true, relax: true)
198
+ csv.export # (sep: ",", excel: true)
209
199
  end
210
200
 
211
201
  __END__
202
+ 123,"CHO, JOELLE "JOJO"",456
203
+
212
204
  Name,Age,Shoe
213
205
  Alice,27,5
214
206
  Bob,33,10 1/2
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: censive
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.14'
4
+ version: '0.16'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steve Shreeve
@@ -9,21 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
  date: 2023-02-05 00:00:00.000000000 Z
12
- dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: strscan
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ">="
18
- - !ruby/object:Gem::Version
19
- version: 3.0.6
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - ">="
25
- - !ruby/object:Gem::Version
26
- version: 3.0.6
12
+ dependencies: []
27
13
  description: A quick and lightweight CSV handling library for Ruby
28
14
  email: steve.shreeve@gmail.com
29
15
  executables: []