censive 0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (6) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +3 -0
  4. data/censive.gemspec +14 -0
  5. data/lib/censive.rb +133 -0
  6. metadata +46 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 78a04712f2c737b263765b117a0614bddeda6e4c233409827ed900983b061a64
4
+ data.tar.gz: e42610ad444e4b4ed9db374f1ee1764c97619fa54235c3bb18e537513804f940
5
+ SHA512:
6
+ metadata.gz: 294e8879052426037bbaed072171ecb8ca0abff47156a4a1d51a3f4531229a6f755ae7db4f8e5dfb7422ad25edf1e145d497bd39d99e46d895cdf6adb4988edc
7
+ data.tar.gz: d087b32edd1e467143a1e785bd1aa89d2e096920027185ff2dc07a275317b034521b1143f962774270040ae57b079c3a61fa045d10e79f194312787125527fa0
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Steve Shreeve
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,3 @@
1
+ # censive
2
+
3
+ A quick and lightweight CSV handling library for Ruby
data/censive.gemspec ADDED
@@ -0,0 +1,14 @@
1
+ # encoding: utf-8
2
+
3
# Gem specification for censive.
#
# The file list comes from git; the executables are derived from that list
# rather than by shelling out to `cd bin && ...`, which fails (and prints a
# shell error) whenever the project has no bin/ directory.
Gem::Specification.new do |s|
  s.name        = "censive"
  s.version     = "0.1"
  s.author      = "Steve Shreeve"
  s.email       = "steve.shreeve@gmail.com"
  s.summary     = "A quick and lightweight CSV handling library for Ruby"
  s.description = "A quick and lightweight CSV handling library for Ruby"
  s.homepage    = "https://github.com/shreeve/censive"
  s.license     = "MIT"
  s.files       = `git ls-files`.split("\n") - %w[.gitignore]
  # Paths are reported relative to bin/, matching what `git ls-files .` run
  # inside bin/ would have produced.
  s.executables = s.files.grep(%r{\Abin/}) { |f| f.delete_prefix("bin/") }
end
data/lib/censive.rb ADDED
@@ -0,0 +1,133 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # ==============================================================================
4
+ # censive - A quick and lightweight CSV handling library for Ruby
5
+ #
6
+ # Author: Steve Shreeve (steve.shreeve@gmail.com)
7
+ # Date: Jan 30, 2023
8
+ # ==============================================================================
9
+ # The goals are:
10
+ #
11
+ # 1. Faster than Ruby's default CSV library
12
+ # 2. Lightweight code base with streamlined method calls
13
+ # 3. Eventually support IO streaming
14
+ #
15
+ # NOTE: Only getch and scan_until advance strscan's position
16
+ # ==============================================================================
17
+
18
+ require 'strscan'
19
+
20
# A small CSV lexer/parser built directly on top of StringScanner.
#
# Two pieces of state are carried between calls to #next_token:
#
# * @char - the byte at the current scan position (nil at end of input).
#           Every delimiter is a single ASCII byte, so one byte is enough to
#           classify what comes next, and it stays correct when the data
#           contains multibyte characters (StringScanner#pos is a *byte*
#           offset, so it must not be used as a character index).
# * @flag - what the previous token left pending: @es after a separator was
#           consumed (an empty cell is owed if the row ends right here), or
#           @cr / @lf when a row terminator still has to be consumed.
#
# NOTE: Only getch and scan_until advance the scan position.
class Censive < StringScanner
  # Takes the same arguments as StringScanner.new; the first one is the CSV
  # text to parse.
  def initialize(...)
    super
    reset
    @sep   = ','.freeze  # make this a param
    @quote = '"'.freeze  # make this a param
    @es    = ''.freeze
    @cr    = "\r".freeze
    @lf    = "\n".freeze

    # Zero-width lookaheads, so scan_until stops just *before* the delimiter.
    # Built once per instance and escaped, so they stay correct if the
    # separator or quote ever becomes configurable.
    @eoq = /(?=#{Regexp.escape(@quote)})/
    @eoc = /(?=#{Regexp.escape(@sep)}|\r|\n|\z)/
  end

  # Rewinds the scanner and clears the lexer state.
  def reset
    super
    @char = peek_char
    @flag = nil
  end

  # ==[ Lexer ]==

  # Consumes one character and returns the byte that now sits at the scan
  # position (nil at end of input).
  def next_char
    getch
    @char = peek_char
  end

  # Returns the next cell of the current row as a String, or nil when the row
  # (or the input) has ended. Aborts via #bomb on malformed quoting.
  def next_token
    # First settle whatever the previous token left pending.
    case @flag
    when @es # a separator was just consumed
      @flag = nil
      return @es if row_end? # "a,<EOL>" owes one more (empty) cell
    when @cr
      @flag = nil
      next_char if next_char == @lf # swallow "\r" and an optional "\n"
    when @lf
      @flag = nil
      next_char
    end

    case @char
    when @quote then quoted_cell
    when @sep   then @flag = @es; next_char; @es # empty cell before a separator
    when @cr    then @flag = @cr; nil            # row ends; consume terminator next time
    when @lf    then @flag = @lf; nil
    when nil    then nil                         # end of input
    else unquoted_cell
    end
  end

  # Terminates the program with a message describing where parsing failed.
  #
  # @param msg [String] short description of the problem
  def bomb(msg)
    at   = charpos
    from = [at - 4, 0].max # a negative start would wrap to the end of the string
    abort "censive: #{msg} at character #{at} near '#{string[from, 7]}'"
  end

  # ==[ Parser ]==

  # Parses the remaining input.
  #
  # @return [Array<Array<String>>] one array of cells per row; a blank line
  #   yields an empty array
  def parse
    @rows = []
    @cols = @cells = 0
    while (row = next_row)
      @rows << row
      size = row.size
      @cols = size if size > @cols
      @cells += size
    end
    @rows
  end

  # Reads one row.
  #
  # @return [Array<String>, nil] the cells of the next row, [] for a blank
  #   line, or nil once the input is exhausted
  def next_row
    token = next_token
    # A nil first token with a row terminator still pending is a blank line,
    # not the end of the input; treating it as the end would silently drop
    # every row after it.
    return (@flag ? [] : nil) if token.nil?

    row = [token]
    while (token = next_token)
      row << token
    end
    row
  end

  # ==[ Helpers ]==

  # Prints row/column/cell/byte counts for the last #parse call.
  def stats
    bytes = string.bytesize
    wide  = bytes.to_s.size
    puts "%#{wide}d rows"    % @rows.size
    puts "%#{wide}d columns" % @cols
    puts "%#{wide}d cells"   % @cells
    puts "%#{wide}d bytes"   % bytes
  end

  private

  # The byte at the scan position, or nil at end of input.
  def peek_char
    eos? ? nil : peek(1)
  end

  # True when the scan position is at a row terminator or at end of input.
  def row_end?
    @char.nil? || @char == @cr || @char == @lf
  end

  # Consumes a quoted cell (the scan position is on its opening quote) and
  # returns its content with doubled quotes collapsed to one.
  def quoted_cell
    cell = +""
    loop do
      getch # consume the opening quote, or the second quote of an escaped pair
      cell << (scan_until(@eoq) || bomb("unclosed quote"))
      case next_char # consume the closing quote and look at what follows it
      when @sep          then @flag = @es; next_char; break
      when @quote        then cell << @quote # "" inside quotes is a literal quote
      when @cr, @lf, nil then break
      else bomb "unexpected character after quote"
      end
    end
    cell
  end

  # Consumes an unquoted cell up to (and including) a following separator.
  def unquoted_cell
    cell = scan_until(@eoc) || bomb("unexpected character")
    @char = peek_char
    if @char == @sep
      @flag = @es # remember the separator in case the row ends right after it
      next_char
    end
    cell
  end
end
114
+
115
+ # ==[ Test it out... ]==
116
+
117
# Demo driver: parse the CSV file named on the command line (default: z.csv),
# print each row unless the input is large, then print summary statistics.
# Guarded so that merely requiring the library does not read files or print.
if __FILE__ == $0
  path = ARGV.first || "z.csv"
  data = File.read(path)

  STDOUT.sync = true

  csv  = Censive.new(data)
  rows = csv.parse

  # Dumping every row is only useful for small inputs.
  rows.each { |cols| p cols } unless data.size > 1e6

  csv.stats
end
metadata ADDED
@@ -0,0 +1,46 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: censive
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.1'
5
+ platform: ruby
6
+ authors:
7
+ - Steve Shreeve
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2023-01-31 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: A quick and lightweight CSV handling library for Ruby
14
+ email: steve.shreeve@gmail.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - LICENSE
20
+ - README.md
21
+ - censive.gemspec
22
+ - lib/censive.rb
23
+ homepage: https://github.com/shreeve/censive
24
+ licenses:
25
+ - MIT
26
+ metadata: {}
27
+ post_install_message:
28
+ rdoc_options: []
29
+ require_paths:
30
+ - lib
31
+ required_ruby_version: !ruby/object:Gem::Requirement
32
+ requirements:
33
+ - - ">="
34
+ - !ruby/object:Gem::Version
35
+ version: '0'
36
+ required_rubygems_version: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ requirements: []
42
+ rubygems_version: 3.4.5
43
+ signing_key:
44
+ specification_version: 4
45
+ summary: A quick and lightweight CSV handling library for Ruby
46
+ test_files: []