censive 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (6) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +3 -0
  4. data/censive.gemspec +14 -0
  5. data/lib/censive.rb +133 -0
  6. metadata +46 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 78a04712f2c737b263765b117a0614bddeda6e4c233409827ed900983b061a64
4
+ data.tar.gz: e42610ad444e4b4ed9db374f1ee1764c97619fa54235c3bb18e537513804f940
5
+ SHA512:
6
+ metadata.gz: 294e8879052426037bbaed072171ecb8ca0abff47156a4a1d51a3f4531229a6f755ae7db4f8e5dfb7422ad25edf1e145d497bd39d99e46d895cdf6adb4988edc
7
+ data.tar.gz: d087b32edd1e467143a1e785bd1aa89d2e096920027185ff2dc07a275317b034521b1143f962774270040ae57b079c3a61fa045d10e79f194312787125527fa0
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Steve Shreeve
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,3 @@
1
+ # censive
2
+
3
+ A quick and lightweight CSV handling library for Ruby
data/censive.gemspec ADDED
@@ -0,0 +1,14 @@
1
+ # encoding: utf-8
2
+
3
# Gem specification for censive, a small CSV parsing library.
Gem::Specification.new do |s|
  s.name        = "censive"
  s.version     = "0.1"
  s.author      = "Steve Shreeve"
  s.email       = "steve.shreeve@gmail.com"
  s.summary     = "A quick and lightweight CSV handling library for Ruby"
  s.description = "A quick and lightweight CSV handling library for Ruby"
  s.homepage    = "https://github.com/shreeve/censive"
  s.license     = "MIT"

  # lib/censive.rb uses `def initialize(...)` argument forwarding, which was
  # introduced in Ruby 2.7; declare it so older interpreters refuse to install.
  s.required_ruby_version = ">= 2.7"

  s.files       = `git ls-files`.split("\n") - %w[.gitignore]

  # Derive executables from the tracked file list instead of shelling out to
  # `cd bin`, which fails noisily when the project has no bin/ directory.
  s.executables = s.files.grep(%r{\Abin/}) { |f| File.basename(f) }
end
data/lib/censive.rb ADDED
@@ -0,0 +1,133 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # ==============================================================================
4
+ # censive - A quick and lightweight CSV handling library for Ruby
5
+ #
6
+ # Author: Steve Shreeve (steve.shreeve@gmail.com)
7
+ # Date: Jan 30, 2023
8
+ # ==============================================================================
9
+ # The goals are:
10
+ #
11
+ # 1. Faster than Ruby's default CSV library
12
+ # 2. Lightweight code base with streamlined method calls
13
+ # 3. Eventually support IO streaming
14
+ #
15
+ # NOTE: Only getch and scan_until advance strscan's position
16
+ # ==============================================================================
17
+
18
+ require 'strscan'
19
+
20
# A small CSV lexer/parser built directly on top of StringScanner.
#
# Usage:
#
#   rows = Censive.new("a,b\n\"c,d\",e\n").parse
#   # => [["a", "b"], ["c,d", "e"]]
#
# Cells are separated by "," and may be wrapped in double quotes; inside a
# quoted cell a doubled quote ("") stands for one literal quote. Rows end at
# CR, LF, CRLF or the end of the input. A blank line yields an empty row ([]).
#
# The lexer keeps a one-character lookahead in @char. Only next_char, getch
# and scan_until are used to move the scan pointer; moving it through other
# StringScanner methods (pos=, scan, terminate, ...) leaves @char stale until
# reset is called.
class Censive < StringScanner
  # Accepts the same arguments as StringScanner.new (the string to parse).
  def initialize(...)
    super
    @sep   = ','  .freeze # make this a param
    @quote = '"'  .freeze # make this a param
    @es    = ""   .freeze
    @cr    = "\r" .freeze
    @lf    = "\n" .freeze

    # Lookahead patterns: scan_until stops *before* the delimiter, so the
    # delimiter itself is still available to be inspected via @char.
    @quote_ahead = /(?=#{Regexp.escape(@quote)})/
    @cell_end    = /(?=#{Regexp.escape(@sep)}|\r|\n|\z)/

    # Totals maintained by parse; initialised here so stats is always safe.
    @rows  = []
    @cols  = @cells = 0

    reset
  end

  # Rewinds to the start of the string and clears the lexer state.
  # Returns self, like StringScanner#reset.
  def reset
    super
    @char = current_char
    @flag = nil
    self
  end

  # ==[ Lexer ]==

  # Consumes one character and returns the new lookahead (nil at the end of
  # the input).
  def next_char
    getch
    @char = current_char
  end

  # Returns the next cell of the current row as a String, or nil when the row
  # is finished (end of line or end of input).
  #
  # @flag carries state between calls:
  #   @es       - a separator was just consumed; if the row ends right after
  #               it, one more (empty) cell is still owed
  #   @cr / @lf - the previous call stopped in front of a line break that has
  #               not been consumed yet
  def next_token
    if @flag
      pending, @flag = @flag, nil
      case pending
      when @es
        return @es if @char.nil? || @char == @cr || @char == @lf
      when @cr
        next_char                 # consume the CR...
        next_char if @char == @lf # ...and the LF of a CRLF pair
      when @lf
        next_char
      end
    end

    case @char
    when @quote
      consume_quoted_cell
    when @sep # empty cell
      consume_separator
      @es
    when @cr, @lf # end of row; the line break is consumed by the next call
      @flag = @char
      nil
    when nil # end of input
      nil
    else
      consume_unquoted_cell
    end
  end

  # Reports a fatal parse error. Writes the message to STDERR and terminates
  # via Kernel#abort (raises SystemExit), as this library has always done.
  def bomb(msg)
    # pos is a byte offset, so slice by bytes; clamp the start so that errors
    # near the beginning do not wrap around to the end of the string.
    start   = [pos - 4, 0].max
    context = string.byteslice(start, 7).to_s.scrub("?")
    abort "censive: #{msg} at character #{pos} near '#{context}'"
  end

  # ==[ Parser ]==

  # Parses every remaining row and returns them as an Array of Arrays of
  # Strings. Also records the totals reported by stats.
  def parse
    @rows = []
    @cols = @cells = 0
    while (row = next_row)
      @rows << row
      size = row.size
      @cols = size if size > @cols
      @cells += size
    end
    @rows
  end

  # Returns the next row as an Array of cells, [] for a blank line, or nil
  # once the input is exhausted.
  def next_row
    row = []
    while (token = next_token)
      row << token
    end
    # An empty row followed by a pending line break is a blank line; an empty
    # row with nothing pending means there is no more input.
    row unless row.empty? && @flag.nil?
  end

  # ==[ Helpers ]==

  # Prints row, column, cell and byte counts for the most recent parse.
  def stats
    bytes = string.bytesize
    wide  = bytes.to_s.size
    puts "%#{wide}d rows"    % @rows.size
    puts "%#{wide}d columns" % @cols
    puts "%#{wide}d cells"   % @cells
    puts "%#{wide}d bytes"   % bytes
  end

  private

  # The byte under the scan pointer as a String, or nil at the end of input.
  # StringScanner#pos is a byte offset, so the lookahead is read with peek
  # rather than string[pos] (which indexes by character and goes out of sync
  # on multibyte data). All delimiters are single-byte, so comparing against
  # the first byte of a multibyte character is safe.
  def current_char
    eos? ? nil : peek(1)
  end

  # Consumes the separator under the pointer and remembers that a cell is
  # still owed if the row ends immediately afterwards (e.g. "a,\n").
  def consume_separator
    next_char
    @flag = @es
  end

  # Reads a quoted cell; the pointer is on the opening quote.
  def consume_quoted_cell
    cell = +""
    loop do
      getch # consume the quote (optimized by not calling next_char)
      cell << (scan_until(@quote_ahead) || bomb("unclosed quote"))
      case next_char # consumes the quote that ended the scan
      when @sep
        consume_separator
        break
      when @quote # doubled quote: literal quote, keep reading
        cell << @quote
      when @cr, @lf, nil
        break
      else
        bomb "unexpected character after quote"
      end
    end
    cell
  end

  # Reads an unquoted cell up to the next separator, line break or end of
  # input, consuming the separator if there is one.
  def consume_unquoted_cell
    cell = scan_until(@cell_end) || bomb("unexpected character")
    @char = current_char
    consume_separator if @char == @sep
    cell
  end
end
114
+
115
# ==[ Test it out... ]==
#
# Command-line demo: `ruby censive.rb [file.csv]` (defaults to z.csv).
# Guarded so it runs only when this file is executed directly; without the
# guard, every `require "censive"` would try to read z.csv and print to STDOUT.
if __FILE__ == $0
  path = ARGV.first || "z.csv"
  data = File.read(path)

  STDOUT.sync = true

  csv  = Censive.new(data)
  rows = csv.parse

  # Echo the rows only for small inputs; large files just get the summary.
  rows.each { |cols| p cols } unless data.size > 1e6

  csv.stats
end
metadata ADDED
@@ -0,0 +1,46 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: censive
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.1'
5
+ platform: ruby
6
+ authors:
7
+ - Steve Shreeve
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2023-01-31 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: A quick and lightweight CVS handling library for Ruby
14
+ email: steve.shreeve@gmail.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - LICENSE
20
+ - README.md
21
+ - censive.gemspec
22
+ - lib/censive.rb
23
+ homepage: https://github.com/shreeve/censive
24
+ licenses:
25
+ - MIT
26
+ metadata: {}
27
+ post_install_message:
28
+ rdoc_options: []
29
+ require_paths:
30
+ - lib
31
+ required_ruby_version: !ruby/object:Gem::Requirement
32
+ requirements:
33
+ - - ">="
34
+ - !ruby/object:Gem::Version
35
+ version: '0'
36
+ required_rubygems_version: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ requirements: []
42
+ rubygems_version: 3.4.5
43
+ signing_key:
44
+ specification_version: 4
45
+ summary: A quick and lightweight CVS handling library for Ruby
46
+ test_files: []