censive 0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +3 -0
- data/censive.gemspec +14 -0
- data/lib/censive.rb +133 -0
- metadata +46 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 78a04712f2c737b263765b117a0614bddeda6e4c233409827ed900983b061a64
|
4
|
+
data.tar.gz: e42610ad444e4b4ed9db374f1ee1764c97619fa54235c3bb18e537513804f940
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 294e8879052426037bbaed072171ecb8ca0abff47156a4a1d51a3f4531229a6f755ae7db4f8e5dfb7422ad25edf1e145d497bd39d99e46d895cdf6adb4988edc
|
7
|
+
data.tar.gz: d087b32edd1e467143a1e785bd1aa89d2e096920027185ff2dc07a275317b034521b1143f962774270040ae57b079c3a61fa045d10e79f194312787125527fa0
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2023 Steve Shreeve
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
data/censive.gemspec
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Gem specification for censive.
#
# The file lists are built with Dir globs instead of shelling out to git so
# that the gemspec also loads outside a git checkout (unpacked tarball,
# vendored copy, machine without git installed).
Gem::Specification.new do |s|
  s.name        = "censive"
  s.version     = "0.1"
  s.author      = "Steve Shreeve"
  s.email       = "steve.shreeve@gmail.com"
  s.summary     = "A quick and lightweight CSV handling library for Ruby"
  s.description = "A quick and lightweight CSV handling library for Ruby"
  s.homepage    = "https://github.com/shreeve/censive"
  s.license     = "MIT"

  # Only files that actually exist are returned, so a missing bin/ directory
  # simply contributes nothing (the old `cd bin && ...` failed in that case).
  s.files       = Dir["lib/**/*.rb", "bin/*", "LICENSE", "README.md", "censive.gemspec"]
  s.executables = Dir["bin/*"].map { |path| File.basename(path) }
end
|
data/lib/censive.rb
ADDED
@@ -0,0 +1,133 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# ==============================================================================
|
4
|
+
# censive - A quick and lightweight CSV handling library for Ruby
|
5
|
+
#
|
6
|
+
# Author: Steve Shreeve (steve.shreeve@gmail.com)
|
7
|
+
# Date: Jan 30, 2023
|
8
|
+
# ==============================================================================
|
9
|
+
# The goals are:
|
10
|
+
#
|
11
|
+
# 1. Faster than Ruby's default CSV library
|
12
|
+
# 2. Lightweight code base with streamlined method calls
|
13
|
+
# 3. Eventually support IO streaming
|
14
|
+
#
|
15
|
+
# NOTE: Only getch and scan_until advance strscan's position
|
16
|
+
# ==============================================================================
|
17
|
+
|
18
|
+
require 'strscan'
|
19
|
+
|
20
|
+
# Censive is a small CSV reader built directly on top of StringScanner.
#
# The lexer keeps two pieces of state between calls to #next_token:
#
#   @char - the (single-byte) character at the current scan position, or nil
#           at end of input. Only the first byte is inspected, which is enough
#           to recognise the separator, quote, CR and LF even in multibyte
#           (e.g. UTF-8) input.
#   @flag - what has to be dealt with at the start of the next call:
#           @es     a separator was just consumed (a trailing empty cell is
#                   owed if the line ends right here),
#           @cr/@lf an end-of-line was seen but not consumed yet,
#           nil     nothing pending.
#
# Usage:
#
#   Censive.new("a,b\n1,2\n").parse            # => [["a", "b"], ["1", "2"]]
#   Censive.new("a;b", sep: ";").parse         # => [["a", "b"]]
class Censive < StringScanner
  # Creates a scanner over the given CSV text.
  #
  # @param args  [Array]  positional arguments forwarded to StringScanner#initialize
  #                       (the CSV string comes first)
  # @param sep   [String] cell separator, exactly one byte long (default ",")
  # @param quote [String] quote character, exactly one byte long (default '"')
  # @param opts  [Hash]   remaining keyword arguments, forwarded to StringScanner
  # @raise [ArgumentError] if sep or quote is not a single-byte string
  def initialize(*args, sep: ',', quote: '"', **opts)
    super(*args, **opts)
    @sep   = single_byte(sep, "sep")
    @quote = single_byte(quote, "quote")
    @es    = "".freeze
    @cr    = "\r".freeze
    @lf    = "\n".freeze

    # Zero-width lookaheads: scan_until stops *before* the delimiter, so the
    # delimiter itself is still the current character afterwards.
    @quote_ahead = /(?=#{Regexp.escape(@quote)})/
    @cell_end    = /(?=#{Regexp.escape(@sep)}|\r|\n|\z)/

    reset
  end

  # Rewinds to the start of the input and clears the lexer state.
  #
  # @return [nil]
  def reset
    super
    @char = current_char
    @flag = nil
  end

  # ==[ Lexer ]==

  # Consumes one character and refreshes @char.
  #
  # @return [String, nil] the new current character, nil at end of input
  def next_char
    getch
    @char = current_char
  end

  # Reads the next cell of the current row.
  #
  # @return [String, nil] the cell text, or nil at end of line / end of input
  def next_token
    # First settle whatever the previous call left pending.
    case @flag
    when @es
      @flag = nil
      # "a,<EOL>" or "a,<EOF>": the separator promised one more (empty) cell.
      return @es if @char.nil? || @char == @cr || @char == @lf
    when @cr
      @flag = nil
      next_char if next_char == @lf # swallow CR and an optional following LF
    when @lf
      @flag = nil
      next_char
    end

    case @char
    when @quote then quoted_cell
    when @sep   then @flag = @es; next_char; @es # empty cell before a separator
    when @cr    then @flag = @cr; nil
    when @lf    then @flag = @lf; nil
    when nil    then nil
    else unquoted_cell
    end
  end

  # Reports a parse error and terminates the process.
  #
  # NOTE(review): this uses Kernel#abort (raises SystemExit) as the original
  # did; callers that need to recover must rescue SystemExit.
  #
  # @param msg [String] description of the problem
  def bomb(msg)
    at   = charpos              # character index, unlike #pos which counts bytes
    from = [at - 4, 0].max      # clamp so a negative index never wraps around
    abort "censive: #{msg} at character #{at} near '#{string[from, 7]}'"
  end

  # ==[ Parser ]==

  # Parses from the current position to the end of the input and records
  # row / column / cell counts for #stats.
  #
  # @return [Array<Array<String>>] one array of cells per row; a blank line
  #   yields an empty array
  def parse
    @rows = []
    @cols = @cells = 0
    while (row = next_row)
      @rows << row
      size = row.size
      @cols = size if size > @cols
      @cells += size
    end
    @rows
  end

  # Reads the next row.
  #
  # @return [Array<String>, nil] the cells of the row ([] for a blank line),
  #   or nil once the input is exhausted
  def next_row
    row = []
    while (token = next_token)
      row << token
    end
    # A nil token with a pending end-of-line flag means "blank line", not
    # "end of input"; only the latter ends the parse.
    row unless row.empty? && @flag.nil?
  end

  # ==[ Helpers ]==

  # Prints row, column, cell and byte counts to standard output, parsing the
  # input first if #parse has not been called yet.
  #
  # @return [nil]
  def stats
    parse unless @rows
    bytes = string.bytesize
    wide  = bytes.to_s.size
    puts "%#{wide}d rows"    % @rows.size
    puts "%#{wide}d columns" % @cols
    puts "%#{wide}d cells"   % @cells
    puts "%#{wide}d bytes"   % bytes
  end

  private

  # First byte at the scan position, or nil at end of input. StringScanner
  # positions are byte offsets, so String#[] (character index) must not be
  # used with #pos.
  def current_char
    eos? ? nil : peek(1)
  end

  # Reads a quoted cell; the scan position is on the opening quote.
  def quoted_cell
    match = +""
    loop do
      getch # consume the opening quote (or the second quote of an escaped pair)
      match << (scan_until(@quote_ahead) || bomb("unclosed quote"))
      case next_char # consumes the closing quote and looks at what follows
      when @sep
        @flag = @es
        next_char
        break
      when @quote then match << @quote # doubled quote => literal quote
      when @cr, @lf, nil then break
      else bomb "unexpected character after quote"
      end
    end
    match
  end

  # Reads an unquoted cell up to the next separator, end of line or end of input.
  def unquoted_cell
    match = scan_until(@cell_end) || bomb("unexpected character")
    @char = current_char
    if @char == @sep
      @flag = @es
      next_char
    end
    match
  end

  # Validates and freezes a one-byte delimiter.
  def single_byte(value, label)
    str = String(value)
    raise ArgumentError, "#{label} must be a single-byte character" unless str.bytesize == 1
    str.dup.freeze
  end
end
|
114
|
+
|
115
|
+
# ==[ Test it out... ]==
|
116
|
+
|
117
|
+
# Command-line demo: parse the CSV file named by the first argument (default
# "z.csv"), echo each row unless the input is large, then print statistics.
#
# Guarded so that it runs only when this file is executed directly; without
# the guard, `require "censive"` would try to read z.csv and print to stdout.
if __FILE__ == $PROGRAM_NAME
  ARGV << "z.csv" if ARGV.empty?

  data = File.read(ARGV.first)

  STDOUT.sync = true

  csv  = Censive.new(data)
  rows = csv.parse

  # Skip the per-row dump for inputs over ~1e6 characters.
  rows.each { |cols| p cols } unless data.size > 1e6

  csv.stats
end
|
metadata
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: censive
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: '0.1'
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Steve Shreeve
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2023-01-31 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: A quick and lightweight CVS handling library for Ruby
|
14
|
+
email: steve.shreeve@gmail.com
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- LICENSE
|
20
|
+
- README.md
|
21
|
+
- censive.gemspec
|
22
|
+
- lib/censive.rb
|
23
|
+
homepage: https://github.com/shreeve/censive
|
24
|
+
licenses:
|
25
|
+
- MIT
|
26
|
+
metadata: {}
|
27
|
+
post_install_message:
|
28
|
+
rdoc_options: []
|
29
|
+
require_paths:
|
30
|
+
- lib
|
31
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
32
|
+
requirements:
|
33
|
+
- - ">="
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: '0'
|
36
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
requirements: []
|
42
|
+
rubygems_version: 3.4.5
|
43
|
+
signing_key:
|
44
|
+
specification_version: 4
|
45
|
+
summary: A quick and lightweight CVS handling library for Ruby
|
46
|
+
test_files: []
|