csv11 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/HISTORY.md +3 -0
- data/Manifest.txt +8 -0
- data/README.md +27 -0
- data/Rakefile +26 -0
- data/lib/csv11.rb +257 -0
- data/lib/csv11/version.rb +20 -0
- data/test/helper.rb +10 -0
- data/test/test_version.rb +21 -0
- metadata +86 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 2d7cb8629cffcede12c4545bc884afff3b65c306
|
4
|
+
data.tar.gz: aef587fdb3d5d9d7f949d91ff2f5c3ccf7b5ff5c
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: '0218c870f0dc609009f3b4f1cbebdb6cbd9cd2c0812897493988cff88d28c89681ce8a0b4428280ed94adaa22cc8a194973942bdd7a1beb224513cb98164cde3'
|
7
|
+
data.tar.gz: 5e2d3a577fb6d436e7d4e1247744250d344368e0393c799f3307e764a041243d5db26cd308fd65674a99c304b92befb24c3713c174a29b4ce80824bcce4ba4b2
|
data/HISTORY.md
ADDED
data/Manifest.txt
ADDED
data/README.md
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# csv11
|
2
|
+
|
3
|
+
csv11 library / gem - read / parse comma-separated values (csv); supports csv 1.1 incl. comments, named values, multi-line records, and more
|
4
|
+
|
5
|
+
* home :: [github.com/csv11/csv11](https://github.com/csv11/csv11)
|
6
|
+
* bugs :: [github.com/csv11/csv11/issues](https://github.com/csv11/csv11/issues)
|
7
|
+
* gem :: [rubygems.org/gems/csv11](https://rubygems.org/gems/csv11)
|
8
|
+
* rdoc :: [rubydoc.info/gems/csv11](http://rubydoc.info/gems/csv11)
|
9
|
+
|
10
|
+
|
11
|
+
|
12
|
+
|
13
|
+
## Usage
|
14
|
+
|
15
|
+
to be done
|
16
|
+
|
17
|
+
|
18
|
+
## License
|
19
|
+
|
20
|
+
![](https://publicdomainworks.github.io/buttons/zero88x31.png)
|
21
|
+
|
22
|
+
The `csv11` scripts are dedicated to the public domain.
|
23
|
+
Use it as you please with no restrictions whatsoever.
|
24
|
+
|
25
|
+
## Questions? Comments?
|
26
|
+
|
27
|
+
Post them to the [wwwmake forum](http://groups.google.com/group/wwwmake). Thanks!
|
data/Rakefile
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'hoe'
|
2
|
+
require './lib/csv11/version.rb'
|
3
|
+
|
4
|
+
## gem specification for csv11 - declared via the hoe spec block;
##  the version number gets pulled in from lib/csv11/version.rb
Hoe.spec 'csv11' do
  self.version      = Values::VERSION

  self.summary      = 'csv11 - read / parse comma-separated values (csv); supports csv 1.1 incl. comments, named values, multi-line records, and more'
  self.description  = summary     ## note: description reuses the summary text

  self.urls         = %w[https://github.com/csv11/csv11]

  self.author       = 'Gerald Bauer'
  self.email        = 'ruby-talk@ruby-lang.org'

  ## use the markdown (.md) readme / history files for github formatting
  self.readme_file  = 'README.md'
  self.history_file = 'HISTORY.md'

  self.licenses     = ['Public Domain']

  self.spec_extras  = { :required_ruby_version => '>= 2.2.2' }
end
|
data/lib/csv11.rb
ADDED
@@ -0,0 +1,257 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
require 'pp'
|
5
|
+
require 'strscan' ## StringScanner
|
6
|
+
|
7
|
+
|
8
|
+
# our own code
|
9
|
+
require 'csv11/version' # note: let version always go first
|
10
|
+
|
11
|
+
|
12
|
+
module Values

  ##
  ## Parser - splits a single csv 1.1 line into values
  ##
  ##  supports:
  ##   - unquoted values          e.g.  a, b, c
  ##   - quoted values            e.g.  "hello, world" or 'hello, world'
  ##   - triple-quoted values     e.g.  """say "hi" """ or '''it's'''
  ##   - named values (name:value) e.g.  a, country:dk
  ##
  ##  plain values get collected as strings and
  ##   named values as two-element [name,value] arrays
  ##
  ##  note: the parser (still) prints trace / debug messages to stdout
  class Parser

    ## token - pairs a token kind (name) with the scanned text (value)
    Token = Struct.new(:name, :value) do
      def unquoted?()       name=='UNQUOTED';      end
      def quoted?()         name=='QUOTED';        end
      def triple_quoted?()  name=='TRIPLE_QUOTED'; end
    end

    class Unquoted < Token
      def initialize( value ) super( 'UNQUOTED', value ); end
    end

    class Quoted < Token
      def initialize( value ) super( 'QUOTED', value ); end
    end

    class TripleQuoted < Token
      ## note: the name MUST match the string checked in Token#triple_quoted?
      def initialize( value ) super( 'TRIPLE_QUOTED', value ); end
    end


    ## reserved "non-keys" - if followed by a colon (:) these do NOT
    ##  start a named value e.g. http://example.com or mailto:hello
    ## todo/fix: add ip address e.g. 127.0.0.1: too ??
    RESERVED_NAMES = %w[https http urn mailto file].freeze

    ## pattern for a valid name/key     ## todo/fix: allow more chars
    NAME_REGEX = /\A[a-zA-Z0-9._-]+\z/


    def initialize
    end

    ##
    ## how to handle:
    ##   country:dk
    ##   mailto:hello    ## use excluded (reserved names urn, mailto, http, https)
    ##   http://example.com
    ##   urn:hello:444
    ##   name:"hello, world!"

    ##
    ## collect all values from @buffer into @values;
    ##  stops at end-of-string or on the first format error
    ##  (format errors get reported on stdout)
    def parse_line
      loop do
        t = parse_token
        if t.nil?
          puts "!! format error: expected token with value, returns nil - rest is >>#{@buffer.rest}<<"
          break
        end

        if @buffer.peek(1) == ','
          @buffer.getch   ## consume ','
          puts "adding value >>#{t.value}<<"
          @values << t.value
        elsif @buffer.eos?
          puts "adding (last) value >>#{t.value}<<"
          @values << t.value
          break
        elsif @buffer.peek(1) == ':'
          @buffer.getch   ## consume ':'

          ## todo/fix:
          ##   do NOT allow names for quoted, triple_quoted for now - why? why not?
          ##   issue a format error: why? why not??

          if @values.empty?
            ### note: special case:
            ##   if first token is a name/key
            ##   consume all the rest!! including commas, colons etc.
            ##   no escape needed for nothing
            value = @buffer.rest.strip
            puts "adding (single-line) first named value >>#{t.value}<< : >>#{value}<<"
            @values << [t.value,value]
            break
          else
            t2 = parse_token( match_name: false )
            if t2.nil?   ## e.g. unterminated quoted value after the colon
              puts "!! format error: expected value for name >>#{t.value}<<, returns nil - rest is >>#{@buffer.rest}<<"
              break
            end
            puts "adding named value >>#{t.value}<< : >>#{t2.value}<<"
            @values << [t.value,t2.value]

            if @buffer.peek(1) == ','
              @buffer.getch   ## consume ','
            elsif @buffer.eos?
              break
            else
              puts "!! format error: expected comma (,) or EOS - rest is >>#{@buffer.rest}<<"
              break
            end
          end
        else
          puts "!! format error: expected comma (,) or colon (:) or EOS - rest is >>#{@buffer.rest}<<"
          break
        end
      end
    end


    ## check if a triple-quoted value starts at the scan position
    def match_triple_quoted?
      ## todo/fix: use @buffer.match - """ (next letter MUST Not be "!!  e.g.""""" not valid!!!
      @buffer.peek(3) == %{"""} ||   ## double triple quotes
      @buffer.peek(3) == %{'''}      ## single triple quotes
    end

    ## check if a quoted value starts at the scan position
    ## todo/fix: use @buffer.match - " (next letter MUST Not be "!!  e.g."" not valid!!!
    def match_quoted?
      @buffer.peek(1) == %{"} ||   ## double quote
      @buffer.peek(1) == %{'}      ## single quote
    end


    ##
    ## scan a """...""" or '''...''' value (incl. trailing whitespace);
    ##  returns a TripleQuoted token or
    ##  nil if no (properly closed) triple-quoted value starts here
    def parse_triple_quoted
      delim = @buffer.peek(3)
      return nil  unless delim == %{"""} || delim == %{'''}

      value = scan_delimited( delim )
      return nil  if value.nil?     ## closing quotes missing; report format error

      puts %{quoted #{delim}...#{delim} value >>#{value}<<}
      TripleQuoted.new( value )
    end

    ##
    ## scan a "..." or '...' value (incl. trailing whitespace);
    ##  returns a Quoted token or
    ##  nil if no (properly closed) quoted value starts here
    def parse_quoted
      delim = @buffer.peek(1)
      return nil  unless delim == %{"} || delim == %{'}

      value = scan_delimited( delim )
      return nil  if value.nil?     ## closing quote missing; report format error

      puts %{quoted #{delim}...#{delim} value >>#{value}<<}
      Quoted.new( value )
    end


    ##
    ## scan an unquoted value; always returns an Unquoted token
    ##   (with trailing whitespace stripped)
    ##
    ##  match_name - if true stop at a colon (:) too, so that the caller
    ##               can handle a named value (name:value); reserved names
    ##               (e.g. http:) and text that is not a valid name
    ##               continue up to the next comma or end-of-line
    def parse_unquoted( match_name: true )
      puts "collect unquoted token (match_name? => #{match_name}) - rest: >>#{@buffer.rest}<<"

      if match_name
        value = @buffer.scan_until( /(?=[,:]|$)/ )

        if @buffer.peek(1) == ':' &&
           (RESERVED_NAMES.include?( value ) || value !~ NAME_REGEX)
          ## reserved or does NOT match name/key pattern:
          ##   continue scan until next comma or eos(end-of-string)!!!
          value << @buffer.scan_until( /(?=,|$)/ )
        end
      else  ## do NOT match name (named values) e.g. do NOT stop at colon (:)
        value = @buffer.scan_until( /(?=[,]|$)/ )
      end

      value = value.rstrip    ## right strip whitespace
      puts "value >>#{value}<<"
      Unquoted.new( value )
    end


    ##
    ## skip leading whitespace and scan the next token;
    ##  returns nil if a quoted value is not properly closed
    def parse_token( match_name: true )
      @buffer.skip( /[ \t]*/ )   ## skip WHITESPACE

      if match_triple_quoted?    # """...""" or '''...'''
        parse_triple_quoted
      elsif match_quoted?        # "..." or '...'
        parse_quoted
      else
        parse_unquoted( match_name: match_name )
      end
    end


    ##
    ## parse a single line (string);
    ##  returns an array with all values
    ##   (strings for plain values and [name,value] pairs for named values)
    def parse(str)
      puts ""
      puts "**** parse >>#{str}<<"

      @values = []
      @buffer = StringScanner.new(str)

      parse_line
      @values
    end

    private

    ##
    ## consume an opening delimiter, the text up to the closing
    ##  delimiter, the closing delimiter itself and trailing whitespace;
    ##  returns the text in-between or - if the closing delimiter is
    ##  missing - rewinds to the opening delimiter and returns nil
    def scan_delimited( delim )
      start  = @buffer.pos
      marker = Regexp.escape( delim )

      @buffer.skip( /#{marker}/ )                     ## consume opening quote(s)
      value = @buffer.scan_until( /(?=#{marker})/ )
      if value.nil?
        @buffer.pos = start    ## keep offending text in rest for the error message
        return nil
      end
      @buffer.skip( /#{marker}/ )                     ## consume closing quote(s)
      @buffer.skip( /[ \t]*/ )                        ## skip trailing WHITESPACE
      value
    end
  end ## class Parser


  ## split a line into values - convenience shortcut for Parser.new.parse( line )
  def self.split( line )
    parser = Parser.new
    parser.parse( line )
  end

end # module Values
|
254
|
+
|
255
|
+
|
256
|
+
# say hello
|
257
|
+
puts Values.banner if defined?( $RUBYLIBS_DEBUG )
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
## version info (and banner / root directory helpers) for the csv11 gem
module Values
  MAJOR = 0    ## todo: namespace inside version or something - why? why not??
  MINOR = 0
  PATCH = 3
  VERSION = "#{MAJOR}.#{MINOR}.#{PATCH}"

  class << self
    ## version string e.g. 0.0.3
    def version
      VERSION
    end

    ## one-line banner incl. ruby version, release date and platform
    def banner
      ruby_info = "Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
      "csv11/#{VERSION} on #{ruby_info}"
    end

    ## absolute path three directory levels up from this file
    def root
      here = File.dirname( __FILE__ )
      up   = File.dirname( File.dirname( here ) )
      File.expand_path( up )
    end
  end
end # module Values
|
data/test/helper.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_version.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
|
11
|
+
## smoke test for the version info in module Values
##
##  note: uses the Minitest constant - the legacy MiniTest alias
##        is no longer defined by default in newer minitest releases
class TestVersion < Minitest::Test

  def test_version
    puts Values::VERSION

    ## version is made up of three dot-separated numbers e.g. 0.0.3
    assert_match( /\A\d+\.\d+\.\d+\z/, Values::VERSION )
    assert_equal Values::VERSION, Values.version
  end

end # class TestVersion
|
metadata
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: csv11
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.3
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Gerald Bauer
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2018-08-04 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rdoc
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '4.0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '4.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: hoe
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '3.16'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '3.16'
|
41
|
+
description: csv11 - read / parse comma-separated values (csv); supports csv 1.1 incl.
|
42
|
+
comments, named values, multi-line records, and more
|
43
|
+
email: ruby-talk@ruby-lang.org
|
44
|
+
executables: []
|
45
|
+
extensions: []
|
46
|
+
extra_rdoc_files:
|
47
|
+
- HISTORY.md
|
48
|
+
- Manifest.txt
|
49
|
+
- README.md
|
50
|
+
files:
|
51
|
+
- HISTORY.md
|
52
|
+
- Manifest.txt
|
53
|
+
- README.md
|
54
|
+
- Rakefile
|
55
|
+
- lib/csv11.rb
|
56
|
+
- lib/csv11/version.rb
|
57
|
+
- test/helper.rb
|
58
|
+
- test/test_version.rb
|
59
|
+
homepage: https://github.com/csv11/csv11
|
60
|
+
licenses:
|
61
|
+
- Public Domain
|
62
|
+
metadata: {}
|
63
|
+
post_install_message:
|
64
|
+
rdoc_options:
|
65
|
+
- "--main"
|
66
|
+
- README.md
|
67
|
+
require_paths:
|
68
|
+
- lib
|
69
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
70
|
+
requirements:
|
71
|
+
- - ">="
|
72
|
+
- !ruby/object:Gem::Version
|
73
|
+
version: 2.2.2
|
74
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
75
|
+
requirements:
|
76
|
+
- - ">="
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
version: '0'
|
79
|
+
requirements: []
|
80
|
+
rubyforge_project:
|
81
|
+
rubygems_version: 2.5.2
|
82
|
+
signing_key:
|
83
|
+
specification_version: 4
|
84
|
+
summary: csv11 - read / parse comma-separated values (csv); supports csv 1.1 incl.
|
85
|
+
comments, named values, multi-line records, and more
|
86
|
+
test_files: []
|