csv11 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/HISTORY.md +3 -0
- data/Manifest.txt +8 -0
- data/README.md +27 -0
- data/Rakefile +26 -0
- data/lib/csv11.rb +257 -0
- data/lib/csv11/version.rb +20 -0
- data/test/helper.rb +10 -0
- data/test/test_version.rb +21 -0
- metadata +86 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 2d7cb8629cffcede12c4545bc884afff3b65c306
|
4
|
+
data.tar.gz: aef587fdb3d5d9d7f949d91ff2f5c3ccf7b5ff5c
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: '0218c870f0dc609009f3b4f1cbebdb6cbd9cd2c0812897493988cff88d28c89681ce8a0b4428280ed94adaa22cc8a194973942bdd7a1beb224513cb98164cde3'
|
7
|
+
data.tar.gz: 5e2d3a577fb6d436e7d4e1247744250d344368e0393c799f3307e764a041243d5db26cd308fd65674a99c304b92befb24c3713c174a29b4ce80824bcce4ba4b2
|
data/HISTORY.md
ADDED
data/Manifest.txt
ADDED
data/README.md
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# csv11
|
2
|
+
|
3
|
+
csv11 library / gem - read / parse comma-separated values (csv); supports csv 1.1 incl. comments, named values, multi-line records, and more
|
4
|
+
|
5
|
+
* home :: [github.com/csv11/csv11](https://github.com/csv11/csv11)
|
6
|
+
* bugs :: [github.com/csv11/csv11/issues](https://github.com/csv11/csv11/issues)
|
7
|
+
* gem :: [rubygems.org/gems/csv11](https://rubygems.org/gems/csv11)
|
8
|
+
* rdoc :: [rubydoc.info/gems/csv11](http://rubydoc.info/gems/csv11)
|
9
|
+
|
10
|
+
|
11
|
+
|
12
|
+
|
13
|
+
## Usage
|
14
|
+
|
15
|
+
to be done
|
16
|
+
|
17
|
+
|
18
|
+
## License
|
19
|
+
|
20
|
+

|
21
|
+
|
22
|
+
The `csv11` scripts are dedicated to the public domain.
|
23
|
+
Use it as you please with no restrictions whatsoever.
|
24
|
+
|
25
|
+
## Questions? Comments?
|
26
|
+
|
27
|
+
Post them to the [wwwmake forum](http://groups.google.com/group/wwwmake). Thanks!
|
data/Rakefile
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'hoe'
|
2
|
+
require './lib/csv11/version.rb'
|
3
|
+
|
4
|
+
# Gem specification for csv11 (built with Hoe).
# The version number comes from Values::VERSION (see lib/csv11/version.rb).
Hoe.spec 'csv11' do
  tagline = 'csv11 - read / parse comma-separated values (csv); supports csv 1.1 incl. comments, named values, multi-line records, and more'

  self.version     = Values::VERSION
  self.summary     = tagline
  self.description = tagline     # description is the same text as the summary

  self.urls   = ['https://github.com/csv11/csv11']
  self.author = 'Gerald Bauer'
  self.email  = 'ruby-talk@ruby-lang.org'

  # readme and history are kept as markdown (.md) files for github formatting
  self.readme_file  = 'README.md'
  self.history_file = 'HISTORY.md'

  self.licenses = ['Public Domain']

  self.spec_extras = { required_ruby_version: '>= 2.2.2' }
end
|
data/lib/csv11.rb
ADDED
@@ -0,0 +1,257 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
require 'pp'
|
5
|
+
require 'strscan' ## StringScanner
|
6
|
+
|
7
|
+
|
8
|
+
# our own code
|
9
|
+
require 'csv11/version' # note: let version always go first
|
10
|
+
|
11
|
+
|
12
|
+
module Values

  ##
  # Single-line parser for comma-separated values with optional names, e.g.
  #
  #   1, name: hello, "quoted, value", """triple "quoted" value"""
  #
  # Plain values are collected as strings; named values are collected
  # as two-element arrays of the form [name, value].
  #
  # note: progress / debug messages and format errors are printed with puts
  class Parser

    ## token kind (name) plus the text (value) that was read for it
    Token = Struct.new(:name, :value) do
      def unquoted?()       name=='UNQUOTED';      end
      def quoted?()         name=='QUOTED';        end
      def triple_quoted?()  name=='TRIPLE_QUOTED'; end
    end

    class Unquoted < Token
      def initialize( value ) super( 'UNQUOTED', value ); end
    end

    class Quoted < Token
      def initialize( value ) super( 'QUOTED', value ); end
    end

    class TripleQuoted < Token
      ## note: the name MUST match the string checked in Token#triple_quoted?
      def initialize( value ) super( 'TRIPLE_QUOTED', value ); end
    end


    ## unquoted values followed by a colon (:) that are NOT names/keys e.g.:
    ##   https: http: urn: mailto: file:
    ##   add some more??
    ## todo/fix: add ip address e.g. 127.0.0.1: too ??
    RESERVED_NAMES = %w{ https http urn mailto file }.freeze

    ## pattern for names/keys    todo/fix: allow more chars
    NAME_REGEX = %r{\A[a-zA-Z0-9._-]+\z}


    def initialize
    end

    ##
    ## how to handle:
    ##   country:dk
    ##   mailto:hello    ## use excluded (reserved names urn, mailto, http, https)
    ##   http://example.com
    ##   urn:hello:444
    ##   name:"hello, world!"

    ##
    # Reads tokens from @buffer until end-of-string or a format error
    # and appends the values found to @values.
    #
    # After every token a comma (,), a colon (:) or end-of-string is expected;
    # anything else is reported as a format error and stops the parse.
    def parse_line
      loop do
        t = parse_token
        if t.nil?
          puts "!! format error: expected token with value, returns nil - rest is >>#{@buffer.rest}<<"
          break
        end

        if @buffer.peek(1) == ','
          @buffer.getch  ## consume ','
          puts "adding value >>#{t.value}<<"
          @values << t.value
        elsif @buffer.eos?
          puts "adding (last) value >>#{t.value}<<"
          @values << t.value
          break
        elsif @buffer.peek(1) == ':'
          @buffer.getch  ## consume ':'

          ## todo/fix:
          ##   do NOT allow names for quoted, triple_quoted for now - why? why not?
          ##   issue a format error: why? why not??

          if @values.empty?
            ### note:special case:
            ##    if first token is a name/key
            ##    consume all the rest!! including commas, colons etc.
            ##    no escape needed for nothing
            value = @buffer.rest.strip
            puts "adding (single-line) first named value >>#{t.value}<< : >>#{value}<<"
            @values << [t.value,value]
            break
          else
            t2 = parse_token( match_name: false )
            if t2.nil?
              ## e.g. unterminated quoted value after the name
              puts "!! format error: expected value for name >>#{t.value}<<, returns nil - rest is >>#{@buffer.rest}<<"
              break
            end

            puts "adding named value >>#{t.value}<< : >>#{t2.value}<<"
            @values << [t.value,t2.value]

            if @buffer.peek(1) == ','
              @buffer.getch  ## consume ','
            elsif @buffer.eos?
              break
            else
              puts "!! format error: expected comma (,) or EOS - rest is >>#{@buffer.rest}<<"
              break
            end
          end
        else
          puts "!! format error: expected comma (,) or colon (:) or EOS - rest is >>#{@buffer.rest}<<"
          break
        end
      end
    end


    ## true if the rest of the buffer starts with """ or '''
    def match_triple_quoted?
      ## todo/fix: use @buffer.match - """ (next letter MUST Not be "!! e.g.""""" not valid!!!
      @buffer.peek(3) == %{"""} ||   ## double triple quotes
      @buffer.peek(3) == %{'''}      ## single triple quotes
    end

    ## true if the rest of the buffer starts with " or '
    ## todo/fix: use @buffer.match - " (next letter MUST Not be "!! e.g."" not valid!!!
    def match_quoted?
      @buffer.peek(1) == %{"} ||   ## double quote
      @buffer.peek(1) == %{'}      ## single quote
    end


    ##
    # Reads a """...""" or '''...''' value (plus trailing spaces/tabs).
    #
    # Returns a TripleQuoted token or nil if the buffer does not start with
    # triple quotes or if the closing triple quotes are missing
    # (nothing gets consumed in that case).
    def parse_triple_quoted
      delim = @buffer.peek(3)
      return nil  unless delim == %{"""} || delim == %{'''}

      value = scan_enclosed( delim )
      return nil  if value.nil?     ## closing quotes missing; report format error

      puts %{quoted #{delim}...#{delim} value >>#{value}<<}
      TripleQuoted.new( value )
    end

    ##
    # Reads a "..." or '...' value (plus trailing spaces/tabs).
    #
    # Returns a Quoted token or nil if the buffer does not start with
    # a quote or if the closing quote is missing
    # (nothing gets consumed in that case).
    def parse_quoted
      delim = @buffer.peek(1)
      return nil  unless delim == %{"} || delim == %{'}

      value = scan_enclosed( delim )
      return nil  if value.nil?     ## closing quote missing; report format error

      puts %{quoted #{delim}...#{delim} value >>#{value}<<}
      Quoted.new( value )
    end


    ##
    # Reads an unquoted value up to (but not including) the next separator
    # and returns an Unquoted token (trailing whitespace removed).
    #
    # match_name: if true the value stops at the first colon (:) too, so that
    #   it can be used as a name/key; reserved names (e.g. http) and values
    #   that do not match the name pattern continue up to the next comma or
    #   end-of-line; if false only a comma or end-of-line ends the value.
    def parse_unquoted( match_name: true )
      ## unquoted value
      puts "collect unquoted token (match_name? => #{match_name}) - rest: >>#{@buffer.rest}<<"

      if match_name
        value = @buffer.scan_until( /(?=[,:]|$)/)

        not_a_name = RESERVED_NAMES.include?( value ) || NAME_REGEX.match( value ).nil?
        if @buffer.peek(1) == ':' && not_a_name
          ## reserved "non-key" or does NOT match name/key pattern
          ##   continue scan until next comma or eos(end-of-string)!!!
          value << @buffer.scan_until( /(?=,|$)/)
        end
      else  ## do NOT match name (named values) e.g. do NOT include colon (:)
        value = @buffer.scan_until( /(?=[,]|$)/)
      end

      value = value.rstrip   ## right strip whitespace
      puts "value >>#{value}<<"
      Unquoted.new( value )
    end


    ##
    # Skips leading spaces/tabs and reads the next token
    # (triple-quoted, quoted or unquoted - checked in that order).
    #
    # Returns the token or nil if a quoted value is not terminated.
    def parse_token( match_name: true )
      @buffer.skip( /[ \t]*/ )   ## skip WHITESPACE

      if match_triple_quoted?      # """...""" or '''...'''
        parse_triple_quoted
      elsif match_quoted?          # "..." or '...'
        parse_quoted
      else
        parse_unquoted( match_name: match_name )
      end
    end


    ##
    # Parses the line passed in as a string and returns the array of values
    # (strings for plain values, [name, value] pairs for named values).
    def parse(str)
      puts ""
      puts "**** parse >>#{str}<<"

      @values = []
      @buffer = StringScanner.new(str)

      parse_line
      @values
    end


    private

    ##
    # Consumes delim + text + delim at the current position (the text may
    # include anything but delim) plus any trailing spaces/tabs.
    #
    # Returns the text between the delimiters or nil if there is no closing
    # delim; on nil the scan position is left unchanged.
    def scan_enclosed( delim )
      quote = Regexp.escape( delim )
      return nil  if @buffer.scan( /#{quote}(.*?)#{quote}/m ).nil?

      value = @buffer[1]
      @buffer.skip( /[ \t]*/ )   ## skip trailing WHITESPACE
      value
    end
  end ## class Parser



  ## Convenience helper: parses a single line with a new Parser
  ## and returns the array of values.
  def self.split( line )
    parser = Parser.new
    parser.parse( line )
  end

end # module Values
|
254
|
+
|
255
|
+
|
256
|
+
# say hello
|
257
|
+
# prints the banner only if the global $RUBYLIBS_DEBUG has been assigned (defined? is nil otherwise)
puts Values.banner if defined?( $RUBYLIBS_DEBUG )
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
## Version info and related helpers (banner, root folder) for the csv11 library.
module Values
  MAJOR = 0    ## todo: namespace inside version or something - why? why not??
  MINOR = 0
  PATCH = 3
  VERSION = "#{MAJOR}.#{MINOR}.#{PATCH}"

  class << self
    ## version string e.g. 0.0.3
    def version
      VERSION
    end

    ## one-line banner - library version plus ruby version, release date and platform
    def banner
      ruby_info = "Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
      "csv11/#{VERSION} on #{ruby_info}"
    end

    ## absolute path three folder levels up from this file
    def root
      folder = 3.times.reduce( __FILE__ ) { |path, _| File.dirname( path ) }
      File.expand_path( folder )
    end
  end
end # module Values
|
data/test/helper.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_version.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
|
11
|
+
## Smoke test for the version constant.
## note: uses Minitest::Test - the old MiniTest constant is no longer
##   defined by default in newer minitest releases
class TestVersion < Minitest::Test

  def test_version
    puts Values::VERSION

    ## version must be a dotted major.minor.patch string e.g. 0.0.3
    assert_match( /\A\d+\.\d+\.\d+\z/, Values::VERSION )
  end

end # class TestVersion
|
metadata
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: csv11
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.3
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Gerald Bauer
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2018-08-04 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rdoc
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '4.0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '4.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: hoe
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '3.16'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '3.16'
|
41
|
+
description: csv11 - read / parse comma-separated values (csv); supports csv 1.1 incl.
|
42
|
+
comments, named values, multi-line records, and more
|
43
|
+
email: ruby-talk@ruby-lang.org
|
44
|
+
executables: []
|
45
|
+
extensions: []
|
46
|
+
extra_rdoc_files:
|
47
|
+
- HISTORY.md
|
48
|
+
- Manifest.txt
|
49
|
+
- README.md
|
50
|
+
files:
|
51
|
+
- HISTORY.md
|
52
|
+
- Manifest.txt
|
53
|
+
- README.md
|
54
|
+
- Rakefile
|
55
|
+
- lib/csv11.rb
|
56
|
+
- lib/csv11/version.rb
|
57
|
+
- test/helper.rb
|
58
|
+
- test/test_version.rb
|
59
|
+
homepage: https://github.com/csv11/csv11
|
60
|
+
licenses:
|
61
|
+
- Public Domain
|
62
|
+
metadata: {}
|
63
|
+
post_install_message:
|
64
|
+
rdoc_options:
|
65
|
+
- "--main"
|
66
|
+
- README.md
|
67
|
+
require_paths:
|
68
|
+
- lib
|
69
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
70
|
+
requirements:
|
71
|
+
- - ">="
|
72
|
+
- !ruby/object:Gem::Version
|
73
|
+
version: 2.2.2
|
74
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
75
|
+
requirements:
|
76
|
+
- - ">="
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
version: '0'
|
79
|
+
requirements: []
|
80
|
+
rubyforge_project:
|
81
|
+
rubygems_version: 2.5.2
|
82
|
+
signing_key:
|
83
|
+
specification_version: 4
|
84
|
+
summary: csv11 - read / parse comma-separated values (csv); supports csv 1.1 incl.
|
85
|
+
comments, named values, multi-line records, and more
|
86
|
+
test_files: []
|