eccsv 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.projections.json +10 -0
- data/README.md +34 -0
- data/lib/eccsv.rb +5 -0
- data/lib/eccsv/correction.rb +42 -0
- data/lib/eccsv/lexer.rb +47 -0
- data/lib/eccsv/node.rb +101 -0
- data/lib/eccsv/parser.rb +38 -137
- data/lib/eccsv/parser.y +37 -136
- data/lib/eccsv/stream.rb +77 -0
- data/lib/eccsv/version.rb +1 -1
- data/test.rb +5 -0
- data/test/test_parser.rb +14 -0
- data/test/test_stream.rb +147 -0
- metadata +11 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6cd8161c122089744642e805673f8758e1f9eead
|
4
|
+
data.tar.gz: 2243015e40dfa275dd74b291fc1303fd533ad0bb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ebb75489bb52a02c4c6690476bdaea6a45fe750ed3b16dfee051e511086a50e5c7921e9b0546d340342f3b04febfda9a99869104dc866305177c268aa532c295
|
7
|
+
data.tar.gz: 20b6014bc62d0138e454dec1ca2f71679058cb199104246afb2670958b615145112f58cd92c21d269efca0cca4f12fa7df28b231922be3cf8ff39802defe366c
|
data/.gitignore
CHANGED
data/.projections.json
ADDED
data/README.md
CHANGED
@@ -104,6 +104,40 @@ parser.warnings[0].line #=> 2
|
|
104
104
|
parser.warnings[0].col #=> 4
|
105
105
|
```
|
106
106
|
|
107
|
+
## Corrections
|
108
|
+
|
109
|
+
Parse errors can be corrected by inserting or deleting characters at a given line and column before parsing.
|
110
|
+
|
111
|
+
### Examples
|
112
|
+
|
113
|
+
#### Inserting
|
114
|
+
|
115
|
+
```ruby
|
116
|
+
require 'eccsv'
|
117
|
+
|
118
|
+
data = <<EOF
|
119
|
+
foo",bar
|
120
|
+
EOF
|
121
|
+
|
122
|
+
parser = ECCSV::Parser.new
|
123
|
+
parser.add_correction(1, 1, :insert, '"')
|
124
|
+
parser.parse(data) #=> [["foo", "bar"]]
|
125
|
+
```
|
126
|
+
|
127
|
+
#### Deleting
|
128
|
+
|
129
|
+
```ruby
|
130
|
+
require 'eccsv'
|
131
|
+
|
132
|
+
data = <<EOF
|
133
|
+
foo",bar
|
134
|
+
EOF
|
135
|
+
|
136
|
+
parser = ECCSV::Parser.new
|
137
|
+
parser.add_correction(1, 4, :delete, 1)
|
138
|
+
parser.parse(data) #=> [["foo", "bar"]]
|
139
|
+
```
|
140
|
+
|
107
141
|
## Contributing
|
108
142
|
|
109
143
|
1. Fork it
|
data/lib/eccsv.rb
CHANGED
data/lib/eccsv/correction.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
module ECCSV
|
2
|
+
class Correction
|
3
|
+
attr_reader :line, :col
|
4
|
+
|
5
|
+
def initialize(line, col, *args)
|
6
|
+
@line = line
|
7
|
+
@col = col
|
8
|
+
end
|
9
|
+
|
10
|
+
def apply(stream)
|
11
|
+
raise NotImplementedError
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
class InsertCorrection < Correction
|
16
|
+
attr_reader :string
|
17
|
+
|
18
|
+
def initialize(line, col, string)
|
19
|
+
super
|
20
|
+
@string = string
|
21
|
+
end
|
22
|
+
|
23
|
+
def length
|
24
|
+
@string.length
|
25
|
+
end
|
26
|
+
|
27
|
+
def apply(stream)
|
28
|
+
stream.insert(@string, @line, @col)
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
class DeleteCorrection < Correction
|
33
|
+
def initialize(line, col, amount)
|
34
|
+
super
|
35
|
+
@amount = amount
|
36
|
+
end
|
37
|
+
|
38
|
+
def apply(stream)
|
39
|
+
stream.delete(@amount, @line, @col)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
data/lib/eccsv/lexer.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
module ECCSV
|
2
|
+
class Lexer
|
3
|
+
def initialize(stream)
|
4
|
+
@stream = stream
|
5
|
+
end
|
6
|
+
|
7
|
+
def next_token
|
8
|
+
unless @stream.eof?
|
9
|
+
token = nil
|
10
|
+
match = ""
|
11
|
+
line = @stream.line
|
12
|
+
col = @stream.col
|
13
|
+
|
14
|
+
until @stream.eof?
|
15
|
+
c = @stream.peek
|
16
|
+
if token.nil?
|
17
|
+
match << c
|
18
|
+
@stream.next
|
19
|
+
if c == ","
|
20
|
+
token = :COMMA
|
21
|
+
break
|
22
|
+
elsif c == '"'
|
23
|
+
token = :QUOTE
|
24
|
+
break
|
25
|
+
elsif c == "\n"
|
26
|
+
token = :NEWLINE
|
27
|
+
break
|
28
|
+
else
|
29
|
+
token = :TEXT
|
30
|
+
end
|
31
|
+
elsif c != "," && c != '"' && c != "\n"
|
32
|
+
match << c
|
33
|
+
@stream.next
|
34
|
+
else
|
35
|
+
break
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
if match.length == 0
|
40
|
+
raise "Stream error"
|
41
|
+
end
|
42
|
+
node = Node.new(match, token, line, col)
|
43
|
+
[token, node]
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
data/lib/eccsv/node.rb
ADDED
@@ -0,0 +1,101 @@
|
|
1
|
+
module ECCSV
|
2
|
+
class Node
|
3
|
+
attr_reader :value, :token, :line, :col
|
4
|
+
|
5
|
+
def initialize(value = "", token = nil, line = nil, col = nil)
|
6
|
+
@value = value
|
7
|
+
@token = token
|
8
|
+
@line = line
|
9
|
+
@col = col
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
class ParentNode < Node
|
14
|
+
def initialize(children = [], line = nil, col = nil)
|
15
|
+
last = children.last
|
16
|
+
if last && last.is_a?(Node)
|
17
|
+
line = last.line
|
18
|
+
col = last.col
|
19
|
+
end
|
20
|
+
super(nil, nil, line, col)
|
21
|
+
@children = children
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
class QuotedTextNode < ParentNode
|
26
|
+
def value
|
27
|
+
@value ||= @children.collect(&:value).join
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
class FieldNode < ParentNode
|
32
|
+
def value
|
33
|
+
@value ||=
|
34
|
+
if @children[0].token == :TEXT
|
35
|
+
@children[0].value
|
36
|
+
else
|
37
|
+
# quoted text
|
38
|
+
@children[1].value
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
class DelimFieldNode < ParentNode
|
44
|
+
def value
|
45
|
+
@value ||= @children[0].value
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
class DelimFieldsNode < ParentNode
|
50
|
+
def value
|
51
|
+
@value ||=
|
52
|
+
if @children.empty?
|
53
|
+
[]
|
54
|
+
else
|
55
|
+
@children[0].value + [@children[1].value]
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
class RecordNode < ParentNode
|
61
|
+
def value
|
62
|
+
# TODO: 'consume' children to produce value to reduce memory footprint
|
63
|
+
@value ||= @children[0].value + [@children[1].value]
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
class DelimRecordNode < ParentNode
|
68
|
+
def value
|
69
|
+
@value ||= @children.length == 1 ? [] : @children[0].value
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
class DelimRecordsNode < ParentNode
|
74
|
+
def value
|
75
|
+
if @value.nil?
|
76
|
+
if @children.empty?
|
77
|
+
@value = []
|
78
|
+
else
|
79
|
+
@value = @children[0].value
|
80
|
+
val = @children[1].value
|
81
|
+
if !val.empty?
|
82
|
+
@value += [val]
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
86
|
+
@value
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
class RootNode < ParentNode
|
91
|
+
def value
|
92
|
+
if @value.nil?
|
93
|
+
@value = @children[0].value
|
94
|
+
if @children[1]
|
95
|
+
@value += [@children[1].value]
|
96
|
+
end
|
97
|
+
end
|
98
|
+
@value
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
data/lib/eccsv/parser.rb
CHANGED
@@ -12,141 +12,46 @@ module ECCSV
|
|
12
12
|
class Parser < Racc::Parser
|
13
13
|
|
14
14
|
module_eval(<<'...end parser.y/module_eval...', 'parser.y', 36)
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
col = last.col
|
15
|
+
attr_reader :error, :warnings
|
16
|
+
|
17
|
+
def initialize
|
18
|
+
@warnings = []
|
19
|
+
@corrections = []
|
20
|
+
end
|
21
|
+
|
22
|
+
def add_correction(line, col, type, *args)
|
23
|
+
klass =
|
24
|
+
case type
|
25
|
+
when :insert
|
26
|
+
InsertCorrection
|
27
|
+
when :delete
|
28
|
+
DeleteCorrection
|
29
|
+
else
|
30
|
+
raise "invalid correction type: #{type.inspect}"
|
32
31
|
end
|
33
|
-
|
34
|
-
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
class QuotedTextNode < ParentNode
|
39
|
-
def value
|
40
|
-
@value ||= @children.collect(&:value).join
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
class FieldNode < ParentNode
|
45
|
-
def value
|
46
|
-
@value ||=
|
47
|
-
if @children[0].token == :TEXT
|
48
|
-
@children[0].value
|
49
|
-
else
|
50
|
-
# quoted text
|
51
|
-
@children[1].value
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
class DelimFieldNode < ParentNode
|
57
|
-
def value
|
58
|
-
@value ||= @children[0].value
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
class DelimFieldsNode < ParentNode
|
63
|
-
def value
|
64
|
-
@value ||=
|
65
|
-
if @children.empty?
|
66
|
-
[]
|
67
|
-
else
|
68
|
-
@children[0].value + [@children[1].value]
|
69
|
-
end
|
70
|
-
end
|
71
|
-
end
|
72
|
-
|
73
|
-
class RecordNode < ParentNode
|
74
|
-
def value
|
75
|
-
# TODO: 'consume' children to produce value to reduce memory footprint
|
76
|
-
@value ||= @children[0].value + [@children[1].value]
|
77
|
-
end
|
32
|
+
correction = klass.new(line, col, *args)
|
33
|
+
@corrections << correction
|
78
34
|
end
|
79
35
|
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
end
|
85
|
-
|
86
|
-
class DelimRecordsNode < ParentNode
|
87
|
-
def value
|
88
|
-
if @value.nil?
|
89
|
-
if @children.empty?
|
90
|
-
@value = []
|
91
|
-
else
|
92
|
-
@value = @children[0].value
|
93
|
-
val = @children[1].value
|
94
|
-
if !val.empty?
|
95
|
-
@value += [val]
|
96
|
-
end
|
97
|
-
end
|
98
|
-
end
|
99
|
-
@value
|
36
|
+
def parse(str)
|
37
|
+
@stream = Stream.new(StringIO.new(str))
|
38
|
+
@corrections.each do |correction|
|
39
|
+
correction.apply(@stream)
|
100
40
|
end
|
41
|
+
@lexer = Lexer.new(@stream)
|
42
|
+
do_parse
|
101
43
|
end
|
102
44
|
|
103
|
-
|
104
|
-
|
105
|
-
if @value.nil?
|
106
|
-
@value = @children[0].value
|
107
|
-
if @children[1]
|
108
|
-
@value += [@children[1].value]
|
109
|
-
end
|
110
|
-
end
|
111
|
-
@value
|
112
|
-
end
|
45
|
+
def curr_line
|
46
|
+
@stream.line
|
113
47
|
end
|
114
48
|
|
115
|
-
|
116
|
-
|
117
|
-
def parse(str)
|
118
|
-
@scanner = StringScanner.new(str)
|
119
|
-
@line = 1
|
120
|
-
@col = 1
|
121
|
-
do_parse
|
49
|
+
def curr_col
|
50
|
+
@stream.col
|
122
51
|
end
|
123
52
|
|
124
53
|
def next_token
|
125
|
-
|
126
|
-
next_line = @line
|
127
|
-
next_col = @col
|
128
|
-
case
|
129
|
-
when match = @scanner.scan(/,/)
|
130
|
-
token = :COMMA
|
131
|
-
when match = @scanner.scan(/"/)
|
132
|
-
token = :QUOTE
|
133
|
-
when match = @scanner.scan(/\n/)
|
134
|
-
token = :NEWLINE
|
135
|
-
next_line += 1
|
136
|
-
next_col = 0
|
137
|
-
when match = @scanner.scan(/[^,\n"]+/)
|
138
|
-
token = :TEXT
|
139
|
-
else
|
140
|
-
raise "can't recognize <#{@scanner.peek(5)}>"
|
141
|
-
end
|
142
|
-
next_col += match.length
|
143
|
-
|
144
|
-
value = node(match, token)
|
145
|
-
@line = next_line
|
146
|
-
@col = next_col
|
147
|
-
|
148
|
-
return [token, value]
|
149
|
-
end
|
54
|
+
@lexer.next_token
|
150
55
|
end
|
151
56
|
|
152
57
|
def warnings
|
@@ -155,27 +60,23 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 36)
|
|
155
60
|
|
156
61
|
private
|
157
62
|
|
158
|
-
def
|
159
|
-
Node.new(value, token, line, col)
|
160
|
-
end
|
161
|
-
|
162
|
-
def quoted_text(children = [], line = @line, col = @col)
|
63
|
+
def quoted_text(children = [], line = curr_line, col = curr_col)
|
163
64
|
QuotedTextNode.new(children, line, col)
|
164
65
|
end
|
165
66
|
|
166
|
-
def field(children = [], line =
|
67
|
+
def field(children = [], line = curr_line, col = curr_col)
|
167
68
|
FieldNode.new(children, line, col)
|
168
69
|
end
|
169
70
|
|
170
|
-
def delim_field(children = [], line =
|
71
|
+
def delim_field(children = [], line = curr_line, col = curr_col)
|
171
72
|
DelimFieldNode.new(children, line, col)
|
172
73
|
end
|
173
74
|
|
174
|
-
def delim_fields(children = [], line =
|
75
|
+
def delim_fields(children = [], line = curr_line, col = curr_col)
|
175
76
|
DelimFieldsNode.new(children, line, col)
|
176
77
|
end
|
177
78
|
|
178
|
-
def record(children = [], line =
|
79
|
+
def record(children = [], line = curr_line, col = curr_col)
|
179
80
|
record = RecordNode.new(children, line, col)
|
180
81
|
value = record.value
|
181
82
|
if defined? @num_fields
|
@@ -196,15 +97,15 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 36)
|
|
196
97
|
record
|
197
98
|
end
|
198
99
|
|
199
|
-
def delim_record(children = [], line =
|
100
|
+
def delim_record(children = [], line = curr_line, col = curr_col)
|
200
101
|
DelimRecordNode.new(children, line, col)
|
201
102
|
end
|
202
103
|
|
203
|
-
def delim_records(children = [], line =
|
104
|
+
def delim_records(children = [], line = curr_line, col = curr_col)
|
204
105
|
DelimRecordsNode.new(children, line, col)
|
205
106
|
end
|
206
107
|
|
207
|
-
def root(children = [], line =
|
108
|
+
def root(children = [], line = curr_line, col = curr_col)
|
208
109
|
RootNode.new(children, line, col)
|
209
110
|
end
|
210
111
|
|
@@ -338,7 +239,7 @@ Racc_token_to_s_table = [
|
|
338
239
|
"delim_field",
|
339
240
|
"quoted_text" ]
|
340
241
|
|
341
|
-
Racc_debug_parser =
|
242
|
+
Racc_debug_parser = false
|
342
243
|
|
343
244
|
##### State transition tables end #####
|
344
245
|
|
data/lib/eccsv/parser.y
CHANGED
@@ -33,141 +33,46 @@ require 'strscan'
|
|
33
33
|
|
34
34
|
module ECCSV
|
35
35
|
---- inner
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
col = last.col
|
36
|
+
attr_reader :error, :warnings
|
37
|
+
|
38
|
+
def initialize
|
39
|
+
@warnings = []
|
40
|
+
@corrections = []
|
41
|
+
end
|
42
|
+
|
43
|
+
def add_correction(line, col, type, *args)
|
44
|
+
klass =
|
45
|
+
case type
|
46
|
+
when :insert
|
47
|
+
InsertCorrection
|
48
|
+
when :delete
|
49
|
+
DeleteCorrection
|
50
|
+
else
|
51
|
+
raise "invalid correction type: #{type.inspect}"
|
53
52
|
end
|
54
|
-
|
55
|
-
|
56
|
-
end
|
57
|
-
end
|
58
|
-
|
59
|
-
class QuotedTextNode < ParentNode
|
60
|
-
def value
|
61
|
-
@value ||= @children.collect(&:value).join
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
|
-
class FieldNode < ParentNode
|
66
|
-
def value
|
67
|
-
@value ||=
|
68
|
-
if @children[0].token == :TEXT
|
69
|
-
@children[0].value
|
70
|
-
else
|
71
|
-
# quoted text
|
72
|
-
@children[1].value
|
73
|
-
end
|
74
|
-
end
|
75
|
-
end
|
76
|
-
|
77
|
-
class DelimFieldNode < ParentNode
|
78
|
-
def value
|
79
|
-
@value ||= @children[0].value
|
80
|
-
end
|
81
|
-
end
|
82
|
-
|
83
|
-
class DelimFieldsNode < ParentNode
|
84
|
-
def value
|
85
|
-
@value ||=
|
86
|
-
if @children.empty?
|
87
|
-
[]
|
88
|
-
else
|
89
|
-
@children[0].value + [@children[1].value]
|
90
|
-
end
|
91
|
-
end
|
92
|
-
end
|
93
|
-
|
94
|
-
class RecordNode < ParentNode
|
95
|
-
def value
|
96
|
-
# TODO: 'consume' children to produce value to reduce memory footprint
|
97
|
-
@value ||= @children[0].value + [@children[1].value]
|
98
|
-
end
|
53
|
+
correction = klass.new(line, col, *args)
|
54
|
+
@corrections << correction
|
99
55
|
end
|
100
56
|
|
101
|
-
|
102
|
-
|
103
|
-
|
57
|
+
def parse(str)
|
58
|
+
@stream = Stream.new(StringIO.new(str))
|
59
|
+
@corrections.each do |correction|
|
60
|
+
correction.apply(@stream)
|
104
61
|
end
|
62
|
+
@lexer = Lexer.new(@stream)
|
63
|
+
do_parse
|
105
64
|
end
|
106
65
|
|
107
|
-
|
108
|
-
|
109
|
-
if @value.nil?
|
110
|
-
if @children.empty?
|
111
|
-
@value = []
|
112
|
-
else
|
113
|
-
@value = @children[0].value
|
114
|
-
val = @children[1].value
|
115
|
-
if !val.empty?
|
116
|
-
@value += [val]
|
117
|
-
end
|
118
|
-
end
|
119
|
-
end
|
120
|
-
@value
|
121
|
-
end
|
66
|
+
def curr_line
|
67
|
+
@stream.line
|
122
68
|
end
|
123
69
|
|
124
|
-
|
125
|
-
|
126
|
-
if @value.nil?
|
127
|
-
@value = @children[0].value
|
128
|
-
if @children[1]
|
129
|
-
@value += [@children[1].value]
|
130
|
-
end
|
131
|
-
end
|
132
|
-
@value
|
133
|
-
end
|
134
|
-
end
|
135
|
-
|
136
|
-
attr_reader :error
|
137
|
-
|
138
|
-
def parse(str)
|
139
|
-
@scanner = StringScanner.new(str)
|
140
|
-
@line = 1
|
141
|
-
@col = 1
|
142
|
-
do_parse
|
70
|
+
def curr_col
|
71
|
+
@stream.col
|
143
72
|
end
|
144
73
|
|
145
74
|
def next_token
|
146
|
-
|
147
|
-
next_line = @line
|
148
|
-
next_col = @col
|
149
|
-
case
|
150
|
-
when match = @scanner.scan(/,/)
|
151
|
-
token = :COMMA
|
152
|
-
when match = @scanner.scan(/"/)
|
153
|
-
token = :QUOTE
|
154
|
-
when match = @scanner.scan(/\n/)
|
155
|
-
token = :NEWLINE
|
156
|
-
next_line += 1
|
157
|
-
next_col = 0
|
158
|
-
when match = @scanner.scan(/[^,\n"]+/)
|
159
|
-
token = :TEXT
|
160
|
-
else
|
161
|
-
raise "can't recognize <#{@scanner.peek(5)}>"
|
162
|
-
end
|
163
|
-
next_col += match.length
|
164
|
-
|
165
|
-
value = node(match, token)
|
166
|
-
@line = next_line
|
167
|
-
@col = next_col
|
168
|
-
|
169
|
-
return [token, value]
|
170
|
-
end
|
75
|
+
@lexer.next_token
|
171
76
|
end
|
172
77
|
|
173
78
|
def warnings
|
@@ -176,27 +81,23 @@ module ECCSV
|
|
176
81
|
|
177
82
|
private
|
178
83
|
|
179
|
-
def
|
180
|
-
Node.new(value, token, line, col)
|
181
|
-
end
|
182
|
-
|
183
|
-
def quoted_text(children = [], line = @line, col = @col)
|
84
|
+
def quoted_text(children = [], line = curr_line, col = curr_col)
|
184
85
|
QuotedTextNode.new(children, line, col)
|
185
86
|
end
|
186
87
|
|
187
|
-
def field(children = [], line =
|
88
|
+
def field(children = [], line = curr_line, col = curr_col)
|
188
89
|
FieldNode.new(children, line, col)
|
189
90
|
end
|
190
91
|
|
191
|
-
def delim_field(children = [], line =
|
92
|
+
def delim_field(children = [], line = curr_line, col = curr_col)
|
192
93
|
DelimFieldNode.new(children, line, col)
|
193
94
|
end
|
194
95
|
|
195
|
-
def delim_fields(children = [], line =
|
96
|
+
def delim_fields(children = [], line = curr_line, col = curr_col)
|
196
97
|
DelimFieldsNode.new(children, line, col)
|
197
98
|
end
|
198
99
|
|
199
|
-
def record(children = [], line =
|
100
|
+
def record(children = [], line = curr_line, col = curr_col)
|
200
101
|
record = RecordNode.new(children, line, col)
|
201
102
|
value = record.value
|
202
103
|
if defined? @num_fields
|
@@ -217,15 +118,15 @@ module ECCSV
|
|
217
118
|
record
|
218
119
|
end
|
219
120
|
|
220
|
-
def delim_record(children = [], line =
|
121
|
+
def delim_record(children = [], line = curr_line, col = curr_col)
|
221
122
|
DelimRecordNode.new(children, line, col)
|
222
123
|
end
|
223
124
|
|
224
|
-
def delim_records(children = [], line =
|
125
|
+
def delim_records(children = [], line = curr_line, col = curr_col)
|
225
126
|
DelimRecordsNode.new(children, line, col)
|
226
127
|
end
|
227
128
|
|
228
|
-
def root(children = [], line =
|
129
|
+
def root(children = [], line = curr_line, col = curr_col)
|
229
130
|
RootNode.new(children, line, col)
|
230
131
|
end
|
231
132
|
|
data/lib/eccsv/stream.rb
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
module ECCSV
|
2
|
+
class Stream
|
3
|
+
attr_reader :line, :col, :pos
|
4
|
+
|
5
|
+
def initialize(io)
|
6
|
+
@io = io
|
7
|
+
@line = 1
|
8
|
+
@col = 1
|
9
|
+
@pos = 0
|
10
|
+
@inserts = Hash.new { |h, k| h[k] = {} }
|
11
|
+
@deletions = Hash.new { |h, k| h[k] = {} }
|
12
|
+
end
|
13
|
+
|
14
|
+
def peek
|
15
|
+
unless defined? @buf
|
16
|
+
@buf = getc
|
17
|
+
end
|
18
|
+
@buf
|
19
|
+
end
|
20
|
+
|
21
|
+
def next
|
22
|
+
if defined? @buf
|
23
|
+
val = @buf
|
24
|
+
remove_instance_variable(:@buf)
|
25
|
+
else
|
26
|
+
val = getc
|
27
|
+
end
|
28
|
+
|
29
|
+
if val
|
30
|
+
if val == "\n"
|
31
|
+
@line += 1
|
32
|
+
@col = 1
|
33
|
+
else
|
34
|
+
@col += 1
|
35
|
+
end
|
36
|
+
@pos += val.bytesize
|
37
|
+
end
|
38
|
+
val
|
39
|
+
end
|
40
|
+
|
41
|
+
def eof?
|
42
|
+
peek.nil?
|
43
|
+
end
|
44
|
+
|
45
|
+
def insert(str, line, col)
|
46
|
+
i = 0
|
47
|
+
str.each_char do |c|
|
48
|
+
@inserts[line][col+i] = c
|
49
|
+
if c == "\n"
|
50
|
+
line += 1
|
51
|
+
col = 1
|
52
|
+
i = 0
|
53
|
+
else
|
54
|
+
i += 1
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
def delete(len, line, col)
|
60
|
+
@deletions[line][col] = len
|
61
|
+
end
|
62
|
+
|
63
|
+
private
|
64
|
+
|
65
|
+
def getc
|
66
|
+
if @deletions.has_key?(@line) && @deletions[@line].has_key?(@col)
|
67
|
+
@io.seek(@deletions[@line][@col], IO::SEEK_CUR)
|
68
|
+
end
|
69
|
+
|
70
|
+
if @inserts.has_key?(@line) && @inserts[@line].has_key?(@col)
|
71
|
+
@inserts[@line][@col]
|
72
|
+
else
|
73
|
+
@io.getc
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
data/lib/eccsv/version.rb
CHANGED
data/test.rb
ADDED
data/test/test_parser.rb
CHANGED
@@ -227,4 +227,18 @@ class TestParser < Test::Unit::TestCase
|
|
227
227
|
assert error
|
228
228
|
end
|
229
229
|
=end
|
230
|
+
|
231
|
+
test "single insertion correction" do
|
232
|
+
parser = ECCSV::Parser.new
|
233
|
+
parser.add_correction(2, 5, :insert, '"')
|
234
|
+
result = parser.parse(%{foo,bar\n"foo})
|
235
|
+
assert_equal [['foo', 'bar'], ['foo']], result
|
236
|
+
end
|
237
|
+
|
238
|
+
test "single deletion correction" do
|
239
|
+
parser = ECCSV::Parser.new
|
240
|
+
parser.add_correction(1, 1, :delete, 1)
|
241
|
+
result = parser.parse(%{"foo,bar})
|
242
|
+
assert_equal [['foo', 'bar']], result
|
243
|
+
end
|
230
244
|
end
|
data/test/test_stream.rb
ADDED
@@ -0,0 +1,147 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
class TestStream < Test::Unit::TestCase
|
4
|
+
test "#peek returns 1 character without advancing cursor" do
|
5
|
+
io = StringIO.new("foo")
|
6
|
+
stream = ECCSV::Stream.new(io)
|
7
|
+
assert_equal "f", stream.peek
|
8
|
+
assert_equal "f", stream.peek
|
9
|
+
assert_equal "f", stream.peek
|
10
|
+
end
|
11
|
+
|
12
|
+
test "#next advances cursor after peek" do
|
13
|
+
io = StringIO.new("foo")
|
14
|
+
stream = ECCSV::Stream.new(io)
|
15
|
+
assert_equal "f", stream.peek
|
16
|
+
stream.next
|
17
|
+
assert_equal "o", stream.peek
|
18
|
+
stream.next
|
19
|
+
assert_equal "o", stream.peek
|
20
|
+
end
|
21
|
+
|
22
|
+
test "#next advances cursor without peek" do
|
23
|
+
io = StringIO.new("foo")
|
24
|
+
stream = ECCSV::Stream.new(io)
|
25
|
+
stream.next
|
26
|
+
assert_equal "o", stream.peek
|
27
|
+
end
|
28
|
+
|
29
|
+
test "#next increases column" do
|
30
|
+
io = StringIO.new("foo")
|
31
|
+
stream = ECCSV::Stream.new(io)
|
32
|
+
assert_equal 1, stream.col
|
33
|
+
stream.next
|
34
|
+
assert_equal 2, stream.col
|
35
|
+
stream.next
|
36
|
+
assert_equal 3, stream.col
|
37
|
+
end
|
38
|
+
|
39
|
+
test "#next increases line and resets column after reaching newline" do
|
40
|
+
io = StringIO.new("f\noo")
|
41
|
+
stream = ECCSV::Stream.new(io)
|
42
|
+
assert_equal 1, stream.line
|
43
|
+
assert_equal 1, stream.col
|
44
|
+
stream.next
|
45
|
+
assert_equal 1, stream.line
|
46
|
+
assert_equal 2, stream.col
|
47
|
+
stream.next
|
48
|
+
assert_equal 2, stream.line
|
49
|
+
assert_equal 1, stream.col
|
50
|
+
end
|
51
|
+
|
52
|
+
test "#next increases pos for single-byte character" do
|
53
|
+
io = StringIO.new("foo")
|
54
|
+
stream = ECCSV::Stream.new(io)
|
55
|
+
assert_equal 0, stream.pos
|
56
|
+
stream.next
|
57
|
+
assert_equal 1, stream.pos
|
58
|
+
end
|
59
|
+
|
60
|
+
test "#next increases pos for multi-byte character" do
|
61
|
+
io = StringIO.new("♫")
|
62
|
+
stream = ECCSV::Stream.new(io)
|
63
|
+
assert_equal 0, stream.pos
|
64
|
+
stream.next
|
65
|
+
assert_equal 3, stream.pos
|
66
|
+
end
|
67
|
+
|
68
|
+
test "#eof? returns true if at end" do
|
69
|
+
io = StringIO.new("foo")
|
70
|
+
stream = ECCSV::Stream.new(io)
|
71
|
+
stream.peek
|
72
|
+
stream.next
|
73
|
+
stream.peek
|
74
|
+
stream.next
|
75
|
+
stream.peek
|
76
|
+
stream.next
|
77
|
+
assert stream.eof?
|
78
|
+
end
|
79
|
+
|
80
|
+
test "#insert at line and col" do
|
81
|
+
io = StringIO.new("foo")
|
82
|
+
stream = ECCSV::Stream.new(io)
|
83
|
+
stream.insert("x", 1, 2)
|
84
|
+
assert_equal "f", stream.next
|
85
|
+
assert_equal "x", stream.next
|
86
|
+
assert_equal "o", stream.next
|
87
|
+
assert_equal "o", stream.next
|
88
|
+
end
|
89
|
+
|
90
|
+
test "#insert at end of input" do
|
91
|
+
io = StringIO.new("foo")
|
92
|
+
stream = ECCSV::Stream.new(io)
|
93
|
+
stream.insert("x", 1, 4)
|
94
|
+
assert_equal "f", stream.next
|
95
|
+
assert_equal "o", stream.next
|
96
|
+
assert_equal "o", stream.next
|
97
|
+
assert !stream.eof?
|
98
|
+
assert_equal "x", stream.next
|
99
|
+
end
|
100
|
+
|
101
|
+
test "#insert multi-character string" do
|
102
|
+
io = StringIO.new("foo")
|
103
|
+
stream = ECCSV::Stream.new(io)
|
104
|
+
stream.insert("bar", 1, 2)
|
105
|
+
assert_equal "f", stream.next
|
106
|
+
assert_equal "b", stream.next
|
107
|
+
assert_equal "a", stream.next
|
108
|
+
assert_equal "r", stream.next
|
109
|
+
assert_equal "o", stream.next
|
110
|
+
assert_equal "o", stream.next
|
111
|
+
end
|
112
|
+
|
113
|
+
test "#insert newline" do
|
114
|
+
io = StringIO.new("foo")
|
115
|
+
stream = ECCSV::Stream.new(io)
|
116
|
+
stream.insert("bar\nbaz", 1, 2)
|
117
|
+
assert_equal "f", stream.next
|
118
|
+
assert_equal "b", stream.next
|
119
|
+
assert_equal "a", stream.next
|
120
|
+
assert_equal "r", stream.next
|
121
|
+
assert_equal "\n", stream.next
|
122
|
+
assert_equal "b", stream.next
|
123
|
+
assert_equal "a", stream.next
|
124
|
+
assert_equal "z", stream.next
|
125
|
+
assert_equal "o", stream.next
|
126
|
+
assert_equal "o", stream.next
|
127
|
+
end
|
128
|
+
|
129
|
+
test "#delete 1 character at line and col" do
|
130
|
+
io = StringIO.new("foo")
|
131
|
+
stream = ECCSV::Stream.new(io)
|
132
|
+
stream.delete(1, 1, 2)
|
133
|
+
assert_equal "f", stream.next
|
134
|
+
assert_equal "o", stream.next
|
135
|
+
assert stream.eof?
|
136
|
+
end
|
137
|
+
|
138
|
+
test "#delete 3 characters at line and col" do
|
139
|
+
io = StringIO.new("fbaroo")
|
140
|
+
stream = ECCSV::Stream.new(io)
|
141
|
+
stream.delete(3, 1, 2)
|
142
|
+
assert_equal "f", stream.next
|
143
|
+
assert_equal "o", stream.next
|
144
|
+
assert_equal "o", stream.next
|
145
|
+
assert stream.eof?
|
146
|
+
end
|
147
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: eccsv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeremy Stephens
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-10-
|
11
|
+
date: 2014-10-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -74,18 +74,25 @@ extensions: []
|
|
74
74
|
extra_rdoc_files: []
|
75
75
|
files:
|
76
76
|
- ".gitignore"
|
77
|
+
- ".projections.json"
|
77
78
|
- Gemfile
|
78
79
|
- LICENSE.txt
|
79
80
|
- README.md
|
80
81
|
- Rakefile
|
81
82
|
- eccsv.gemspec
|
82
83
|
- lib/eccsv.rb
|
84
|
+
- lib/eccsv/correction.rb
|
83
85
|
- lib/eccsv/errors.rb
|
86
|
+
- lib/eccsv/lexer.rb
|
87
|
+
- lib/eccsv/node.rb
|
84
88
|
- lib/eccsv/parser.rb
|
85
89
|
- lib/eccsv/parser.y
|
90
|
+
- lib/eccsv/stream.rb
|
86
91
|
- lib/eccsv/version.rb
|
92
|
+
- test.rb
|
87
93
|
- test/helper.rb
|
88
94
|
- test/test_parser.rb
|
95
|
+
- test/test_stream.rb
|
89
96
|
homepage: https://github.com/coupler/eccsv
|
90
97
|
licenses:
|
91
98
|
- MIT
|
@@ -113,3 +120,5 @@ summary: CSV library with advanced error reporting
|
|
113
120
|
test_files:
|
114
121
|
- test/helper.rb
|
115
122
|
- test/test_parser.rb
|
123
|
+
- test/test_stream.rb
|
124
|
+
has_rdoc:
|