eccsv 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.projections.json +10 -0
- data/README.md +34 -0
- data/lib/eccsv.rb +5 -0
- data/lib/eccsv/correction.rb +42 -0
- data/lib/eccsv/lexer.rb +47 -0
- data/lib/eccsv/node.rb +101 -0
- data/lib/eccsv/parser.rb +38 -137
- data/lib/eccsv/parser.y +37 -136
- data/lib/eccsv/stream.rb +77 -0
- data/lib/eccsv/version.rb +1 -1
- data/test.rb +5 -0
- data/test/test_parser.rb +14 -0
- data/test/test_stream.rb +147 -0
- metadata +11 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6cd8161c122089744642e805673f8758e1f9eead
|
4
|
+
data.tar.gz: 2243015e40dfa275dd74b291fc1303fd533ad0bb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ebb75489bb52a02c4c6690476bdaea6a45fe750ed3b16dfee051e511086a50e5c7921e9b0546d340342f3b04febfda9a99869104dc866305177c268aa532c295
|
7
|
+
data.tar.gz: 20b6014bc62d0138e454dec1ca2f71679058cb199104246afb2670958b615145112f58cd92c21d269efca0cca4f12fa7df28b231922be3cf8ff39802defe366c
|
data/.gitignore
CHANGED
data/.projections.json
ADDED
data/README.md
CHANGED
@@ -104,6 +104,40 @@ parser.warnings[0].line #=> 2
|
|
104
104
|
parser.warnings[0].col #=> 4
|
105
105
|
```
|
106
106
|
|
107
|
+
## Corrections
|
108
|
+
|
109
|
+
It is possible to correct errors in the input by inserting or deleting characters at a given line and column.
|
110
|
+
|
111
|
+
### Examples
|
112
|
+
|
113
|
+
#### Inserting
|
114
|
+
|
115
|
+
```ruby
|
116
|
+
require 'eccsv'
|
117
|
+
|
118
|
+
data = <<EOF
|
119
|
+
foo",bar
|
120
|
+
EOF
|
121
|
+
|
122
|
+
parser = ECCSV::Parser.new
|
123
|
+
parser.add_correction(1, 1, :insert, '"')
|
124
|
+
parser.parse(data) #=> [["foo", "bar"]]
|
125
|
+
```
|
126
|
+
|
127
|
+
#### Deleting
|
128
|
+
|
129
|
+
```ruby
|
130
|
+
require 'eccsv'
|
131
|
+
|
132
|
+
data = <<EOF
|
133
|
+
foo",bar
|
134
|
+
EOF
|
135
|
+
|
136
|
+
parser = ECCSV::Parser.new
|
137
|
+
parser.add_correction(1, 4, :delete, 1)
|
138
|
+
parser.parse(data) #=> [["foo", "bar"]]
|
139
|
+
```
|
140
|
+
|
107
141
|
## Contributing
|
108
142
|
|
109
143
|
1. Fork it
|
data/lib/eccsv.rb
CHANGED
data/lib/eccsv/correction.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
module ECCSV
|
2
|
+
class Correction
|
3
|
+
attr_reader :line, :col
|
4
|
+
|
5
|
+
def initialize(line, col, *args)
|
6
|
+
@line = line
|
7
|
+
@col = col
|
8
|
+
end
|
9
|
+
|
10
|
+
def apply(stream)
|
11
|
+
raise NotImplementedError
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
class InsertCorrection < Correction
|
16
|
+
attr_reader :string
|
17
|
+
|
18
|
+
def initialize(line, col, string)
|
19
|
+
super
|
20
|
+
@string = string
|
21
|
+
end
|
22
|
+
|
23
|
+
def length
|
24
|
+
@string.length
|
25
|
+
end
|
26
|
+
|
27
|
+
def apply(stream)
|
28
|
+
stream.insert(@string, @line, @col)
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
class DeleteCorrection < Correction
|
33
|
+
def initialize(line, col, amount)
|
34
|
+
super
|
35
|
+
@amount = amount
|
36
|
+
end
|
37
|
+
|
38
|
+
def apply(stream)
|
39
|
+
stream.delete(@amount, @line, @col)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
data/lib/eccsv/lexer.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
module ECCSV
|
2
|
+
class Lexer
|
3
|
+
def initialize(stream)
|
4
|
+
@stream = stream
|
5
|
+
end
|
6
|
+
|
7
|
+
def next_token
|
8
|
+
unless @stream.eof?
|
9
|
+
token = nil
|
10
|
+
match = ""
|
11
|
+
line = @stream.line
|
12
|
+
col = @stream.col
|
13
|
+
|
14
|
+
until @stream.eof?
|
15
|
+
c = @stream.peek
|
16
|
+
if token.nil?
|
17
|
+
match << c
|
18
|
+
@stream.next
|
19
|
+
if c == ","
|
20
|
+
token = :COMMA
|
21
|
+
break
|
22
|
+
elsif c == '"'
|
23
|
+
token = :QUOTE
|
24
|
+
break
|
25
|
+
elsif c == "\n"
|
26
|
+
token = :NEWLINE
|
27
|
+
break
|
28
|
+
else
|
29
|
+
token = :TEXT
|
30
|
+
end
|
31
|
+
elsif c != "," && c != '"' && c != "\n"
|
32
|
+
match << c
|
33
|
+
@stream.next
|
34
|
+
else
|
35
|
+
break
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
if match.length == 0
|
40
|
+
raise "Stream error"
|
41
|
+
end
|
42
|
+
node = Node.new(match, token, line, col)
|
43
|
+
[token, node]
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
data/lib/eccsv/node.rb
ADDED
@@ -0,0 +1,101 @@
|
|
1
|
+
module ECCSV
|
2
|
+
class Node
|
3
|
+
attr_reader :value, :token, :line, :col
|
4
|
+
|
5
|
+
def initialize(value = "", token = nil, line = nil, col = nil)
|
6
|
+
@value = value
|
7
|
+
@token = token
|
8
|
+
@line = line
|
9
|
+
@col = col
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
class ParentNode < Node
|
14
|
+
def initialize(children = [], line = nil, col = nil)
|
15
|
+
last = children.last
|
16
|
+
if last && last.is_a?(Node)
|
17
|
+
line = last.line
|
18
|
+
col = last.col
|
19
|
+
end
|
20
|
+
super(nil, nil, line, col)
|
21
|
+
@children = children
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
class QuotedTextNode < ParentNode
|
26
|
+
def value
|
27
|
+
@value ||= @children.collect(&:value).join
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
class FieldNode < ParentNode
|
32
|
+
def value
|
33
|
+
@value ||=
|
34
|
+
if @children[0].token == :TEXT
|
35
|
+
@children[0].value
|
36
|
+
else
|
37
|
+
# quoted text
|
38
|
+
@children[1].value
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
class DelimFieldNode < ParentNode
|
44
|
+
def value
|
45
|
+
@value ||= @children[0].value
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
class DelimFieldsNode < ParentNode
|
50
|
+
def value
|
51
|
+
@value ||=
|
52
|
+
if @children.empty?
|
53
|
+
[]
|
54
|
+
else
|
55
|
+
@children[0].value + [@children[1].value]
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
class RecordNode < ParentNode
|
61
|
+
def value
|
62
|
+
# TODO: 'consume' children to produce value to reduce memory footprint
|
63
|
+
@value ||= @children[0].value + [@children[1].value]
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
class DelimRecordNode < ParentNode
|
68
|
+
def value
|
69
|
+
@value ||= @children.length == 1 ? [] : @children[0].value
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
class DelimRecordsNode < ParentNode
|
74
|
+
def value
|
75
|
+
if @value.nil?
|
76
|
+
if @children.empty?
|
77
|
+
@value = []
|
78
|
+
else
|
79
|
+
@value = @children[0].value
|
80
|
+
val = @children[1].value
|
81
|
+
if !val.empty?
|
82
|
+
@value += [val]
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
86
|
+
@value
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
class RootNode < ParentNode
|
91
|
+
def value
|
92
|
+
if @value.nil?
|
93
|
+
@value = @children[0].value
|
94
|
+
if @children[1]
|
95
|
+
@value += [@children[1].value]
|
96
|
+
end
|
97
|
+
end
|
98
|
+
@value
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
data/lib/eccsv/parser.rb
CHANGED
@@ -12,141 +12,46 @@ module ECCSV
|
|
12
12
|
class Parser < Racc::Parser
|
13
13
|
|
14
14
|
module_eval(<<'...end parser.y/module_eval...', 'parser.y', 36)
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
col = last.col
|
15
|
+
attr_reader :error, :warnings
|
16
|
+
|
17
|
+
def initialize
|
18
|
+
@warnings = []
|
19
|
+
@corrections = []
|
20
|
+
end
|
21
|
+
|
22
|
+
def add_correction(line, col, type, *args)
|
23
|
+
klass =
|
24
|
+
case type
|
25
|
+
when :insert
|
26
|
+
InsertCorrection
|
27
|
+
when :delete
|
28
|
+
DeleteCorrection
|
29
|
+
else
|
30
|
+
raise "invalid correction type: #{type.inspect}"
|
32
31
|
end
|
33
|
-
|
34
|
-
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
class QuotedTextNode < ParentNode
|
39
|
-
def value
|
40
|
-
@value ||= @children.collect(&:value).join
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
class FieldNode < ParentNode
|
45
|
-
def value
|
46
|
-
@value ||=
|
47
|
-
if @children[0].token == :TEXT
|
48
|
-
@children[0].value
|
49
|
-
else
|
50
|
-
# quoted text
|
51
|
-
@children[1].value
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
class DelimFieldNode < ParentNode
|
57
|
-
def value
|
58
|
-
@value ||= @children[0].value
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
class DelimFieldsNode < ParentNode
|
63
|
-
def value
|
64
|
-
@value ||=
|
65
|
-
if @children.empty?
|
66
|
-
[]
|
67
|
-
else
|
68
|
-
@children[0].value + [@children[1].value]
|
69
|
-
end
|
70
|
-
end
|
71
|
-
end
|
72
|
-
|
73
|
-
class RecordNode < ParentNode
|
74
|
-
def value
|
75
|
-
# TODO: 'consume' children to produce value to reduce memory footprint
|
76
|
-
@value ||= @children[0].value + [@children[1].value]
|
77
|
-
end
|
32
|
+
correction = klass.new(line, col, *args)
|
33
|
+
@corrections << correction
|
78
34
|
end
|
79
35
|
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
end
|
85
|
-
|
86
|
-
class DelimRecordsNode < ParentNode
|
87
|
-
def value
|
88
|
-
if @value.nil?
|
89
|
-
if @children.empty?
|
90
|
-
@value = []
|
91
|
-
else
|
92
|
-
@value = @children[0].value
|
93
|
-
val = @children[1].value
|
94
|
-
if !val.empty?
|
95
|
-
@value += [val]
|
96
|
-
end
|
97
|
-
end
|
98
|
-
end
|
99
|
-
@value
|
36
|
+
def parse(str)
|
37
|
+
@stream = Stream.new(StringIO.new(str))
|
38
|
+
@corrections.each do |correction|
|
39
|
+
correction.apply(@stream)
|
100
40
|
end
|
41
|
+
@lexer = Lexer.new(@stream)
|
42
|
+
do_parse
|
101
43
|
end
|
102
44
|
|
103
|
-
|
104
|
-
|
105
|
-
if @value.nil?
|
106
|
-
@value = @children[0].value
|
107
|
-
if @children[1]
|
108
|
-
@value += [@children[1].value]
|
109
|
-
end
|
110
|
-
end
|
111
|
-
@value
|
112
|
-
end
|
45
|
+
def curr_line
|
46
|
+
@stream.line
|
113
47
|
end
|
114
48
|
|
115
|
-
|
116
|
-
|
117
|
-
def parse(str)
|
118
|
-
@scanner = StringScanner.new(str)
|
119
|
-
@line = 1
|
120
|
-
@col = 1
|
121
|
-
do_parse
|
49
|
+
def curr_col
|
50
|
+
@stream.col
|
122
51
|
end
|
123
52
|
|
124
53
|
def next_token
|
125
|
-
|
126
|
-
next_line = @line
|
127
|
-
next_col = @col
|
128
|
-
case
|
129
|
-
when match = @scanner.scan(/,/)
|
130
|
-
token = :COMMA
|
131
|
-
when match = @scanner.scan(/"/)
|
132
|
-
token = :QUOTE
|
133
|
-
when match = @scanner.scan(/\n/)
|
134
|
-
token = :NEWLINE
|
135
|
-
next_line += 1
|
136
|
-
next_col = 0
|
137
|
-
when match = @scanner.scan(/[^,\n"]+/)
|
138
|
-
token = :TEXT
|
139
|
-
else
|
140
|
-
raise "can't recognize <#{@scanner.peek(5)}>"
|
141
|
-
end
|
142
|
-
next_col += match.length
|
143
|
-
|
144
|
-
value = node(match, token)
|
145
|
-
@line = next_line
|
146
|
-
@col = next_col
|
147
|
-
|
148
|
-
return [token, value]
|
149
|
-
end
|
54
|
+
@lexer.next_token
|
150
55
|
end
|
151
56
|
|
152
57
|
def warnings
|
@@ -155,27 +60,23 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 36)
|
|
155
60
|
|
156
61
|
private
|
157
62
|
|
158
|
-
def
|
159
|
-
Node.new(value, token, line, col)
|
160
|
-
end
|
161
|
-
|
162
|
-
def quoted_text(children = [], line = @line, col = @col)
|
63
|
+
def quoted_text(children = [], line = curr_line, col = curr_col)
|
163
64
|
QuotedTextNode.new(children, line, col)
|
164
65
|
end
|
165
66
|
|
166
|
-
def field(children = [], line =
|
67
|
+
def field(children = [], line = curr_line, col = curr_col)
|
167
68
|
FieldNode.new(children, line, col)
|
168
69
|
end
|
169
70
|
|
170
|
-
def delim_field(children = [], line =
|
71
|
+
def delim_field(children = [], line = curr_line, col = curr_col)
|
171
72
|
DelimFieldNode.new(children, line, col)
|
172
73
|
end
|
173
74
|
|
174
|
-
def delim_fields(children = [], line =
|
75
|
+
def delim_fields(children = [], line = curr_line, col = curr_col)
|
175
76
|
DelimFieldsNode.new(children, line, col)
|
176
77
|
end
|
177
78
|
|
178
|
-
def record(children = [], line =
|
79
|
+
def record(children = [], line = curr_line, col = curr_col)
|
179
80
|
record = RecordNode.new(children, line, col)
|
180
81
|
value = record.value
|
181
82
|
if defined? @num_fields
|
@@ -196,15 +97,15 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 36)
|
|
196
97
|
record
|
197
98
|
end
|
198
99
|
|
199
|
-
def delim_record(children = [], line =
|
100
|
+
def delim_record(children = [], line = curr_line, col = curr_col)
|
200
101
|
DelimRecordNode.new(children, line, col)
|
201
102
|
end
|
202
103
|
|
203
|
-
def delim_records(children = [], line =
|
104
|
+
def delim_records(children = [], line = curr_line, col = curr_col)
|
204
105
|
DelimRecordsNode.new(children, line, col)
|
205
106
|
end
|
206
107
|
|
207
|
-
def root(children = [], line =
|
108
|
+
def root(children = [], line = curr_line, col = curr_col)
|
208
109
|
RootNode.new(children, line, col)
|
209
110
|
end
|
210
111
|
|
@@ -338,7 +239,7 @@ Racc_token_to_s_table = [
|
|
338
239
|
"delim_field",
|
339
240
|
"quoted_text" ]
|
340
241
|
|
341
|
-
Racc_debug_parser =
|
242
|
+
Racc_debug_parser = false
|
342
243
|
|
343
244
|
##### State transition tables end #####
|
344
245
|
|
data/lib/eccsv/parser.y
CHANGED
@@ -33,141 +33,46 @@ require 'strscan'
|
|
33
33
|
|
34
34
|
module ECCSV
|
35
35
|
---- inner
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
col = last.col
|
36
|
+
attr_reader :error, :warnings
|
37
|
+
|
38
|
+
def initialize
|
39
|
+
@warnings = []
|
40
|
+
@corrections = []
|
41
|
+
end
|
42
|
+
|
43
|
+
def add_correction(line, col, type, *args)
|
44
|
+
klass =
|
45
|
+
case type
|
46
|
+
when :insert
|
47
|
+
InsertCorrection
|
48
|
+
when :delete
|
49
|
+
DeleteCorrection
|
50
|
+
else
|
51
|
+
raise "invalid correction type: #{type.inspect}"
|
53
52
|
end
|
54
|
-
|
55
|
-
|
56
|
-
end
|
57
|
-
end
|
58
|
-
|
59
|
-
class QuotedTextNode < ParentNode
|
60
|
-
def value
|
61
|
-
@value ||= @children.collect(&:value).join
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
|
-
class FieldNode < ParentNode
|
66
|
-
def value
|
67
|
-
@value ||=
|
68
|
-
if @children[0].token == :TEXT
|
69
|
-
@children[0].value
|
70
|
-
else
|
71
|
-
# quoted text
|
72
|
-
@children[1].value
|
73
|
-
end
|
74
|
-
end
|
75
|
-
end
|
76
|
-
|
77
|
-
class DelimFieldNode < ParentNode
|
78
|
-
def value
|
79
|
-
@value ||= @children[0].value
|
80
|
-
end
|
81
|
-
end
|
82
|
-
|
83
|
-
class DelimFieldsNode < ParentNode
|
84
|
-
def value
|
85
|
-
@value ||=
|
86
|
-
if @children.empty?
|
87
|
-
[]
|
88
|
-
else
|
89
|
-
@children[0].value + [@children[1].value]
|
90
|
-
end
|
91
|
-
end
|
92
|
-
end
|
93
|
-
|
94
|
-
class RecordNode < ParentNode
|
95
|
-
def value
|
96
|
-
# TODO: 'consume' children to produce value to reduce memory footprint
|
97
|
-
@value ||= @children[0].value + [@children[1].value]
|
98
|
-
end
|
53
|
+
correction = klass.new(line, col, *args)
|
54
|
+
@corrections << correction
|
99
55
|
end
|
100
56
|
|
101
|
-
|
102
|
-
|
103
|
-
|
57
|
+
def parse(str)
|
58
|
+
@stream = Stream.new(StringIO.new(str))
|
59
|
+
@corrections.each do |correction|
|
60
|
+
correction.apply(@stream)
|
104
61
|
end
|
62
|
+
@lexer = Lexer.new(@stream)
|
63
|
+
do_parse
|
105
64
|
end
|
106
65
|
|
107
|
-
|
108
|
-
|
109
|
-
if @value.nil?
|
110
|
-
if @children.empty?
|
111
|
-
@value = []
|
112
|
-
else
|
113
|
-
@value = @children[0].value
|
114
|
-
val = @children[1].value
|
115
|
-
if !val.empty?
|
116
|
-
@value += [val]
|
117
|
-
end
|
118
|
-
end
|
119
|
-
end
|
120
|
-
@value
|
121
|
-
end
|
66
|
+
def curr_line
|
67
|
+
@stream.line
|
122
68
|
end
|
123
69
|
|
124
|
-
|
125
|
-
|
126
|
-
if @value.nil?
|
127
|
-
@value = @children[0].value
|
128
|
-
if @children[1]
|
129
|
-
@value += [@children[1].value]
|
130
|
-
end
|
131
|
-
end
|
132
|
-
@value
|
133
|
-
end
|
134
|
-
end
|
135
|
-
|
136
|
-
attr_reader :error
|
137
|
-
|
138
|
-
def parse(str)
|
139
|
-
@scanner = StringScanner.new(str)
|
140
|
-
@line = 1
|
141
|
-
@col = 1
|
142
|
-
do_parse
|
70
|
+
def curr_col
|
71
|
+
@stream.col
|
143
72
|
end
|
144
73
|
|
145
74
|
def next_token
|
146
|
-
|
147
|
-
next_line = @line
|
148
|
-
next_col = @col
|
149
|
-
case
|
150
|
-
when match = @scanner.scan(/,/)
|
151
|
-
token = :COMMA
|
152
|
-
when match = @scanner.scan(/"/)
|
153
|
-
token = :QUOTE
|
154
|
-
when match = @scanner.scan(/\n/)
|
155
|
-
token = :NEWLINE
|
156
|
-
next_line += 1
|
157
|
-
next_col = 0
|
158
|
-
when match = @scanner.scan(/[^,\n"]+/)
|
159
|
-
token = :TEXT
|
160
|
-
else
|
161
|
-
raise "can't recognize <#{@scanner.peek(5)}>"
|
162
|
-
end
|
163
|
-
next_col += match.length
|
164
|
-
|
165
|
-
value = node(match, token)
|
166
|
-
@line = next_line
|
167
|
-
@col = next_col
|
168
|
-
|
169
|
-
return [token, value]
|
170
|
-
end
|
75
|
+
@lexer.next_token
|
171
76
|
end
|
172
77
|
|
173
78
|
def warnings
|
@@ -176,27 +81,23 @@ module ECCSV
|
|
176
81
|
|
177
82
|
private
|
178
83
|
|
179
|
-
def
|
180
|
-
Node.new(value, token, line, col)
|
181
|
-
end
|
182
|
-
|
183
|
-
def quoted_text(children = [], line = @line, col = @col)
|
84
|
+
def quoted_text(children = [], line = curr_line, col = curr_col)
|
184
85
|
QuotedTextNode.new(children, line, col)
|
185
86
|
end
|
186
87
|
|
187
|
-
def field(children = [], line =
|
88
|
+
def field(children = [], line = curr_line, col = curr_col)
|
188
89
|
FieldNode.new(children, line, col)
|
189
90
|
end
|
190
91
|
|
191
|
-
def delim_field(children = [], line =
|
92
|
+
def delim_field(children = [], line = curr_line, col = curr_col)
|
192
93
|
DelimFieldNode.new(children, line, col)
|
193
94
|
end
|
194
95
|
|
195
|
-
def delim_fields(children = [], line =
|
96
|
+
def delim_fields(children = [], line = curr_line, col = curr_col)
|
196
97
|
DelimFieldsNode.new(children, line, col)
|
197
98
|
end
|
198
99
|
|
199
|
-
def record(children = [], line =
|
100
|
+
def record(children = [], line = curr_line, col = curr_col)
|
200
101
|
record = RecordNode.new(children, line, col)
|
201
102
|
value = record.value
|
202
103
|
if defined? @num_fields
|
@@ -217,15 +118,15 @@ module ECCSV
|
|
217
118
|
record
|
218
119
|
end
|
219
120
|
|
220
|
-
def delim_record(children = [], line =
|
121
|
+
def delim_record(children = [], line = curr_line, col = curr_col)
|
221
122
|
DelimRecordNode.new(children, line, col)
|
222
123
|
end
|
223
124
|
|
224
|
-
def delim_records(children = [], line =
|
125
|
+
def delim_records(children = [], line = curr_line, col = curr_col)
|
225
126
|
DelimRecordsNode.new(children, line, col)
|
226
127
|
end
|
227
128
|
|
228
|
-
def root(children = [], line =
|
129
|
+
def root(children = [], line = curr_line, col = curr_col)
|
229
130
|
RootNode.new(children, line, col)
|
230
131
|
end
|
231
132
|
|
data/lib/eccsv/stream.rb
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
module ECCSV
|
2
|
+
class Stream
|
3
|
+
attr_reader :line, :col, :pos
|
4
|
+
|
5
|
+
def initialize(io)
|
6
|
+
@io = io
|
7
|
+
@line = 1
|
8
|
+
@col = 1
|
9
|
+
@pos = 0
|
10
|
+
@inserts = Hash.new { |h, k| h[k] = {} }
|
11
|
+
@deletions = Hash.new { |h, k| h[k] = {} }
|
12
|
+
end
|
13
|
+
|
14
|
+
def peek
|
15
|
+
unless defined? @buf
|
16
|
+
@buf = getc
|
17
|
+
end
|
18
|
+
@buf
|
19
|
+
end
|
20
|
+
|
21
|
+
def next
|
22
|
+
if defined? @buf
|
23
|
+
val = @buf
|
24
|
+
remove_instance_variable(:@buf)
|
25
|
+
else
|
26
|
+
val = getc
|
27
|
+
end
|
28
|
+
|
29
|
+
if val
|
30
|
+
if val == "\n"
|
31
|
+
@line += 1
|
32
|
+
@col = 1
|
33
|
+
else
|
34
|
+
@col += 1
|
35
|
+
end
|
36
|
+
@pos += val.bytesize
|
37
|
+
end
|
38
|
+
val
|
39
|
+
end
|
40
|
+
|
41
|
+
def eof?
|
42
|
+
peek.nil?
|
43
|
+
end
|
44
|
+
|
45
|
+
def insert(str, line, col)
|
46
|
+
i = 0
|
47
|
+
str.each_char do |c|
|
48
|
+
@inserts[line][col+i] = c
|
49
|
+
if c == "\n"
|
50
|
+
line += 1
|
51
|
+
col = 1
|
52
|
+
i = 0
|
53
|
+
else
|
54
|
+
i += 1
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
def delete(len, line, col)
|
60
|
+
@deletions[line][col] = len
|
61
|
+
end
|
62
|
+
|
63
|
+
private
|
64
|
+
|
65
|
+
def getc
|
66
|
+
if @deletions.has_key?(@line) && @deletions[@line].has_key?(@col)
|
67
|
+
@io.seek(@deletions[@line][@col], IO::SEEK_CUR)
|
68
|
+
end
|
69
|
+
|
70
|
+
if @inserts.has_key?(@line) && @inserts[@line].has_key?(@col)
|
71
|
+
@inserts[@line][@col]
|
72
|
+
else
|
73
|
+
@io.getc
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
data/lib/eccsv/version.rb
CHANGED
data/test.rb
ADDED
data/test/test_parser.rb
CHANGED
@@ -227,4 +227,18 @@ class TestParser < Test::Unit::TestCase
|
|
227
227
|
assert error
|
228
228
|
end
|
229
229
|
=end
|
230
|
+
|
231
|
+
test "single insertion correction" do
|
232
|
+
parser = ECCSV::Parser.new
|
233
|
+
parser.add_correction(2, 5, :insert, '"')
|
234
|
+
result = parser.parse(%{foo,bar\n"foo})
|
235
|
+
assert_equal [['foo', 'bar'], ['foo']], result
|
236
|
+
end
|
237
|
+
|
238
|
+
test "single deletion correction" do
|
239
|
+
parser = ECCSV::Parser.new
|
240
|
+
parser.add_correction(1, 1, :delete, 1)
|
241
|
+
result = parser.parse(%{"foo,bar})
|
242
|
+
assert_equal [['foo', 'bar']], result
|
243
|
+
end
|
230
244
|
end
|
data/test/test_stream.rb
ADDED
@@ -0,0 +1,147 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
class TestStream < Test::Unit::TestCase
|
4
|
+
test "#peek returns 1 character without advancing cursor" do
|
5
|
+
io = StringIO.new("foo")
|
6
|
+
stream = ECCSV::Stream.new(io)
|
7
|
+
assert_equal "f", stream.peek
|
8
|
+
assert_equal "f", stream.peek
|
9
|
+
assert_equal "f", stream.peek
|
10
|
+
end
|
11
|
+
|
12
|
+
test "#next advances cursor after peek" do
|
13
|
+
io = StringIO.new("foo")
|
14
|
+
stream = ECCSV::Stream.new(io)
|
15
|
+
assert_equal "f", stream.peek
|
16
|
+
stream.next
|
17
|
+
assert_equal "o", stream.peek
|
18
|
+
stream.next
|
19
|
+
assert_equal "o", stream.peek
|
20
|
+
end
|
21
|
+
|
22
|
+
test "#next advances cursor without peek" do
|
23
|
+
io = StringIO.new("foo")
|
24
|
+
stream = ECCSV::Stream.new(io)
|
25
|
+
stream.next
|
26
|
+
assert_equal "o", stream.peek
|
27
|
+
end
|
28
|
+
|
29
|
+
test "#next increases column" do
|
30
|
+
io = StringIO.new("foo")
|
31
|
+
stream = ECCSV::Stream.new(io)
|
32
|
+
assert_equal 1, stream.col
|
33
|
+
stream.next
|
34
|
+
assert_equal 2, stream.col
|
35
|
+
stream.next
|
36
|
+
assert_equal 3, stream.col
|
37
|
+
end
|
38
|
+
|
39
|
+
test "#next increases line and resets column after reaching newline" do
|
40
|
+
io = StringIO.new("f\noo")
|
41
|
+
stream = ECCSV::Stream.new(io)
|
42
|
+
assert_equal 1, stream.line
|
43
|
+
assert_equal 1, stream.col
|
44
|
+
stream.next
|
45
|
+
assert_equal 1, stream.line
|
46
|
+
assert_equal 2, stream.col
|
47
|
+
stream.next
|
48
|
+
assert_equal 2, stream.line
|
49
|
+
assert_equal 1, stream.col
|
50
|
+
end
|
51
|
+
|
52
|
+
test "#next increases pos for single-byte character" do
|
53
|
+
io = StringIO.new("foo")
|
54
|
+
stream = ECCSV::Stream.new(io)
|
55
|
+
assert_equal 0, stream.pos
|
56
|
+
stream.next
|
57
|
+
assert_equal 1, stream.pos
|
58
|
+
end
|
59
|
+
|
60
|
+
test "#next increases pos for multi-byte character" do
|
61
|
+
io = StringIO.new("♫")
|
62
|
+
stream = ECCSV::Stream.new(io)
|
63
|
+
assert_equal 0, stream.pos
|
64
|
+
stream.next
|
65
|
+
assert_equal 3, stream.pos
|
66
|
+
end
|
67
|
+
|
68
|
+
test "#eof? returns true if at end" do
|
69
|
+
io = StringIO.new("foo")
|
70
|
+
stream = ECCSV::Stream.new(io)
|
71
|
+
stream.peek
|
72
|
+
stream.next
|
73
|
+
stream.peek
|
74
|
+
stream.next
|
75
|
+
stream.peek
|
76
|
+
stream.next
|
77
|
+
assert stream.eof?
|
78
|
+
end
|
79
|
+
|
80
|
+
test "#insert at line and col" do
|
81
|
+
io = StringIO.new("foo")
|
82
|
+
stream = ECCSV::Stream.new(io)
|
83
|
+
stream.insert("x", 1, 2)
|
84
|
+
assert_equal "f", stream.next
|
85
|
+
assert_equal "x", stream.next
|
86
|
+
assert_equal "o", stream.next
|
87
|
+
assert_equal "o", stream.next
|
88
|
+
end
|
89
|
+
|
90
|
+
test "#insert at end of input" do
|
91
|
+
io = StringIO.new("foo")
|
92
|
+
stream = ECCSV::Stream.new(io)
|
93
|
+
stream.insert("x", 1, 4)
|
94
|
+
assert_equal "f", stream.next
|
95
|
+
assert_equal "o", stream.next
|
96
|
+
assert_equal "o", stream.next
|
97
|
+
assert !stream.eof?
|
98
|
+
assert_equal "x", stream.next
|
99
|
+
end
|
100
|
+
|
101
|
+
test "#insert multi-character string" do
|
102
|
+
io = StringIO.new("foo")
|
103
|
+
stream = ECCSV::Stream.new(io)
|
104
|
+
stream.insert("bar", 1, 2)
|
105
|
+
assert_equal "f", stream.next
|
106
|
+
assert_equal "b", stream.next
|
107
|
+
assert_equal "a", stream.next
|
108
|
+
assert_equal "r", stream.next
|
109
|
+
assert_equal "o", stream.next
|
110
|
+
assert_equal "o", stream.next
|
111
|
+
end
|
112
|
+
|
113
|
+
test "#insert newline" do
|
114
|
+
io = StringIO.new("foo")
|
115
|
+
stream = ECCSV::Stream.new(io)
|
116
|
+
stream.insert("bar\nbaz", 1, 2)
|
117
|
+
assert_equal "f", stream.next
|
118
|
+
assert_equal "b", stream.next
|
119
|
+
assert_equal "a", stream.next
|
120
|
+
assert_equal "r", stream.next
|
121
|
+
assert_equal "\n", stream.next
|
122
|
+
assert_equal "b", stream.next
|
123
|
+
assert_equal "a", stream.next
|
124
|
+
assert_equal "z", stream.next
|
125
|
+
assert_equal "o", stream.next
|
126
|
+
assert_equal "o", stream.next
|
127
|
+
end
|
128
|
+
|
129
|
+
test "#delete 1 character at line and col" do
|
130
|
+
io = StringIO.new("foo")
|
131
|
+
stream = ECCSV::Stream.new(io)
|
132
|
+
stream.delete(1, 1, 2)
|
133
|
+
assert_equal "f", stream.next
|
134
|
+
assert_equal "o", stream.next
|
135
|
+
assert stream.eof?
|
136
|
+
end
|
137
|
+
|
138
|
+
test "#delete 3 characters at line and col" do
|
139
|
+
io = StringIO.new("fbaroo")
|
140
|
+
stream = ECCSV::Stream.new(io)
|
141
|
+
stream.delete(3, 1, 2)
|
142
|
+
assert_equal "f", stream.next
|
143
|
+
assert_equal "o", stream.next
|
144
|
+
assert_equal "o", stream.next
|
145
|
+
assert stream.eof?
|
146
|
+
end
|
147
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: eccsv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeremy Stephens
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-10-
|
11
|
+
date: 2014-10-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -74,18 +74,25 @@ extensions: []
|
|
74
74
|
extra_rdoc_files: []
|
75
75
|
files:
|
76
76
|
- ".gitignore"
|
77
|
+
- ".projections.json"
|
77
78
|
- Gemfile
|
78
79
|
- LICENSE.txt
|
79
80
|
- README.md
|
80
81
|
- Rakefile
|
81
82
|
- eccsv.gemspec
|
82
83
|
- lib/eccsv.rb
|
84
|
+
- lib/eccsv/correction.rb
|
83
85
|
- lib/eccsv/errors.rb
|
86
|
+
- lib/eccsv/lexer.rb
|
87
|
+
- lib/eccsv/node.rb
|
84
88
|
- lib/eccsv/parser.rb
|
85
89
|
- lib/eccsv/parser.y
|
90
|
+
- lib/eccsv/stream.rb
|
86
91
|
- lib/eccsv/version.rb
|
92
|
+
- test.rb
|
87
93
|
- test/helper.rb
|
88
94
|
- test/test_parser.rb
|
95
|
+
- test/test_stream.rb
|
89
96
|
homepage: https://github.com/coupler/eccsv
|
90
97
|
licenses:
|
91
98
|
- MIT
|
@@ -113,3 +120,5 @@ summary: CSV library with advanced error reporting
|
|
113
120
|
test_files:
|
114
121
|
- test/helper.rb
|
115
122
|
- test/test_parser.rb
|
123
|
+
- test/test_stream.rb
|
124
|
+
has_rdoc:
|