sie 3.1.1 → 3.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/lib/sie/parser.rb +2 -2
- data/lib/sie/parser/tokenizer.rb +59 -47
- data/lib/sie/version.rb +1 -1
- data/spec/unit/parser/tokenizer_spec.rb +56 -0
- data/spec/unit/parser_spec.rb +24 -5
- metadata +2 -3
- data/lib/sie/parser/tokenizer/character.rb +0 -34
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 82618d146dcca7af3a358baa84ebabc1349d4e20
|
4
|
+
data.tar.gz: cbaa13f1ca6a9eaeb7aa043635df08c25e7f127f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0eb13e5a90a1437c06454f43a3e4d4397aa6010487241fe803c92e73fc9fc32fde59054ce762cfc73e09c1c88433eec048fe6d5851887f7c076cd7fb8bfe19ac
|
7
|
+
data.tar.gz: e1bbfe37c0f369312b63bc55fcc9a9efe69566e8bb514384b4790a9743105eeab4aeb105343964510d97bc46130b07b21c561ab1d2a164ff58eda5d12e148635
|
data/.gitignore
CHANGED
data/lib/sie/parser.rb
CHANGED
@@ -3,10 +3,10 @@ require "sie/parser/line_parser"
|
|
3
3
|
|
4
4
|
module Sie
|
5
5
|
class Parser
|
6
|
-
# TODO: Could this format knowledge be centrailized somewhere, some
|
7
|
-
# of this is duplicated in Character.
|
8
6
|
BEGINNING_OF_ARRAY = "{"
|
9
7
|
END_OF_ARRAY = "}"
|
8
|
+
|
9
|
+
# TODO: Could this format knowledge be shared with Tokenizer? It's slightly different there.
|
10
10
|
ENTRY = /^\s*#/
|
11
11
|
|
12
12
|
attr_private :options
|
data/lib/sie/parser/tokenizer.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
require "strscan"
|
2
2
|
require "sie/parser/tokenizer/token"
|
3
|
-
require "sie/parser/tokenizer/character"
|
4
3
|
|
5
4
|
module Sie
|
6
5
|
class Parser
|
@@ -8,22 +7,26 @@ module Sie
|
|
8
7
|
pattr_initialize :line
|
9
8
|
|
10
9
|
def tokenize
|
11
|
-
|
12
|
-
|
13
|
-
@quoted = false
|
10
|
+
tokens = []
|
11
|
+
check_for_control_characters
|
14
12
|
|
15
13
|
loop do
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
14
|
+
case
|
15
|
+
when whitespace?
|
16
|
+
next
|
17
|
+
when match = find_entry
|
18
|
+
tokens << EntryToken.new(match)
|
19
|
+
when begin_array?
|
20
|
+
tokens << BeginArrayToken.new
|
21
|
+
when end_array?
|
22
|
+
tokens << EndArrayToken.new
|
23
|
+
when match = find_string
|
24
|
+
tokens << StringToken.new(match)
|
25
|
+
when end_of_string?
|
26
|
+
break
|
25
27
|
else
|
26
|
-
|
28
|
+
# We shouldn't get here, but if we do we need to bail out, otherwise we get an infinite loop.
|
29
|
+
fail "Unhandled character in line at position #{scanner.pos}: " + scanner.string
|
27
30
|
end
|
28
31
|
end
|
29
32
|
|
@@ -32,55 +35,64 @@ module Sie
|
|
32
35
|
|
33
36
|
private
|
34
37
|
|
35
|
-
|
36
|
-
|
38
|
+
def check_for_control_characters
|
39
|
+
if /(.*?)([\x00-\x08\x0a-\x1f\x7f])/.match(line)
|
40
|
+
fail "Unhandled character in line at position #{$1.length + 1}: " + scanner.string
|
41
|
+
end
|
42
|
+
end
|
37
43
|
|
38
|
-
def
|
39
|
-
|
44
|
+
def whitespace?
|
45
|
+
scanner.scan(/[ \t]+/)
|
40
46
|
end
|
41
47
|
|
42
|
-
def
|
43
|
-
|
44
|
-
|
45
|
-
|
48
|
+
def find_entry
|
49
|
+
match = scanner.scan(/#\S+/)
|
50
|
+
|
51
|
+
if match
|
52
|
+
match.sub(/\A#/, "")
|
46
53
|
else
|
47
|
-
|
54
|
+
nil
|
48
55
|
end
|
49
56
|
end
|
50
57
|
|
51
|
-
def
|
52
|
-
|
53
|
-
|
58
|
+
def begin_array?
|
59
|
+
scanner.scan(/#{Sie::Parser::BEGINNING_OF_ARRAY}/)
|
60
|
+
end
|
61
|
+
|
62
|
+
def end_array?
|
63
|
+
scanner.scan(/#{Sie::Parser::END_OF_ARRAY}/)
|
64
|
+
end
|
65
|
+
|
66
|
+
def find_string
|
67
|
+
match = find_quoted_string || find_unquoted_string
|
68
|
+
|
69
|
+
if match
|
70
|
+
remove_unnessesary_escapes(match)
|
54
71
|
else
|
55
|
-
|
72
|
+
nil
|
56
73
|
end
|
57
74
|
end
|
58
75
|
|
59
|
-
def
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
elsif current_character.non_whitespace?
|
71
|
-
@consume = true
|
72
|
-
add_token StringToken.new(current_character.value)
|
73
|
-
elsif current_character.value != " "
|
74
|
-
raise "Unhandled character: #{current_character.value}"
|
76
|
+
def end_of_string?
|
77
|
+
scanner.eos?
|
78
|
+
end
|
79
|
+
|
80
|
+
def find_quoted_string
|
81
|
+
match = scanner.scan(/"(\\"|[^"])*"/)
|
82
|
+
|
83
|
+
if match
|
84
|
+
match.sub(/\A"/, "").sub(/"\z/, "")
|
85
|
+
else
|
86
|
+
nil
|
75
87
|
end
|
76
88
|
end
|
77
89
|
|
78
|
-
def
|
79
|
-
|
90
|
+
def find_unquoted_string
|
91
|
+
scanner.scan(/\S+/)
|
80
92
|
end
|
81
93
|
|
82
|
-
def
|
83
|
-
|
94
|
+
def remove_unnessesary_escapes(match)
|
95
|
+
match.gsub(/\\([\\"])/, "\\1")
|
84
96
|
end
|
85
97
|
|
86
98
|
def scanner
|
data/lib/sie/version.rb
CHANGED
data/spec/unit/parser/tokenizer_spec.rb
CHANGED
@@ -34,6 +34,62 @@ describe Sie::Parser::Tokenizer do
|
|
34
34
|
])
|
35
35
|
end
|
36
36
|
|
37
|
+
it "handles escaped quotes in quoted strings" do
|
38
|
+
tokenizer = Sie::Parser::Tokenizer.new('"String with \\" quote"')
|
39
|
+
tokens = tokenizer.tokenize
|
40
|
+
|
41
|
+
expect(token_table_for(tokens)).to eq([
|
42
|
+
[ "StringToken", 'String with " quote']
|
43
|
+
])
|
44
|
+
end
|
45
|
+
|
46
|
+
it "handles escaped quotes in non-quoted strings" do
|
47
|
+
tokenizer = Sie::Parser::Tokenizer.new('String_with_\\"_quote')
|
48
|
+
tokens = tokenizer.tokenize
|
49
|
+
|
50
|
+
expect(token_table_for(tokens)).to eq([
|
51
|
+
[ "StringToken", 'String_with_"_quote']
|
52
|
+
])
|
53
|
+
end
|
54
|
+
|
55
|
+
it "handles escaped backslash in strings" do
|
56
|
+
tokenizer = Sie::Parser::Tokenizer.new('"String with \\\\ backslash"')
|
57
|
+
tokens = tokenizer.tokenize
|
58
|
+
|
59
|
+
expect(token_table_for(tokens)).to eq([
|
60
|
+
[ "StringToken", 'String with \\ backslash']
|
61
|
+
])
|
62
|
+
end
|
63
|
+
|
64
|
+
it "has reasonable behavior for consecutive escape characters" do
|
65
|
+
tokenizer = Sie::Parser::Tokenizer.new('"\\\\\\"\\\\"')
|
66
|
+
tokens = tokenizer.tokenize
|
67
|
+
|
68
|
+
expect(token_table_for(tokens)).to eq([
|
69
|
+
[ "StringToken", '\\"\\']
|
70
|
+
])
|
71
|
+
end
|
72
|
+
|
73
|
+
it "handles tab character as field separator" do
|
74
|
+
tokenizer = Sie::Parser::Tokenizer.new("#TRANS\t2400")
|
75
|
+
tokens = tokenizer.tokenize
|
76
|
+
|
77
|
+
expect(token_table_for(tokens)).to eq([
|
78
|
+
[ "EntryToken", "TRANS"],
|
79
|
+
[ "StringToken", "2400"]
|
80
|
+
])
|
81
|
+
end
|
82
|
+
|
83
|
+
it "rejects control characters" do
|
84
|
+
codes_not_allowed = (0..8).to_a + (10..31).to_a + [127]
|
85
|
+
codes_not_allowed.each do |x|
|
86
|
+
tokenizer = Sie::Parser::Tokenizer.new([x].pack("C"))
|
87
|
+
expect{tokenizer.tokenize}.to raise_error /Unhandled character/
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
private
|
92
|
+
|
37
93
|
def token_table_for(tokens)
|
38
94
|
tokens.map { |token|
|
39
95
|
[ token.class.name.split("::").last, token.value ]
|
data/spec/unit/parser_spec.rb
CHANGED
@@ -3,21 +3,40 @@ require "sie/parser"
|
|
3
3
|
|
4
4
|
describe Sie::Parser, "parse" do
|
5
5
|
it "parses sie data that includes arrays" do
|
6
|
+
data = <<-DATA
|
7
|
+
#VER "LF" 2222 20130101 "Foocorp expense"
|
8
|
+
{
|
9
|
+
#TRANS 2400 {} -200 20130101 "Foocorp expense"
|
10
|
+
#TRANS 4100 {} 180 20130101 "Widgets from foocorp"
|
11
|
+
#TRANS 2611 {} -20 20130101 "VAT"
|
12
|
+
}
|
13
|
+
DATA
|
14
|
+
|
6
15
|
parser = Sie::Parser.new
|
7
|
-
sie_file = parser.parse(
|
16
|
+
sie_file = parser.parse(data)
|
17
|
+
|
18
|
+
voucher_entry = sie_file.entries.first
|
19
|
+
expect(sie_file.entries.size).to eq(1)
|
20
|
+
expect(voucher_entry.attributes["verdatum"]).to eq("20130101")
|
21
|
+
expect(voucher_entry.entries.size).to eq(3)
|
22
|
+
expect(voucher_entry.entries.first.attributes["kontonr"]).to eq("2400")
|
23
|
+
end
|
24
|
+
|
25
|
+
it "handles leading whitespace" do
|
26
|
+
data = <<-DATA
|
8
27
|
#VER "LF" 2222 20130101 "Foocorp expense"
|
9
28
|
{
|
10
29
|
#TRANS 2400 {} -200 20130101 "Foocorp expense"
|
11
30
|
#TRANS 4100 {} 180 20130101 "Widgets from foocorp"
|
12
31
|
#TRANS 2611 {} -20 20130101 "VAT"
|
13
32
|
}
|
14
|
-
DATA
|
15
|
-
|
33
|
+
DATA
|
34
|
+
|
35
|
+
parser = Sie::Parser.new
|
36
|
+
sie_file = parser.parse(data)
|
16
37
|
|
17
38
|
voucher_entry = sie_file.entries.first
|
18
39
|
expect(sie_file.entries.size).to eq(1)
|
19
|
-
expect(voucher_entry.attributes["verdatum"]).to eq("20130101")
|
20
40
|
expect(voucher_entry.entries.size).to eq(3)
|
21
|
-
expect(voucher_entry.entries.first.attributes["kontonr"]).to eq("2400")
|
22
41
|
end
|
23
42
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sie
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1.1
|
4
|
+
version: 3.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Barsoom AB
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-10-
|
11
|
+
date: 2015-10-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: attr_extras
|
@@ -105,7 +105,6 @@ files:
|
|
105
105
|
- lib/sie/parser/line_parser.rb
|
106
106
|
- lib/sie/parser/sie_file.rb
|
107
107
|
- lib/sie/parser/tokenizer.rb
|
108
|
-
- lib/sie/parser/tokenizer/character.rb
|
109
108
|
- lib/sie/parser/tokenizer/token.rb
|
110
109
|
- lib/sie/version.rb
|
111
110
|
- script/bootstrap
|
data/lib/sie/parser/tokenizer/character.rb
DELETED
@@ -1,34 +0,0 @@
|
|
1
|
-
module Sie
|
2
|
-
class Parser
|
3
|
-
class Tokenizer
|
4
|
-
class Character
|
5
|
-
pattr_initialize :value
|
6
|
-
attr_reader :value
|
7
|
-
|
8
|
-
def unquoted_data?
|
9
|
-
non_whitespace? && !end_of_array?
|
10
|
-
end
|
11
|
-
|
12
|
-
def entry?
|
13
|
-
value == "#"
|
14
|
-
end
|
15
|
-
|
16
|
-
def beginning_of_array?
|
17
|
-
value == "{"
|
18
|
-
end
|
19
|
-
|
20
|
-
def end_of_array?
|
21
|
-
value == "}"
|
22
|
-
end
|
23
|
-
|
24
|
-
def quote?
|
25
|
-
value == '"'
|
26
|
-
end
|
27
|
-
|
28
|
-
def non_whitespace?
|
29
|
-
value != " " && value != "\t"
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|
34
|
-
end
|