sie 3.1.1 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/lib/sie/parser.rb +2 -2
- data/lib/sie/parser/tokenizer.rb +59 -47
- data/lib/sie/version.rb +1 -1
- data/spec/unit/parser/tokenizer_spec.rb +56 -0
- data/spec/unit/parser_spec.rb +24 -5
- metadata +2 -3
- data/lib/sie/parser/tokenizer/character.rb +0 -34
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 82618d146dcca7af3a358baa84ebabc1349d4e20
|
4
|
+
data.tar.gz: cbaa13f1ca6a9eaeb7aa043635df08c25e7f127f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0eb13e5a90a1437c06454f43a3e4d4397aa6010487241fe803c92e73fc9fc32fde59054ce762cfc73e09c1c88433eec048fe6d5851887f7c076cd7fb8bfe19ac
|
7
|
+
data.tar.gz: e1bbfe37c0f369312b63bc55fcc9a9efe69566e8bb514384b4790a9743105eeab4aeb105343964510d97bc46130b07b21c561ab1d2a164ff58eda5d12e148635
|
data/.gitignore
CHANGED
data/lib/sie/parser.rb
CHANGED
@@ -3,10 +3,10 @@ require "sie/parser/line_parser"
|
|
3
3
|
|
4
4
|
module Sie
|
5
5
|
class Parser
|
6
|
-
# TODO: Could this format knowledge be centrailized somewhere, some
|
7
|
-
# of this is duplicated in Character.
|
8
6
|
BEGINNING_OF_ARRAY = "{"
|
9
7
|
END_OF_ARRAY = "}"
|
8
|
+
|
9
|
+
# TODO: Could this format knowledge be shared with Tokenizer? It's slightly different there.
|
10
10
|
ENTRY = /^\s*#/
|
11
11
|
|
12
12
|
attr_private :options
|
data/lib/sie/parser/tokenizer.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
require "strscan"
|
2
2
|
require "sie/parser/tokenizer/token"
|
3
|
-
require "sie/parser/tokenizer/character"
|
4
3
|
|
5
4
|
module Sie
|
6
5
|
class Parser
|
@@ -8,22 +7,26 @@ module Sie
|
|
8
7
|
pattr_initialize :line
|
9
8
|
|
10
9
|
def tokenize
|
11
|
-
|
12
|
-
|
13
|
-
@quoted = false
|
10
|
+
tokens = []
|
11
|
+
check_for_control_characters
|
14
12
|
|
15
13
|
loop do
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
14
|
+
case
|
15
|
+
when whitespace?
|
16
|
+
next
|
17
|
+
when match = find_entry
|
18
|
+
tokens << EntryToken.new(match)
|
19
|
+
when begin_array?
|
20
|
+
tokens << BeginArrayToken.new
|
21
|
+
when end_array?
|
22
|
+
tokens << EndArrayToken.new
|
23
|
+
when match = find_string
|
24
|
+
tokens << StringToken.new(match)
|
25
|
+
when end_of_string?
|
26
|
+
break
|
25
27
|
else
|
26
|
-
|
28
|
+
# We shouldn't get here, but if we do we need to bail out, otherwise we get an infinite loop.
|
29
|
+
fail "Unhandled character in line at position #{scanner.pos}: " + scanner.string
|
27
30
|
end
|
28
31
|
end
|
29
32
|
|
@@ -32,55 +35,64 @@ module Sie
|
|
32
35
|
|
33
36
|
private
|
34
37
|
|
35
|
-
|
36
|
-
|
38
|
+
def check_for_control_characters
|
39
|
+
if /(.*?)([\x00-\x08\x0a-\x1f\x7f])/.match(line)
|
40
|
+
fail "Unhandled character in line at position #{$1.length + 1}: " + scanner.string
|
41
|
+
end
|
42
|
+
end
|
37
43
|
|
38
|
-
def
|
39
|
-
|
44
|
+
def whitespace?
|
45
|
+
scanner.scan(/[ \t]+/)
|
40
46
|
end
|
41
47
|
|
42
|
-
def
|
43
|
-
|
44
|
-
|
45
|
-
|
48
|
+
def find_entry
|
49
|
+
match = scanner.scan(/#\S+/)
|
50
|
+
|
51
|
+
if match
|
52
|
+
match.sub(/\A#/, "")
|
46
53
|
else
|
47
|
-
|
54
|
+
nil
|
48
55
|
end
|
49
56
|
end
|
50
57
|
|
51
|
-
def
|
52
|
-
|
53
|
-
|
58
|
+
def begin_array?
|
59
|
+
scanner.scan(/#{Sie::Parser::BEGINNING_OF_ARRAY}/)
|
60
|
+
end
|
61
|
+
|
62
|
+
def end_array?
|
63
|
+
scanner.scan(/#{Sie::Parser::END_OF_ARRAY}/)
|
64
|
+
end
|
65
|
+
|
66
|
+
def find_string
|
67
|
+
match = find_quoted_string || find_unquoted_string
|
68
|
+
|
69
|
+
if match
|
70
|
+
remove_unnessesary_escapes(match)
|
54
71
|
else
|
55
|
-
|
72
|
+
nil
|
56
73
|
end
|
57
74
|
end
|
58
75
|
|
59
|
-
def
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
elsif current_character.non_whitespace?
|
71
|
-
@consume = true
|
72
|
-
add_token StringToken.new(current_character.value)
|
73
|
-
elsif current_character.value != " "
|
74
|
-
raise "Unhandled character: #{current_character.value}"
|
76
|
+
def end_of_string?
|
77
|
+
scanner.eos?
|
78
|
+
end
|
79
|
+
|
80
|
+
def find_quoted_string
|
81
|
+
match = scanner.scan(/"(\\"|[^"])*"/)
|
82
|
+
|
83
|
+
if match
|
84
|
+
match.sub(/\A"/, "").sub(/"\z/, "")
|
85
|
+
else
|
86
|
+
nil
|
75
87
|
end
|
76
88
|
end
|
77
89
|
|
78
|
-
def
|
79
|
-
|
90
|
+
def find_unquoted_string
|
91
|
+
scanner.scan(/\S+/)
|
80
92
|
end
|
81
93
|
|
82
|
-
def
|
83
|
-
|
94
|
+
def remove_unnessesary_escapes(match)
|
95
|
+
match.gsub(/\\([\\"])/, "\\1")
|
84
96
|
end
|
85
97
|
|
86
98
|
def scanner
|
data/lib/sie/version.rb
CHANGED
data/spec/unit/parser/tokenizer_spec.rb
CHANGED
@@ -34,6 +34,62 @@ describe Sie::Parser::Tokenizer do
|
|
34
34
|
])
|
35
35
|
end
|
36
36
|
|
37
|
+
it "handles escaped quotes in quoted strings" do
|
38
|
+
tokenizer = Sie::Parser::Tokenizer.new('"String with \\" quote"')
|
39
|
+
tokens = tokenizer.tokenize
|
40
|
+
|
41
|
+
expect(token_table_for(tokens)).to eq([
|
42
|
+
[ "StringToken", 'String with " quote']
|
43
|
+
])
|
44
|
+
end
|
45
|
+
|
46
|
+
it "handles escaped quotes in non-quoted strings" do
|
47
|
+
tokenizer = Sie::Parser::Tokenizer.new('String_with_\\"_quote')
|
48
|
+
tokens = tokenizer.tokenize
|
49
|
+
|
50
|
+
expect(token_table_for(tokens)).to eq([
|
51
|
+
[ "StringToken", 'String_with_"_quote']
|
52
|
+
])
|
53
|
+
end
|
54
|
+
|
55
|
+
it "handles escaped backslash in strings" do
|
56
|
+
tokenizer = Sie::Parser::Tokenizer.new('"String with \\\\ backslash"')
|
57
|
+
tokens = tokenizer.tokenize
|
58
|
+
|
59
|
+
expect(token_table_for(tokens)).to eq([
|
60
|
+
[ "StringToken", 'String with \\ backslash']
|
61
|
+
])
|
62
|
+
end
|
63
|
+
|
64
|
+
it "has reasonable behavior for consecutive escape characters" do
|
65
|
+
tokenizer = Sie::Parser::Tokenizer.new('"\\\\\\"\\\\"')
|
66
|
+
tokens = tokenizer.tokenize
|
67
|
+
|
68
|
+
expect(token_table_for(tokens)).to eq([
|
69
|
+
[ "StringToken", '\\"\\']
|
70
|
+
])
|
71
|
+
end
|
72
|
+
|
73
|
+
it "handles tab character as field separator" do
|
74
|
+
tokenizer = Sie::Parser::Tokenizer.new("#TRANS\t2400")
|
75
|
+
tokens = tokenizer.tokenize
|
76
|
+
|
77
|
+
expect(token_table_for(tokens)).to eq([
|
78
|
+
[ "EntryToken", "TRANS"],
|
79
|
+
[ "StringToken", "2400"]
|
80
|
+
])
|
81
|
+
end
|
82
|
+
|
83
|
+
it "rejects control characters" do
|
84
|
+
codes_not_allowed = (0..8).to_a + (10..31).to_a + [127]
|
85
|
+
codes_not_allowed.each do |x|
|
86
|
+
tokenizer = Sie::Parser::Tokenizer.new([x].pack("C"))
|
87
|
+
expect{tokenizer.tokenize}.to raise_error /Unhandled character/
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
private
|
92
|
+
|
37
93
|
def token_table_for(tokens)
|
38
94
|
tokens.map { |token|
|
39
95
|
[ token.class.name.split("::").last, token.value ]
|
data/spec/unit/parser_spec.rb
CHANGED
@@ -3,21 +3,40 @@ require "sie/parser"
|
|
3
3
|
|
4
4
|
describe Sie::Parser, "parse" do
|
5
5
|
it "parses sie data that includes arrays" do
|
6
|
+
data = <<-DATA
|
7
|
+
#VER "LF" 2222 20130101 "Foocorp expense"
|
8
|
+
{
|
9
|
+
#TRANS 2400 {} -200 20130101 "Foocorp expense"
|
10
|
+
#TRANS 4100 {} 180 20130101 "Widgets from foocorp"
|
11
|
+
#TRANS 2611 {} -20 20130101 "VAT"
|
12
|
+
}
|
13
|
+
DATA
|
14
|
+
|
6
15
|
parser = Sie::Parser.new
|
7
|
-
sie_file = parser.parse(
|
16
|
+
sie_file = parser.parse(data)
|
17
|
+
|
18
|
+
voucher_entry = sie_file.entries.first
|
19
|
+
expect(sie_file.entries.size).to eq(1)
|
20
|
+
expect(voucher_entry.attributes["verdatum"]).to eq("20130101")
|
21
|
+
expect(voucher_entry.entries.size).to eq(3)
|
22
|
+
expect(voucher_entry.entries.first.attributes["kontonr"]).to eq("2400")
|
23
|
+
end
|
24
|
+
|
25
|
+
it "handles leading whitespace" do
|
26
|
+
data = <<-DATA
|
8
27
|
#VER "LF" 2222 20130101 "Foocorp expense"
|
9
28
|
{
|
10
29
|
#TRANS 2400 {} -200 20130101 "Foocorp expense"
|
11
30
|
#TRANS 4100 {} 180 20130101 "Widgets from foocorp"
|
12
31
|
#TRANS 2611 {} -20 20130101 "VAT"
|
13
32
|
}
|
14
|
-
DATA
|
15
|
-
|
33
|
+
DATA
|
34
|
+
|
35
|
+
parser = Sie::Parser.new
|
36
|
+
sie_file = parser.parse(data)
|
16
37
|
|
17
38
|
voucher_entry = sie_file.entries.first
|
18
39
|
expect(sie_file.entries.size).to eq(1)
|
19
|
-
expect(voucher_entry.attributes["verdatum"]).to eq("20130101")
|
20
40
|
expect(voucher_entry.entries.size).to eq(3)
|
21
|
-
expect(voucher_entry.entries.first.attributes["kontonr"]).to eq("2400")
|
22
41
|
end
|
23
42
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sie
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1.1
|
4
|
+
version: 3.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Barsoom AB
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-10-
|
11
|
+
date: 2015-10-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: attr_extras
|
@@ -105,7 +105,6 @@ files:
|
|
105
105
|
- lib/sie/parser/line_parser.rb
|
106
106
|
- lib/sie/parser/sie_file.rb
|
107
107
|
- lib/sie/parser/tokenizer.rb
|
108
|
-
- lib/sie/parser/tokenizer/character.rb
|
109
108
|
- lib/sie/parser/tokenizer/token.rb
|
110
109
|
- lib/sie/version.rb
|
111
110
|
- script/bootstrap
|
data/lib/sie/parser/tokenizer/character.rb
DELETED
@@ -1,34 +0,0 @@
|
|
1
|
-
module Sie
|
2
|
-
class Parser
|
3
|
-
class Tokenizer
|
4
|
-
class Character
|
5
|
-
pattr_initialize :value
|
6
|
-
attr_reader :value
|
7
|
-
|
8
|
-
def unquoted_data?
|
9
|
-
non_whitespace? && !end_of_array?
|
10
|
-
end
|
11
|
-
|
12
|
-
def entry?
|
13
|
-
value == "#"
|
14
|
-
end
|
15
|
-
|
16
|
-
def beginning_of_array?
|
17
|
-
value == "{"
|
18
|
-
end
|
19
|
-
|
20
|
-
def end_of_array?
|
21
|
-
value == "}"
|
22
|
-
end
|
23
|
-
|
24
|
-
def quote?
|
25
|
-
value == '"'
|
26
|
-
end
|
27
|
-
|
28
|
-
def non_whitespace?
|
29
|
-
value != " " && value != "\t"
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|
34
|
-
end
|