sie 3.1.1 → 3.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b28813c3d6a7133a25a4addc2a52c800b14039ca
4
- data.tar.gz: 6c9dce2f745c1c95e4151c43e8aa6b7428ca5ce6
3
+ metadata.gz: 82618d146dcca7af3a358baa84ebabc1349d4e20
4
+ data.tar.gz: cbaa13f1ca6a9eaeb7aa043635df08c25e7f127f
5
5
  SHA512:
6
- metadata.gz: 63ff773eb13b891e0b88ef721b1f96d8befaf02a5bd62fce1b60233e2f850dcae1ccf01ce4b0e6e629463f374bee7c215595811b19a5b70a2dbae7f5fee9075a
7
- data.tar.gz: 1450f07a32f74d87dad7e1989082a04207393e545d7325c5ca1ba829f39462724020c16793f1227e8dad30b9f9d1fb4f72d93bcf645aa24bb66c3b67e123d5e5
6
+ metadata.gz: 0eb13e5a90a1437c06454f43a3e4d4397aa6010487241fe803c92e73fc9fc32fde59054ce762cfc73e09c1c88433eec048fe6d5851887f7c076cd7fb8bfe19ac
7
+ data.tar.gz: e1bbfe37c0f369312b63bc55fcc9a9efe69566e8bb514384b4790a9743105eeab4aeb105343964510d97bc46130b07b21c561ab1d2a164ff58eda5d12e148635
data/.gitignore CHANGED
@@ -15,3 +15,4 @@ spec/reports
15
15
  test/tmp
16
16
  test/version_tmp
17
17
  tmp
18
+ .idea
@@ -3,10 +3,10 @@ require "sie/parser/line_parser"
3
3
 
4
4
  module Sie
5
5
  class Parser
6
- # TODO: Could this format knowledge be centralized somewhere, some
7
- # of this is duplicated in Character.
8
6
  BEGINNING_OF_ARRAY = "{"
9
7
  END_OF_ARRAY = "}"
8
+
9
+ # TODO: Could this format knowledge be shared with Tokenizer? It's slightly different there.
10
10
  ENTRY = /^\s*#/
11
11
 
12
12
  attr_private :options
@@ -1,6 +1,5 @@
1
1
  require "strscan"
2
2
  require "sie/parser/tokenizer/token"
3
- require "sie/parser/tokenizer/character"
4
3
 
5
4
  module Sie
6
5
  class Parser
@@ -8,22 +7,26 @@ module Sie
8
7
  pattr_initialize :line
9
8
 
10
9
  def tokenize
11
- @tokens = []
12
- @consume = false
13
- @quoted = false
10
+ tokens = []
11
+ check_for_control_characters
14
12
 
15
13
  loop do
16
- move_to_next_character
17
- break unless current_character.value
18
-
19
- if consume?
20
- if quoted?
21
- consume_quoted_value
22
- else
23
- consume_unquoted_value
24
- end
14
+ case
15
+ when whitespace?
16
+ next
17
+ when match = find_entry
18
+ tokens << EntryToken.new(match)
19
+ when begin_array?
20
+ tokens << BeginArrayToken.new
21
+ when end_array?
22
+ tokens << EndArrayToken.new
23
+ when match = find_string
24
+ tokens << StringToken.new(match)
25
+ when end_of_string?
26
+ break
25
27
  else
26
- add_new_token
28
+ # We shouldn't get here, but if we do we need to bail out, otherwise we get an infinite loop.
29
+ fail "Unhandled character in line at position #{scanner.pos}: " + scanner.string
27
30
  end
28
31
  end
29
32
 
@@ -32,55 +35,64 @@ module Sie
32
35
 
33
36
  private
34
37
 
35
- attr_query :consume?, :quoted?
36
- attr_private :consume, :quoted, :tokens, :current_character
38
+ def check_for_control_characters
39
+ if /(.*?)([\x00-\x08\x0a-\x1f\x7f])/.match(line)
40
+ fail "Unhandled character in line at position #{$1.length + 1}: " + scanner.string
41
+ end
42
+ end
37
43
 
38
- def move_to_next_character
39
- @current_character = Character.new(scanner.getch)
44
+ def whitespace?
45
+ scanner.scan(/[ \t]+/)
40
46
  end
41
47
 
42
- def consume_quoted_value
43
- if current_character.quote?
44
- @quoted = false
45
- @consume = false
48
+ def find_entry
49
+ match = scanner.scan(/#\S+/)
50
+
51
+ if match
52
+ match.sub(/\A#/, "")
46
53
  else
47
- add_to_current_token current_character
54
+ nil
48
55
  end
49
56
  end
50
57
 
51
- def consume_unquoted_value
52
- if current_character.unquoted_data?
53
- add_to_current_token current_character
58
+ def begin_array?
59
+ scanner.scan(/#{Sie::Parser::BEGINNING_OF_ARRAY}/)
60
+ end
61
+
62
+ def end_array?
63
+ scanner.scan(/#{Sie::Parser::END_OF_ARRAY}/)
64
+ end
65
+
66
+ def find_string
67
+ match = find_quoted_string || find_unquoted_string
68
+
69
+ if match
70
+ remove_unnessesary_escapes(match)
54
71
  else
55
- @consume = false
72
+ nil
56
73
  end
57
74
  end
58
75
 
59
- def add_new_token
60
- if current_character.entry?
61
- @consume = true
62
- add_token EntryToken.new
63
- elsif current_character.beginning_of_array?
64
- add_token BeginArrayToken.new
65
- elsif current_character.end_of_array?
66
- add_token EndArrayToken.new
67
- elsif current_character.quote?
68
- @consume = @quoted = true
69
- add_token StringToken.new
70
- elsif current_character.non_whitespace?
71
- @consume = true
72
- add_token StringToken.new(current_character.value)
73
- elsif current_character.value != " "
74
- raise "Unhandled character: #{current_character.value}"
76
+ def end_of_string?
77
+ scanner.eos?
78
+ end
79
+
80
+ def find_quoted_string
81
+ match = scanner.scan(/"(\\"|[^"])*"/)
82
+
83
+ if match
84
+ match.sub(/\A"/, "").sub(/"\z/, "")
85
+ else
86
+ nil
75
87
  end
76
88
  end
77
89
 
78
- def add_token(token)
79
- tokens << token
90
+ def find_unquoted_string
91
+ scanner.scan(/\S+/)
80
92
  end
81
93
 
82
- def add_to_current_token(character)
83
- tokens.last.value += character.value
94
+ def remove_unnessesary_escapes(match)
95
+ match.gsub(/\\([\\"])/, "\\1")
84
96
  end
85
97
 
86
98
  def scanner
@@ -1,4 +1,4 @@
1
1
  module Sie
2
2
  # For versioning see: http://semver.org/
3
- VERSION = "3.1.1"
3
+ VERSION = "3.2.0"
4
4
  end
@@ -34,6 +34,62 @@ describe Sie::Parser::Tokenizer do
34
34
  ])
35
35
  end
36
36
 
37
+ it "handles escaped quotes in quoted strings" do
38
+ tokenizer = Sie::Parser::Tokenizer.new('"String with \\" quote"')
39
+ tokens = tokenizer.tokenize
40
+
41
+ expect(token_table_for(tokens)).to eq([
42
+ [ "StringToken", 'String with " quote']
43
+ ])
44
+ end
45
+
46
+ it "handles escaped quotes in non-quoted strings" do
47
+ tokenizer = Sie::Parser::Tokenizer.new('String_with_\\"_quote')
48
+ tokens = tokenizer.tokenize
49
+
50
+ expect(token_table_for(tokens)).to eq([
51
+ [ "StringToken", 'String_with_"_quote']
52
+ ])
53
+ end
54
+
55
+ it "handles escaped backslash in strings" do
56
+ tokenizer = Sie::Parser::Tokenizer.new('"String with \\\\ backslash"')
57
+ tokens = tokenizer.tokenize
58
+
59
+ expect(token_table_for(tokens)).to eq([
60
+ [ "StringToken", 'String with \\ backslash']
61
+ ])
62
+ end
63
+
64
+ it "has reasonable behavior for consecutive escape characters" do
65
+ tokenizer = Sie::Parser::Tokenizer.new('"\\\\\\"\\\\"')
66
+ tokens = tokenizer.tokenize
67
+
68
+ expect(token_table_for(tokens)).to eq([
69
+ [ "StringToken", '\\"\\']
70
+ ])
71
+ end
72
+
73
+ it "handles tab character as field separator" do
74
+ tokenizer = Sie::Parser::Tokenizer.new("#TRANS\t2400")
75
+ tokens = tokenizer.tokenize
76
+
77
+ expect(token_table_for(tokens)).to eq([
78
+ [ "EntryToken", "TRANS"],
79
+ [ "StringToken", "2400"]
80
+ ])
81
+ end
82
+
83
+ it "rejects control characters" do
84
+ codes_not_allowed = (0..8).to_a + (10..31).to_a + [127]
85
+ codes_not_allowed.each do |x|
86
+ tokenizer = Sie::Parser::Tokenizer.new([x].pack("C"))
87
+ expect{tokenizer.tokenize}.to raise_error /Unhandled character/
88
+ end
89
+ end
90
+
91
+ private
92
+
37
93
  def token_table_for(tokens)
38
94
  tokens.map { |token|
39
95
  [ token.class.name.split("::").last, token.value ]
@@ -3,21 +3,40 @@ require "sie/parser"
3
3
 
4
4
  describe Sie::Parser, "parse" do
5
5
  it "parses sie data that includes arrays" do
6
+ data = <<-DATA
7
+ #VER "LF" 2222 20130101 "Foocorp expense"
8
+ {
9
+ #TRANS 2400 {} -200 20130101 "Foocorp expense"
10
+ #TRANS 4100 {} 180 20130101 "Widgets from foocorp"
11
+ #TRANS 2611 {} -20 20130101 "VAT"
12
+ }
13
+ DATA
14
+
6
15
  parser = Sie::Parser.new
7
- sie_file = parser.parse(<<DATA
16
+ sie_file = parser.parse(data)
17
+
18
+ voucher_entry = sie_file.entries.first
19
+ expect(sie_file.entries.size).to eq(1)
20
+ expect(voucher_entry.attributes["verdatum"]).to eq("20130101")
21
+ expect(voucher_entry.entries.size).to eq(3)
22
+ expect(voucher_entry.entries.first.attributes["kontonr"]).to eq("2400")
23
+ end
24
+
25
+ it "handles leading whitespace" do
26
+ data = <<-DATA
8
27
  #VER "LF" 2222 20130101 "Foocorp expense"
9
28
  {
10
29
  #TRANS 2400 {} -200 20130101 "Foocorp expense"
11
30
  #TRANS 4100 {} 180 20130101 "Widgets from foocorp"
12
31
  #TRANS 2611 {} -20 20130101 "VAT"
13
32
  }
14
- DATA
15
- )
33
+ DATA
34
+
35
+ parser = Sie::Parser.new
36
+ sie_file = parser.parse(data)
16
37
 
17
38
  voucher_entry = sie_file.entries.first
18
39
  expect(sie_file.entries.size).to eq(1)
19
- expect(voucher_entry.attributes["verdatum"]).to eq("20130101")
20
40
  expect(voucher_entry.entries.size).to eq(3)
21
- expect(voucher_entry.entries.first.attributes["kontonr"]).to eq("2400")
22
41
  end
23
42
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sie
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.1
4
+ version: 3.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Barsoom AB
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-02 00:00:00.000000000 Z
11
+ date: 2015-10-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: attr_extras
@@ -105,7 +105,6 @@ files:
105
105
  - lib/sie/parser/line_parser.rb
106
106
  - lib/sie/parser/sie_file.rb
107
107
  - lib/sie/parser/tokenizer.rb
108
- - lib/sie/parser/tokenizer/character.rb
109
108
  - lib/sie/parser/tokenizer/token.rb
110
109
  - lib/sie/version.rb
111
110
  - script/bootstrap
@@ -1,34 +0,0 @@
1
- module Sie
2
- class Parser
3
- class Tokenizer
4
- class Character
5
- pattr_initialize :value
6
- attr_reader :value
7
-
8
- def unquoted_data?
9
- non_whitespace? && !end_of_array?
10
- end
11
-
12
- def entry?
13
- value == "#"
14
- end
15
-
16
- def beginning_of_array?
17
- value == "{"
18
- end
19
-
20
- def end_of_array?
21
- value == "}"
22
- end
23
-
24
- def quote?
25
- value == '"'
26
- end
27
-
28
- def non_whitespace?
29
- value != " " && value != "\t"
30
- end
31
- end
32
- end
33
- end
34
- end