sie 3.1.1 → 3.2.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b28813c3d6a7133a25a4addc2a52c800b14039ca
4
- data.tar.gz: 6c9dce2f745c1c95e4151c43e8aa6b7428ca5ce6
3
+ metadata.gz: 82618d146dcca7af3a358baa84ebabc1349d4e20
4
+ data.tar.gz: cbaa13f1ca6a9eaeb7aa043635df08c25e7f127f
5
5
  SHA512:
6
- metadata.gz: 63ff773eb13b891e0b88ef721b1f96d8befaf02a5bd62fce1b60233e2f850dcae1ccf01ce4b0e6e629463f374bee7c215595811b19a5b70a2dbae7f5fee9075a
7
- data.tar.gz: 1450f07a32f74d87dad7e1989082a04207393e545d7325c5ca1ba829f39462724020c16793f1227e8dad30b9f9d1fb4f72d93bcf645aa24bb66c3b67e123d5e5
6
+ metadata.gz: 0eb13e5a90a1437c06454f43a3e4d4397aa6010487241fe803c92e73fc9fc32fde59054ce762cfc73e09c1c88433eec048fe6d5851887f7c076cd7fb8bfe19ac
7
+ data.tar.gz: e1bbfe37c0f369312b63bc55fcc9a9efe69566e8bb514384b4790a9743105eeab4aeb105343964510d97bc46130b07b21c561ab1d2a164ff58eda5d12e148635
data/.gitignore CHANGED
@@ -15,3 +15,4 @@ spec/reports
15
15
  test/tmp
16
16
  test/version_tmp
17
17
  tmp
18
+ .idea
@@ -3,10 +3,10 @@ require "sie/parser/line_parser"
3
3
 
4
4
  module Sie
5
5
  class Parser
6
- # TODO: Could this format knowledge be centralized somewhere, some
7
- # of this is duplicated in Character.
8
6
  BEGINNING_OF_ARRAY = "{"
9
7
  END_OF_ARRAY = "}"
8
+
9
+ # TODO: Could this format knowledge be shared with Tokenizer? It's slightly different there.
10
10
  ENTRY = /^\s*#/
11
11
 
12
12
  attr_private :options
@@ -1,6 +1,5 @@
1
1
  require "strscan"
2
2
  require "sie/parser/tokenizer/token"
3
- require "sie/parser/tokenizer/character"
4
3
 
5
4
  module Sie
6
5
  class Parser
@@ -8,22 +7,26 @@ module Sie
8
7
  pattr_initialize :line
9
8
 
10
9
  def tokenize
11
- @tokens = []
12
- @consume = false
13
- @quoted = false
10
+ tokens = []
11
+ check_for_control_characters
14
12
 
15
13
  loop do
16
- move_to_next_character
17
- break unless current_character.value
18
-
19
- if consume?
20
- if quoted?
21
- consume_quoted_value
22
- else
23
- consume_unquoted_value
24
- end
14
+ case
15
+ when whitespace?
16
+ next
17
+ when match = find_entry
18
+ tokens << EntryToken.new(match)
19
+ when begin_array?
20
+ tokens << BeginArrayToken.new
21
+ when end_array?
22
+ tokens << EndArrayToken.new
23
+ when match = find_string
24
+ tokens << StringToken.new(match)
25
+ when end_of_string?
26
+ break
25
27
  else
26
- add_new_token
28
+ # We shouldn't get here, but if we do we need to bail out, otherwise we get an infinite loop.
29
+ fail "Unhandled character in line at position #{scanner.pos}: " + scanner.string
27
30
  end
28
31
  end
29
32
 
@@ -32,55 +35,64 @@ module Sie
32
35
 
33
36
  private
34
37
 
35
- attr_query :consume?, :quoted?
36
- attr_private :consume, :quoted, :tokens, :current_character
38
+ def check_for_control_characters
39
+ if /(.*?)([\x00-\x08\x0a-\x1f\x7f])/.match(line)
40
+ fail "Unhandled character in line at position #{$1.length + 1}: " + scanner.string
41
+ end
42
+ end
37
43
 
38
- def move_to_next_character
39
- @current_character = Character.new(scanner.getch)
44
+ def whitespace?
45
+ scanner.scan(/[ \t]+/)
40
46
  end
41
47
 
42
- def consume_quoted_value
43
- if current_character.quote?
44
- @quoted = false
45
- @consume = false
48
+ def find_entry
49
+ match = scanner.scan(/#\S+/)
50
+
51
+ if match
52
+ match.sub(/\A#/, "")
46
53
  else
47
- add_to_current_token current_character
54
+ nil
48
55
  end
49
56
  end
50
57
 
51
- def consume_unquoted_value
52
- if current_character.unquoted_data?
53
- add_to_current_token current_character
58
+ def begin_array?
59
+ scanner.scan(/#{Sie::Parser::BEGINNING_OF_ARRAY}/)
60
+ end
61
+
62
+ def end_array?
63
+ scanner.scan(/#{Sie::Parser::END_OF_ARRAY}/)
64
+ end
65
+
66
+ def find_string
67
+ match = find_quoted_string || find_unquoted_string
68
+
69
+ if match
70
+ remove_unnessesary_escapes(match)
54
71
  else
55
- @consume = false
72
+ nil
56
73
  end
57
74
  end
58
75
 
59
- def add_new_token
60
- if current_character.entry?
61
- @consume = true
62
- add_token EntryToken.new
63
- elsif current_character.beginning_of_array?
64
- add_token BeginArrayToken.new
65
- elsif current_character.end_of_array?
66
- add_token EndArrayToken.new
67
- elsif current_character.quote?
68
- @consume = @quoted = true
69
- add_token StringToken.new
70
- elsif current_character.non_whitespace?
71
- @consume = true
72
- add_token StringToken.new(current_character.value)
73
- elsif current_character.value != " "
74
- raise "Unhandled character: #{current_character.value}"
76
+ def end_of_string?
77
+ scanner.eos?
78
+ end
79
+
80
+ def find_quoted_string
81
+ match = scanner.scan(/"(\\"|[^"])*"/)
82
+
83
+ if match
84
+ match.sub(/\A"/, "").sub(/"\z/, "")
85
+ else
86
+ nil
75
87
  end
76
88
  end
77
89
 
78
- def add_token(token)
79
- tokens << token
90
+ def find_unquoted_string
91
+ scanner.scan(/\S+/)
80
92
  end
81
93
 
82
- def add_to_current_token(character)
83
- tokens.last.value += character.value
94
+ def remove_unnessesary_escapes(match)
95
+ match.gsub(/\\([\\"])/, "\\1")
84
96
  end
85
97
 
86
98
  def scanner
@@ -1,4 +1,4 @@
1
1
  module Sie
2
2
  # For versioning see: http://semver.org/
3
- VERSION = "3.1.1"
3
+ VERSION = "3.2.0"
4
4
  end
@@ -34,6 +34,62 @@ describe Sie::Parser::Tokenizer do
34
34
  ])
35
35
  end
36
36
 
37
+ it "handles escaped quotes in quoted strings" do
38
+ tokenizer = Sie::Parser::Tokenizer.new('"String with \\" quote"')
39
+ tokens = tokenizer.tokenize
40
+
41
+ expect(token_table_for(tokens)).to eq([
42
+ [ "StringToken", 'String with " quote']
43
+ ])
44
+ end
45
+
46
+ it "handles escaped quotes in non-quoted strings" do
47
+ tokenizer = Sie::Parser::Tokenizer.new('String_with_\\"_quote')
48
+ tokens = tokenizer.tokenize
49
+
50
+ expect(token_table_for(tokens)).to eq([
51
+ [ "StringToken", 'String_with_"_quote']
52
+ ])
53
+ end
54
+
55
+ it "handles escaped backslash in strings" do
56
+ tokenizer = Sie::Parser::Tokenizer.new('"String with \\\\ backslash"')
57
+ tokens = tokenizer.tokenize
58
+
59
+ expect(token_table_for(tokens)).to eq([
60
+ [ "StringToken", 'String with \\ backslash']
61
+ ])
62
+ end
63
+
64
+ it "has reasonable behavior for consecutive escape characters" do
65
+ tokenizer = Sie::Parser::Tokenizer.new('"\\\\\\"\\\\"')
66
+ tokens = tokenizer.tokenize
67
+
68
+ expect(token_table_for(tokens)).to eq([
69
+ [ "StringToken", '\\"\\']
70
+ ])
71
+ end
72
+
73
+ it "handles tab character as field separator" do
74
+ tokenizer = Sie::Parser::Tokenizer.new("#TRANS\t2400")
75
+ tokens = tokenizer.tokenize
76
+
77
+ expect(token_table_for(tokens)).to eq([
78
+ [ "EntryToken", "TRANS"],
79
+ [ "StringToken", "2400"]
80
+ ])
81
+ end
82
+
83
+ it "rejects control characters" do
84
+ codes_not_allowed = (0..8).to_a + (10..31).to_a + [127]
85
+ codes_not_allowed.each do |x|
86
+ tokenizer = Sie::Parser::Tokenizer.new([x].pack("C"))
87
+ expect{tokenizer.tokenize}.to raise_error /Unhandled character/
88
+ end
89
+ end
90
+
91
+ private
92
+
37
93
  def token_table_for(tokens)
38
94
  tokens.map { |token|
39
95
  [ token.class.name.split("::").last, token.value ]
@@ -3,21 +3,40 @@ require "sie/parser"
3
3
 
4
4
  describe Sie::Parser, "parse" do
5
5
  it "parses sie data that includes arrays" do
6
+ data = <<-DATA
7
+ #VER "LF" 2222 20130101 "Foocorp expense"
8
+ {
9
+ #TRANS 2400 {} -200 20130101 "Foocorp expense"
10
+ #TRANS 4100 {} 180 20130101 "Widgets from foocorp"
11
+ #TRANS 2611 {} -20 20130101 "VAT"
12
+ }
13
+ DATA
14
+
6
15
  parser = Sie::Parser.new
7
- sie_file = parser.parse(<<DATA
16
+ sie_file = parser.parse(data)
17
+
18
+ voucher_entry = sie_file.entries.first
19
+ expect(sie_file.entries.size).to eq(1)
20
+ expect(voucher_entry.attributes["verdatum"]).to eq("20130101")
21
+ expect(voucher_entry.entries.size).to eq(3)
22
+ expect(voucher_entry.entries.first.attributes["kontonr"]).to eq("2400")
23
+ end
24
+
25
+ it "handles leading whitespace" do
26
+ data = <<-DATA
8
27
  #VER "LF" 2222 20130101 "Foocorp expense"
9
28
  {
10
29
  #TRANS 2400 {} -200 20130101 "Foocorp expense"
11
30
  #TRANS 4100 {} 180 20130101 "Widgets from foocorp"
12
31
  #TRANS 2611 {} -20 20130101 "VAT"
13
32
  }
14
- DATA
15
- )
33
+ DATA
34
+
35
+ parser = Sie::Parser.new
36
+ sie_file = parser.parse(data)
16
37
 
17
38
  voucher_entry = sie_file.entries.first
18
39
  expect(sie_file.entries.size).to eq(1)
19
- expect(voucher_entry.attributes["verdatum"]).to eq("20130101")
20
40
  expect(voucher_entry.entries.size).to eq(3)
21
- expect(voucher_entry.entries.first.attributes["kontonr"]).to eq("2400")
22
41
  end
23
42
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sie
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.1
4
+ version: 3.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Barsoom AB
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-02 00:00:00.000000000 Z
11
+ date: 2015-10-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: attr_extras
@@ -105,7 +105,6 @@ files:
105
105
  - lib/sie/parser/line_parser.rb
106
106
  - lib/sie/parser/sie_file.rb
107
107
  - lib/sie/parser/tokenizer.rb
108
- - lib/sie/parser/tokenizer/character.rb
109
108
  - lib/sie/parser/tokenizer/token.rb
110
109
  - lib/sie/version.rb
111
110
  - script/bootstrap
@@ -1,34 +0,0 @@
1
- module Sie
2
- class Parser
3
- class Tokenizer
4
- class Character
5
- pattr_initialize :value
6
- attr_reader :value
7
-
8
- def unquoted_data?
9
- non_whitespace? && !end_of_array?
10
- end
11
-
12
- def entry?
13
- value == "#"
14
- end
15
-
16
- def beginning_of_array?
17
- value == "{"
18
- end
19
-
20
- def end_of_array?
21
- value == "}"
22
- end
23
-
24
- def quote?
25
- value == '"'
26
- end
27
-
28
- def non_whitespace?
29
- value != " " && value != "\t"
30
- end
31
- end
32
- end
33
- end
34
- end