scanner 0.0.1 → 0.0.2

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -6,16 +6,31 @@ module Scanner
6
6
  aModule.instance_eval do
7
7
  @language_tokens = {}
8
8
  @ignore = nil
9
-
10
- def token(token_symbol, regular_expression)
11
- @language_tokens[token_symbol] = regular_expression
9
+ @keywords = nil
10
+ @check_for_token_separator = {}
11
+ @separator = nil
12
+
13
+ def token(token_symbol, regular_expression, options = {})
14
+ modified_reg_exp = "\\A#{regular_expression}"
15
+ @language_tokens[token_symbol] = /#{modified_reg_exp}/
16
+ @check_for_token_separator[token_symbol] = options[:check_for_token_separator] == true
12
17
  end
13
18
 
14
19
  def ignore(regular_expression)
15
- @ignore = regular_expression
20
+ modified_reg_exp = "\\A#{regular_expression}"
21
+ @ignore = /#{modified_reg_exp}/
22
+ end
23
+
24
+ def keywords(keywords)
25
+ @keywords = keywords
16
26
  end
17
27
 
18
- token :eof, /\A\z/
28
+ def token_separator(regular_expression)
29
+ modified_reg_exp = "\\A#{regular_expression}"
30
+ @separator = /#{modified_reg_exp}/
31
+ end
32
+
33
+ token :eof, '\z'
19
34
  end
20
35
 
21
36
  end
@@ -29,11 +44,25 @@ module Scanner
29
44
  self.class.instance_eval { @ignore }
30
45
  end
31
46
 
47
+ def keywords
48
+ self.class.instance_eval { @keywords }
49
+ end
50
+
51
+ def check_for_token_separator
52
+ self.class.instance_eval { @check_for_token_separator }
53
+ end
54
+
55
+ def separator
56
+ self.class.instance_eval { @separator }
57
+ end
58
+
32
59
  public
33
60
 
34
61
  def parse(program)
35
62
  @program = program
36
63
  @token_list = []
64
+ @line_number = 1
65
+ @column_number = 1
37
66
  end
38
67
 
39
68
  def consume
@@ -55,24 +84,62 @@ module Scanner
55
84
  @token_list[-1]
56
85
  end
57
86
 
87
+ def token_is?(token_type)
88
+ look_ahead.is? token_type
89
+ end
90
+
91
+ def token_is_not?(token_type)
92
+ not (look_ahead.is? token_type)
93
+ end
94
+
95
+ def tokens_are?(*tokens)
96
+ look_ahead_index = 1
97
+ tokens.each do |token|
98
+ return false unless look_ahead(look_ahead_index).is? token
99
+ look_ahead_index += 1
100
+ end
101
+ return true
102
+ end
103
+
58
104
  private
59
105
 
60
106
 
61
107
  def consume_next_token
62
108
  clear_ignore_text
63
109
 
110
+ currently_at_column = @column_number
64
111
  language_tokens.each do |symbol, reg_exp|
65
112
  if @program =~ reg_exp
66
- return Token.new(symbol, consume_regular_expression(reg_exp), 0, 0)
113
+ content, token_type = get_token_from_reg_exp(reg_exp, symbol)
114
+ if check_for_token_separator[symbol]
115
+ check_for_separator
116
+ end
117
+ return Token.new(token_type, content, @line_number, currently_at_column)
67
118
  end
68
119
  end
69
120
 
70
121
  throw :scanner_exception
71
122
  end
72
123
 
124
+ def check_for_separator
125
+ eof = language_tokens[:eof]
126
+ throw :scanner_exception unless @program =~ separator || @program =~ eof
127
+ end
128
+
129
+ def get_token_from_reg_exp(reg_exp, symbol)
130
+ content = consume_regular_expression(reg_exp)
131
+ if keywords.include? content
132
+ token_type = content.to_sym
133
+ else
134
+ token_type = symbol
135
+ end
136
+ return content, token_type
137
+ end
138
+
73
139
  def consume_regular_expression(regexp)
74
140
  content = @program[regexp]
75
141
  @program.gsub!(regexp,"")
142
+ calculate_position_after content
76
143
  content
77
144
  end
78
145
 
@@ -80,5 +147,15 @@ module Scanner
80
147
  consume_regular_expression(ignore) if ignore
81
148
  end
82
149
 
83
-
150
+ def calculate_position_after(content)
151
+ if content
152
+ number_of_new_lines = content.scan(/\n/).size
153
+ if number_of_new_lines > 0
154
+ @line_number += number_of_new_lines
155
+ @column_number = content.gsub(/.*\n/,"").length + 1
156
+ else
157
+ @column_number += content.length
158
+ end
159
+ end
160
+ end
84
161
  end
@@ -1,3 +1,3 @@
1
1
  module Scanner
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
@@ -4,9 +4,11 @@ describe Scanner do
4
4
  before(:each) do
5
5
  class TestScanner
6
6
  include Scanner
7
- ignore /\s+/
8
- token :number, /\d+/
9
- token :id, /\w+/
7
+ ignore '[\s|\n]+'
8
+ token :number, '\d+', check_for_token_separator: true
9
+ token :id, '[a-z]+', check_for_token_separator: true
10
+ keywords %w{if}
11
+ token_separator '\s'
10
12
  end
11
13
 
12
14
  @scanner = TestScanner.new
@@ -27,7 +29,7 @@ describe Scanner do
27
29
  end
28
30
 
29
31
  it "consume clears ignore tokens before token" do
30
- @scanner.parse(" 123")
32
+ @scanner.parse(" \n 123")
31
33
  token = @scanner.consume
32
34
  token.is?(:number).should be true
33
35
  end
@@ -51,5 +53,85 @@ describe Scanner do
51
53
  end
52
54
  end
53
55
 
56
+ describe "line number" do
57
+ it "starts with one" do
58
+ @scanner.parse("123")
59
+ @scanner.consume.line.should eq 1
60
+ end
61
+
62
+ it "increases after newlines" do
63
+ @scanner.parse("\n\n123")
64
+ @scanner.consume.line.should eq 3
65
+ end
66
+ end
67
+
68
+ describe "column number" do
69
+ it "starts with one" do
70
+ @scanner.parse("123")
71
+ @scanner.consume.column.should eq 1
72
+ end
73
+
74
+ it "increases after tokens are consumed" do
75
+ @scanner.parse("123 abc")
76
+ @scanner.consume # 123
77
+ @scanner.consume.column.should eq 5
78
+ end
79
+
80
+ it "resets after new lines" do
81
+ @scanner.parse("123\n abc")
82
+ @scanner.consume # 123
83
+ @scanner.consume.column.should eq 2
84
+ end
85
+ end
86
+
87
+ describe "keywords" do
88
+ it "a keyword is identified as such" do
89
+ @scanner.parse("if")
90
+ @scanner.consume.is?(:if).should be true
91
+ end
92
+ end
93
+
94
+ describe "token is?" do
95
+ it "correctly identifies token to come" do
96
+ @scanner.parse("if")
97
+ @scanner.token_is?(:if).should be true
98
+ end
99
+ end
100
+
101
+ describe "token is not?" do
102
+ it "correctly identifies tokens that aren't" do
103
+ @scanner.parse("if")
104
+ @scanner.token_is_not?(:id).should be true
105
+ end
106
+
107
+ it "return false if the token is the one in the parameter" do
108
+ @scanner.parse("if")
109
+ @scanner.token_is_not?(:if).should be false
110
+ end
111
+ end
112
+
113
+ describe "tokens are?" do
114
+ it "correctly identifies valid sequences" do
115
+ @scanner.parse("if other 123")
116
+ @scanner.tokens_are?(:if, :id, :number).should be true
117
+ end
118
+
119
+ it "correctly identifies invalid sequences" do
120
+ @scanner.parse("if other 123")
121
+ @scanner.tokens_are?(:if, :id, :id).should be false
122
+ end
123
+ end
124
+
125
+ describe "token separator" do
126
+ it "fails to recognise token without separator" do
127
+ @scanner.parse("other123")
128
+ lambda { @scanner.consume }.should raise_error
129
+ end
130
+
131
+ it "works if token after is eof" do
132
+ @scanner.parse("123")
133
+ @scanner.consume.is?(:number).should be true
134
+ end
135
+ end
54
136
 
55
137
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scanner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-07-31 00:00:00.000000000 Z
12
+ date: 2012-08-03 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
@@ -77,7 +77,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
77
77
  version: '0'
78
78
  segments:
79
79
  - 0
80
- hash: -2890427904096243855
80
+ hash: 1008594902208819548
81
81
  required_rubygems_version: !ruby/object:Gem::Requirement
82
82
  none: false
83
83
  requirements:
@@ -86,7 +86,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
86
86
  version: '0'
87
87
  segments:
88
88
  - 0
89
- hash: -2890427904096243855
89
+ hash: 1008594902208819548
90
90
  requirements: []
91
91
  rubyforge_project:
92
92
  rubygems_version: 1.8.24