scanner 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -6,16 +6,31 @@ module Scanner
6
6
  aModule.instance_eval do
7
7
  @language_tokens = {}
8
8
  @ignore = nil
9
-
10
- def token(token_symbol, regular_expression)
11
- @language_tokens[token_symbol] = regular_expression
9
+ @keywords = nil
10
+ @check_for_token_separator = {}
11
+ @separator = nil
12
+
13
+ def token(token_symbol, regular_expression, options = {})
14
+ modified_reg_exp = "\\A#{regular_expression}"
15
+ @language_tokens[token_symbol] = /#{modified_reg_exp}/
16
+ @check_for_token_separator[token_symbol] = options[:check_for_token_separator] == true
12
17
  end
13
18
 
14
19
  def ignore(regular_expression)
15
- @ignore = regular_expression
20
+ modified_reg_exp = "\\A#{regular_expression}"
21
+ @ignore = /#{modified_reg_exp}/
22
+ end
23
+
24
+ def keywords(keywords)
25
+ @keywords = keywords
16
26
  end
17
27
 
18
- token :eof, /\A\z/
28
+ def token_separator(regular_expression)
29
+ modified_reg_exp = "\\A#{regular_expression}"
30
+ @separator = /#{modified_reg_exp}/
31
+ end
32
+
33
+ token :eof, '\z'
19
34
  end
20
35
 
21
36
  end
@@ -29,11 +44,25 @@ module Scanner
29
44
  self.class.instance_eval { @ignore }
30
45
  end
31
46
 
47
+ def keywords
48
+ self.class.instance_eval { @keywords }
49
+ end
50
+
51
+ def check_for_token_separator
52
+ self.class.instance_eval { @check_for_token_separator }
53
+ end
54
+
55
+ def separator
56
+ self.class.instance_eval { @separator }
57
+ end
58
+
32
59
  public
33
60
 
34
61
  def parse(program)
35
62
  @program = program
36
63
  @token_list = []
64
+ @line_number = 1
65
+ @column_number = 1
37
66
  end
38
67
 
39
68
  def consume
@@ -55,24 +84,62 @@ module Scanner
55
84
  @token_list[-1]
56
85
  end
57
86
 
87
+ def token_is?(token_type)
88
+ look_ahead.is? token_type
89
+ end
90
+
91
+ def token_is_not?(token_type)
92
+ not (look_ahead.is? token_type)
93
+ end
94
+
95
+ def tokens_are?(*tokens)
96
+ look_ahead_index = 1
97
+ tokens.each do |token|
98
+ return false unless look_ahead(look_ahead_index).is? token
99
+ look_ahead_index += 1
100
+ end
101
+ return true
102
+ end
103
+
58
104
  private
59
105
 
60
106
 
61
107
  def consume_next_token
62
108
  clear_ignore_text
63
109
 
110
+ currently_at_column = @column_number
64
111
  language_tokens.each do |symbol, reg_exp|
65
112
  if @program =~ reg_exp
66
- return Token.new(symbol, consume_regular_expression(reg_exp), 0, 0)
113
+ content, token_type = get_token_from_reg_exp(reg_exp, symbol)
114
+ if check_for_token_separator[symbol]
115
+ check_for_separator
116
+ end
117
+ return Token.new(token_type, content, @line_number, currently_at_column)
67
118
  end
68
119
  end
69
120
 
70
121
  throw :scanner_exception
71
122
  end
72
123
 
124
+ def check_for_separator
125
+ eof = language_tokens[:eof]
126
+ throw :scanner_exception unless @program =~ separator || @program =~ eof
127
+ end
128
+
129
+ def get_token_from_reg_exp(reg_exp, symbol)
130
+ content = consume_regular_expression(reg_exp)
131
+ if keywords.include? content
132
+ token_type = content.to_sym
133
+ else
134
+ token_type = symbol
135
+ end
136
+ return content, token_type
137
+ end
138
+
73
139
  def consume_regular_expression(regexp)
74
140
  content = @program[regexp]
75
141
  @program.gsub!(regexp,"")
142
+ calculate_position_after content
76
143
  content
77
144
  end
78
145
 
@@ -80,5 +147,15 @@ module Scanner
80
147
  consume_regular_expression(ignore) if ignore
81
148
  end
82
149
 
83
-
150
+ def calculate_position_after(content)
151
+ if content
152
+ number_of_new_lines = content.scan(/\n/).size
153
+ if number_of_new_lines > 0
154
+ @line_number += number_of_new_lines
155
+ @column_number = content.gsub(/.*\n/,"").length + 1
156
+ else
157
+ @column_number += content.length
158
+ end
159
+ end
160
+ end
84
161
  end
@@ -1,3 +1,3 @@
1
1
  module Scanner
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
@@ -4,9 +4,11 @@ describe Scanner do
4
4
  before(:each) do
5
5
  class TestScanner
6
6
  include Scanner
7
- ignore /\s+/
8
- token :number, /\d+/
9
- token :id, /\w+/
7
+ ignore '[\s|\n]+'
8
+ token :number, '\d+', check_for_token_separator: true
9
+ token :id, '[a-z]+', check_for_token_separator: true
10
+ keywords %w{if}
11
+ token_separator '\s'
10
12
  end
11
13
 
12
14
  @scanner = TestScanner.new
@@ -27,7 +29,7 @@ describe Scanner do
27
29
  end
28
30
 
29
31
  it "consume clears ignore tokens before token" do
30
- @scanner.parse(" 123")
32
+ @scanner.parse(" \n 123")
31
33
  token = @scanner.consume
32
34
  token.is?(:number).should be true
33
35
  end
@@ -51,5 +53,85 @@ describe Scanner do
51
53
  end
52
54
  end
53
55
 
56
+ describe "line number" do
57
+ it "starts with one" do
58
+ @scanner.parse("123")
59
+ @scanner.consume.line.should eq 1
60
+ end
61
+
62
+ it "increases after newlines" do
63
+ @scanner.parse("\n\n123")
64
+ @scanner.consume.line.should eq 3
65
+ end
66
+ end
67
+
68
+ describe "column number" do
69
+ it "starts with one" do
70
+ @scanner.parse("123")
71
+ @scanner.consume.column.should eq 1
72
+ end
73
+
74
+ it "increases after tokens are consumed" do
75
+ @scanner.parse("123 abc")
76
+ @scanner.consume # 123
77
+ @scanner.consume.column.should eq 5
78
+ end
79
+
80
+ it "resets after new lines" do
81
+ @scanner.parse("123\n abc")
82
+ @scanner.consume # 123
83
+ @scanner.consume.column.should eq 2
84
+ end
85
+ end
86
+
87
+ describe "keywords" do
88
+ it "a keyword is identified as such" do
89
+ @scanner.parse("if")
90
+ @scanner.consume.is?(:if).should be true
91
+ end
92
+ end
93
+
94
+ describe "token is?" do
95
+ it "correctly identifies token to come" do
96
+ @scanner.parse("if")
97
+ @scanner.token_is?(:if).should be true
98
+ end
99
+ end
100
+
101
+ describe "token is not?" do
102
+ it "correctly identifies tokens that aren't" do
103
+ @scanner.parse("if")
104
+ @scanner.token_is_not?(:id).should be true
105
+ end
106
+
107
+ it "return false if the token is the one in the parameter" do
108
+ @scanner.parse("if")
109
+ @scanner.token_is_not?(:if).should be false
110
+ end
111
+ end
112
+
113
+ describe "tokens are?" do
114
+ it "correctly identifies valid sequences" do
115
+ @scanner.parse("if other 123")
116
+ @scanner.tokens_are?(:if, :id, :number).should be true
117
+ end
118
+
119
+ it "correctly identifies invalid sequences" do
120
+ @scanner.parse("if other 123")
121
+ @scanner.tokens_are?(:if, :id, :id).should be false
122
+ end
123
+ end
124
+
125
+ describe "token separator" do
126
+ it "fails to recognise token without separator" do
127
+ @scanner.parse("other123")
128
+ lambda { @scanner.consume }.should raise_error
129
+ end
130
+
131
+ it "works if token after is eof" do
132
+ @scanner.parse("123")
133
+ @scanner.consume.is?(:number).should be true
134
+ end
135
+ end
54
136
 
55
137
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scanner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-07-31 00:00:00.000000000 Z
12
+ date: 2012-08-03 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
@@ -77,7 +77,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
77
77
  version: '0'
78
78
  segments:
79
79
  - 0
80
- hash: -2890427904096243855
80
+ hash: 1008594902208819548
81
81
  required_rubygems_version: !ruby/object:Gem::Requirement
82
82
  none: false
83
83
  requirements:
@@ -86,7 +86,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
86
86
  version: '0'
87
87
  segments:
88
88
  - 0
89
- hash: -2890427904096243855
89
+ hash: 1008594902208819548
90
90
  requirements: []
91
91
  rubyforge_project:
92
92
  rubygems_version: 1.8.24