scanner 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/scanner/scanner.rb +84 -7
- data/lib/scanner/version.rb +1 -1
- data/spec/scanner/scanner_spec.rb +86 -4
- metadata +4 -4
data/lib/scanner/scanner.rb
CHANGED
@@ -6,16 +6,31 @@ module Scanner
|
|
6
6
|
aModule.instance_eval do
|
7
7
|
@language_tokens = {}
|
8
8
|
@ignore = nil
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
@keywords = nil
|
10
|
+
@check_for_token_separator = {}
|
11
|
+
@separator = nil
|
12
|
+
|
13
|
+
def token(token_symbol, regular_expression, options = {})
|
14
|
+
modified_reg_exp = "\\A#{regular_expression}"
|
15
|
+
@language_tokens[token_symbol] = /#{modified_reg_exp}/
|
16
|
+
@check_for_token_separator[token_symbol] = options[:check_for_token_separator] == true
|
12
17
|
end
|
13
18
|
|
14
19
|
def ignore(regular_expression)
|
15
|
-
|
20
|
+
modified_reg_exp = "\\A#{regular_expression}"
|
21
|
+
@ignore = /#{modified_reg_exp}/
|
22
|
+
end
|
23
|
+
|
24
|
+
def keywords(keywords)
|
25
|
+
@keywords = keywords
|
16
26
|
end
|
17
27
|
|
18
|
-
|
28
|
+
def token_separator(regular_expression)
|
29
|
+
modified_reg_exp = "\\A#{regular_expression}"
|
30
|
+
@separator = /#{modified_reg_exp}/
|
31
|
+
end
|
32
|
+
|
33
|
+
token :eof, '\z'
|
19
34
|
end
|
20
35
|
|
21
36
|
end
|
@@ -29,11 +44,25 @@ module Scanner
|
|
29
44
|
self.class.instance_eval { @ignore }
|
30
45
|
end
|
31
46
|
|
47
|
+
def keywords
|
48
|
+
self.class.instance_eval { @keywords }
|
49
|
+
end
|
50
|
+
|
51
|
+
def check_for_token_separator
|
52
|
+
self.class.instance_eval { @check_for_token_separator }
|
53
|
+
end
|
54
|
+
|
55
|
+
def separator
|
56
|
+
self.class.instance_eval { @separator }
|
57
|
+
end
|
58
|
+
|
32
59
|
public
|
33
60
|
|
34
61
|
def parse(program)
|
35
62
|
@program = program
|
36
63
|
@token_list = []
|
64
|
+
@line_number = 1
|
65
|
+
@column_number = 1
|
37
66
|
end
|
38
67
|
|
39
68
|
def consume
|
@@ -55,24 +84,62 @@ module Scanner
|
|
55
84
|
@token_list[-1]
|
56
85
|
end
|
57
86
|
|
87
|
+
def token_is?(token_type)
|
88
|
+
look_ahead.is? token_type
|
89
|
+
end
|
90
|
+
|
91
|
+
def token_is_not?(token_type)
|
92
|
+
not (look_ahead.is? token_type)
|
93
|
+
end
|
94
|
+
|
95
|
+
def tokens_are?(*tokens)
|
96
|
+
look_ahead_index = 1
|
97
|
+
tokens.each do |token|
|
98
|
+
return false unless look_ahead(look_ahead_index).is? token
|
99
|
+
look_ahead_index += 1
|
100
|
+
end
|
101
|
+
return true
|
102
|
+
end
|
103
|
+
|
58
104
|
private
|
59
105
|
|
60
106
|
|
61
107
|
def consume_next_token
|
62
108
|
clear_ignore_text
|
63
109
|
|
110
|
+
currently_at_column = @column_number
|
64
111
|
language_tokens.each do |symbol, reg_exp|
|
65
112
|
if @program =~ reg_exp
|
66
|
-
|
113
|
+
content, token_type = get_token_from_reg_exp(reg_exp, symbol)
|
114
|
+
if check_for_token_separator[symbol]
|
115
|
+
check_for_separator
|
116
|
+
end
|
117
|
+
return Token.new(token_type, content, @line_number, currently_at_column)
|
67
118
|
end
|
68
119
|
end
|
69
120
|
|
70
121
|
throw :scanner_exception
|
71
122
|
end
|
72
123
|
|
124
|
+
def check_for_separator
|
125
|
+
eof = language_tokens[:eof]
|
126
|
+
throw :scanner_exception unless @program =~ separator || @program =~ eof
|
127
|
+
end
|
128
|
+
|
129
|
+
def get_token_from_reg_exp(reg_exp, symbol)
|
130
|
+
content = consume_regular_expression(reg_exp)
|
131
|
+
if keywords.include? content
|
132
|
+
token_type = content.to_sym
|
133
|
+
else
|
134
|
+
token_type = symbol
|
135
|
+
end
|
136
|
+
return content, token_type
|
137
|
+
end
|
138
|
+
|
73
139
|
def consume_regular_expression(regexp)
|
74
140
|
content = @program[regexp]
|
75
141
|
@program.gsub!(regexp,"")
|
142
|
+
calculate_position_after content
|
76
143
|
content
|
77
144
|
end
|
78
145
|
|
@@ -80,5 +147,15 @@ module Scanner
|
|
80
147
|
consume_regular_expression(ignore) if ignore
|
81
148
|
end
|
82
149
|
|
83
|
-
|
150
|
+
def calculate_position_after(content)
|
151
|
+
if content
|
152
|
+
number_of_new_lines = content.scan(/\n/).size
|
153
|
+
if number_of_new_lines > 0
|
154
|
+
@line_number += number_of_new_lines
|
155
|
+
@column_number = content.gsub(/.*\n/,"").length + 1
|
156
|
+
else
|
157
|
+
@column_number += content.length
|
158
|
+
end
|
159
|
+
end
|
160
|
+
end
|
84
161
|
end
|
data/lib/scanner/version.rb
CHANGED
(+1 -1; hunk not present in this capture)
data/spec/scanner/scanner_spec.rb
CHANGED
@@ -4,9 +4,11 @@ describe Scanner do
|
|
4
4
|
before(:each) do
|
5
5
|
class TestScanner
|
6
6
|
include Scanner
|
7
|
-
ignore
|
8
|
-
token :number,
|
9
|
-
token :id,
|
7
|
+
ignore '[\s|\n]+'
|
8
|
+
token :number, '\d+', check_for_token_separator: true
|
9
|
+
token :id, '[a-z]+', check_for_token_separator: true
|
10
|
+
keywords %w{if}
|
11
|
+
token_separator '\s'
|
10
12
|
end
|
11
13
|
|
12
14
|
@scanner = TestScanner.new
|
@@ -27,7 +29,7 @@ describe Scanner do
|
|
27
29
|
end
|
28
30
|
|
29
31
|
it "consume clears ignore tokens before token" do
|
30
|
-
@scanner.parse("
|
32
|
+
@scanner.parse(" \n 123")
|
31
33
|
token = @scanner.consume
|
32
34
|
token.is?(:number).should be true
|
33
35
|
end
|
@@ -51,5 +53,85 @@ describe Scanner do
|
|
51
53
|
end
|
52
54
|
end
|
53
55
|
|
56
|
+
describe "line number" do
|
57
|
+
it "starts with one" do
|
58
|
+
@scanner.parse("123")
|
59
|
+
@scanner.consume.line.should eq 1
|
60
|
+
end
|
61
|
+
|
62
|
+
it "increases after newlines" do
|
63
|
+
@scanner.parse("\n\n123")
|
64
|
+
@scanner.consume.line.should eq 3
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
describe "column number" do
|
69
|
+
it "starts with one" do
|
70
|
+
@scanner.parse("123")
|
71
|
+
@scanner.consume.column.should eq 1
|
72
|
+
end
|
73
|
+
|
74
|
+
it "increases after tokens are consumed" do
|
75
|
+
@scanner.parse("123 abc")
|
76
|
+
@scanner.consume # 123
|
77
|
+
@scanner.consume.column.should eq 5
|
78
|
+
end
|
79
|
+
|
80
|
+
it "resets after new lines" do
|
81
|
+
@scanner.parse("123\n abc")
|
82
|
+
@scanner.consume # 123
|
83
|
+
@scanner.consume.column.should eq 2
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
describe "keywords" do
|
88
|
+
it "a keyword is identified as such" do
|
89
|
+
@scanner.parse("if")
|
90
|
+
@scanner.consume.is?(:if).should be true
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
describe "token is?" do
|
95
|
+
it "correctly identifies token to come" do
|
96
|
+
@scanner.parse("if")
|
97
|
+
@scanner.token_is?(:if).should be true
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
describe "token is not?" do
|
102
|
+
it "correctly identifies tokens that aren't" do
|
103
|
+
@scanner.parse("if")
|
104
|
+
@scanner.token_is_not?(:id).should be true
|
105
|
+
end
|
106
|
+
|
107
|
+
it "return false if the token is the one in the parameter" do
|
108
|
+
@scanner.parse("if")
|
109
|
+
@scanner.token_is_not?(:if).should be false
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
describe "tokens are?" do
|
114
|
+
it "correctly identifies valid sequences" do
|
115
|
+
@scanner.parse("if other 123")
|
116
|
+
@scanner.tokens_are?(:if, :id, :number).should be true
|
117
|
+
end
|
118
|
+
|
119
|
+
it "correctly identifies invalid sequences" do
|
120
|
+
@scanner.parse("if other 123")
|
121
|
+
@scanner.tokens_are?(:if, :id, :id).should be false
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
describe "token separator" do
|
126
|
+
it "fails to recognise token without separator" do
|
127
|
+
@scanner.parse("other123")
|
128
|
+
lambda { @scanner.consume }.should raise_error
|
129
|
+
end
|
130
|
+
|
131
|
+
it "works if token after is eof" do
|
132
|
+
@scanner.parse("123")
|
133
|
+
@scanner.consume.is?(:number).should be true
|
134
|
+
end
|
135
|
+
end
|
54
136
|
|
55
137
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scanner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-08-03 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
@@ -77,7 +77,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
77
77
|
version: '0'
|
78
78
|
segments:
|
79
79
|
- 0
|
80
|
-
hash:
|
80
|
+
hash: 1008594902208819548
|
81
81
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
82
82
|
none: false
|
83
83
|
requirements:
|
@@ -86,7 +86,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
86
86
|
version: '0'
|
87
87
|
segments:
|
88
88
|
- 0
|
89
|
-
hash:
|
89
|
+
hash: 1008594902208819548
|
90
90
|
requirements: []
|
91
91
|
rubyforge_project:
|
92
92
|
rubygems_version: 1.8.24
|