scanner 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/scanner/scanner.rb +84 -7
- data/lib/scanner/version.rb +1 -1
- data/spec/scanner/scanner_spec.rb +86 -4
- metadata +4 -4
data/lib/scanner/scanner.rb
CHANGED
@@ -6,16 +6,31 @@ module Scanner
|
|
6
6
|
aModule.instance_eval do
|
7
7
|
@language_tokens = {}
|
8
8
|
@ignore = nil
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
@keywords = nil
|
10
|
+
@check_for_token_separator = {}
|
11
|
+
@separator = nil
|
12
|
+
|
13
|
+
def token(token_symbol, regular_expression, options = {})
|
14
|
+
modified_reg_exp = "\\A#{regular_expression}"
|
15
|
+
@language_tokens[token_symbol] = /#{modified_reg_exp}/
|
16
|
+
@check_for_token_separator[token_symbol] = options[:check_for_token_separator] == true
|
12
17
|
end
|
13
18
|
|
14
19
|
def ignore(regular_expression)
|
15
|
-
|
20
|
+
modified_reg_exp = "\\A#{regular_expression}"
|
21
|
+
@ignore = /#{modified_reg_exp}/
|
22
|
+
end
|
23
|
+
|
24
|
+
def keywords(keywords)
|
25
|
+
@keywords = keywords
|
16
26
|
end
|
17
27
|
|
18
|
-
|
28
|
+
def token_separator(regular_expression)
|
29
|
+
modified_reg_exp = "\\A#{regular_expression}"
|
30
|
+
@separator = /#{modified_reg_exp}/
|
31
|
+
end
|
32
|
+
|
33
|
+
token :eof, '\z'
|
19
34
|
end
|
20
35
|
|
21
36
|
end
|
@@ -29,11 +44,25 @@ module Scanner
|
|
29
44
|
self.class.instance_eval { @ignore }
|
30
45
|
end
|
31
46
|
|
47
|
+
def keywords
|
48
|
+
self.class.instance_eval { @keywords }
|
49
|
+
end
|
50
|
+
|
51
|
+
def check_for_token_separator
|
52
|
+
self.class.instance_eval { @check_for_token_separator }
|
53
|
+
end
|
54
|
+
|
55
|
+
def separator
|
56
|
+
self.class.instance_eval { @separator }
|
57
|
+
end
|
58
|
+
|
32
59
|
public
|
33
60
|
|
34
61
|
def parse(program)
|
35
62
|
@program = program
|
36
63
|
@token_list = []
|
64
|
+
@line_number = 1
|
65
|
+
@column_number = 1
|
37
66
|
end
|
38
67
|
|
39
68
|
def consume
|
@@ -55,24 +84,62 @@ module Scanner
|
|
55
84
|
@token_list[-1]
|
56
85
|
end
|
57
86
|
|
87
|
+
def token_is?(token_type)
|
88
|
+
look_ahead.is? token_type
|
89
|
+
end
|
90
|
+
|
91
|
+
def token_is_not?(token_type)
|
92
|
+
not (look_ahead.is? token_type)
|
93
|
+
end
|
94
|
+
|
95
|
+
def tokens_are?(*tokens)
|
96
|
+
look_ahead_index = 1
|
97
|
+
tokens.each do |token|
|
98
|
+
return false unless look_ahead(look_ahead_index).is? token
|
99
|
+
look_ahead_index += 1
|
100
|
+
end
|
101
|
+
return true
|
102
|
+
end
|
103
|
+
|
58
104
|
private
|
59
105
|
|
60
106
|
|
61
107
|
def consume_next_token
|
62
108
|
clear_ignore_text
|
63
109
|
|
110
|
+
currently_at_column = @column_number
|
64
111
|
language_tokens.each do |symbol, reg_exp|
|
65
112
|
if @program =~ reg_exp
|
66
|
-
|
113
|
+
content, token_type = get_token_from_reg_exp(reg_exp, symbol)
|
114
|
+
if check_for_token_separator[symbol]
|
115
|
+
check_for_separator
|
116
|
+
end
|
117
|
+
return Token.new(token_type, content, @line_number, currently_at_column)
|
67
118
|
end
|
68
119
|
end
|
69
120
|
|
70
121
|
throw :scanner_exception
|
71
122
|
end
|
72
123
|
|
124
|
+
def check_for_separator
|
125
|
+
eof = language_tokens[:eof]
|
126
|
+
throw :scanner_exception unless @program =~ separator || @program =~ eof
|
127
|
+
end
|
128
|
+
|
129
|
+
def get_token_from_reg_exp(reg_exp, symbol)
|
130
|
+
content = consume_regular_expression(reg_exp)
|
131
|
+
if keywords.include? content
|
132
|
+
token_type = content.to_sym
|
133
|
+
else
|
134
|
+
token_type = symbol
|
135
|
+
end
|
136
|
+
return content, token_type
|
137
|
+
end
|
138
|
+
|
73
139
|
def consume_regular_expression(regexp)
|
74
140
|
content = @program[regexp]
|
75
141
|
@program.gsub!(regexp,"")
|
142
|
+
calculate_position_after content
|
76
143
|
content
|
77
144
|
end
|
78
145
|
|
@@ -80,5 +147,15 @@ module Scanner
|
|
80
147
|
consume_regular_expression(ignore) if ignore
|
81
148
|
end
|
82
149
|
|
83
|
-
|
150
|
+
def calculate_position_after(content)
|
151
|
+
if content
|
152
|
+
number_of_new_lines = content.scan(/\n/).size
|
153
|
+
if number_of_new_lines > 0
|
154
|
+
@line_number += number_of_new_lines
|
155
|
+
@column_number = content.gsub(/.*\n/,"").length + 1
|
156
|
+
else
|
157
|
+
@column_number += content.length
|
158
|
+
end
|
159
|
+
end
|
160
|
+
end
|
84
161
|
end
|
data/lib/scanner/version.rb
CHANGED
[version.rb hunk (+1 -1) is missing from this extract]
data/spec/scanner/scanner_spec.rb
CHANGED
@@ -4,9 +4,11 @@ describe Scanner do
|
|
4
4
|
before(:each) do
|
5
5
|
class TestScanner
|
6
6
|
include Scanner
|
7
|
-
ignore
|
8
|
-
token :number,
|
9
|
-
token :id,
|
7
|
+
ignore '[\s|\n]+'
|
8
|
+
token :number, '\d+', check_for_token_separator: true
|
9
|
+
token :id, '[a-z]+', check_for_token_separator: true
|
10
|
+
keywords %w{if}
|
11
|
+
token_separator '\s'
|
10
12
|
end
|
11
13
|
|
12
14
|
@scanner = TestScanner.new
|
@@ -27,7 +29,7 @@ describe Scanner do
|
|
27
29
|
end
|
28
30
|
|
29
31
|
it "consume clears ignore tokens before token" do
|
30
|
-
@scanner.parse("
|
32
|
+
@scanner.parse(" \n 123")
|
31
33
|
token = @scanner.consume
|
32
34
|
token.is?(:number).should be true
|
33
35
|
end
|
@@ -51,5 +53,85 @@ describe Scanner do
|
|
51
53
|
end
|
52
54
|
end
|
53
55
|
|
56
|
+
describe "line number" do
|
57
|
+
it "starts with one" do
|
58
|
+
@scanner.parse("123")
|
59
|
+
@scanner.consume.line.should eq 1
|
60
|
+
end
|
61
|
+
|
62
|
+
it "increases after newlines" do
|
63
|
+
@scanner.parse("\n\n123")
|
64
|
+
@scanner.consume.line.should eq 3
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
describe "column number" do
|
69
|
+
it "starts with one" do
|
70
|
+
@scanner.parse("123")
|
71
|
+
@scanner.consume.column.should eq 1
|
72
|
+
end
|
73
|
+
|
74
|
+
it "increases after tokens are consumed" do
|
75
|
+
@scanner.parse("123 abc")
|
76
|
+
@scanner.consume # 123
|
77
|
+
@scanner.consume.column.should eq 5
|
78
|
+
end
|
79
|
+
|
80
|
+
it "resets after new lines" do
|
81
|
+
@scanner.parse("123\n abc")
|
82
|
+
@scanner.consume # 123
|
83
|
+
@scanner.consume.column.should eq 2
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
describe "keywords" do
|
88
|
+
it "a keyword is identified as such" do
|
89
|
+
@scanner.parse("if")
|
90
|
+
@scanner.consume.is?(:if).should be true
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
describe "token is?" do
|
95
|
+
it "correctly identifies token to come" do
|
96
|
+
@scanner.parse("if")
|
97
|
+
@scanner.token_is?(:if).should be true
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
describe "token is not?" do
|
102
|
+
it "correctly identifies tokens that aren't" do
|
103
|
+
@scanner.parse("if")
|
104
|
+
@scanner.token_is_not?(:id).should be true
|
105
|
+
end
|
106
|
+
|
107
|
+
it "return false if the token is the one in the parameter" do
|
108
|
+
@scanner.parse("if")
|
109
|
+
@scanner.token_is_not?(:if).should be false
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
describe "tokens are?" do
|
114
|
+
it "correctly identifies valid sequences" do
|
115
|
+
@scanner.parse("if other 123")
|
116
|
+
@scanner.tokens_are?(:if, :id, :number).should be true
|
117
|
+
end
|
118
|
+
|
119
|
+
it "correctly identifies invalid sequences" do
|
120
|
+
@scanner.parse("if other 123")
|
121
|
+
@scanner.tokens_are?(:if, :id, :id).should be false
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
describe "token separator" do
|
126
|
+
it "fails to recognise token without separator" do
|
127
|
+
@scanner.parse("other123")
|
128
|
+
lambda { @scanner.consume }.should raise_error
|
129
|
+
end
|
130
|
+
|
131
|
+
it "works if token after is eof" do
|
132
|
+
@scanner.parse("123")
|
133
|
+
@scanner.consume.is?(:number).should be true
|
134
|
+
end
|
135
|
+
end
|
54
136
|
|
55
137
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scanner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-08-03 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
@@ -77,7 +77,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
77
77
|
version: '0'
|
78
78
|
segments:
|
79
79
|
- 0
|
80
|
-
hash:
|
80
|
+
hash: 1008594902208819548
|
81
81
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
82
82
|
none: false
|
83
83
|
requirements:
|
@@ -86,7 +86,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
86
86
|
version: '0'
|
87
87
|
segments:
|
88
88
|
- 0
|
89
|
-
hash:
|
89
|
+
hash: 1008594902208819548
|
90
90
|
requirements: []
|
91
91
|
rubyforge_project:
|
92
92
|
rubygems_version: 1.8.24
|