lex 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.rspec +3 -0
- data/.ruby-version +1 -0
- data/.travis.yml +22 -0
- data/Gemfile +19 -0
- data/LICENSE.txt +22 -0
- data/README.md +423 -0
- data/Rakefile +8 -0
- data/lex.gemspec +22 -0
- data/lib/lex.rb +22 -0
- data/lib/lex/lexeme.rb +27 -0
- data/lib/lex/lexer.rb +210 -0
- data/lib/lex/lexer/dsl.rb +49 -0
- data/lib/lex/lexer/rule_dsl.rb +165 -0
- data/lib/lex/lexers.rb +11 -0
- data/lib/lex/lexers/html.rb +8 -0
- data/lib/lex/linter.rb +114 -0
- data/lib/lex/logger.rb +21 -0
- data/lib/lex/source_line.rb +13 -0
- data/lib/lex/state.rb +37 -0
- data/lib/lex/token.rb +47 -0
- data/lib/lex/version.rb +5 -0
- data/spec/spec_helper.rb +50 -0
- data/spec/unit/error_spec.rb +42 -0
- data/spec/unit/keyword_spec.rb +34 -0
- data/spec/unit/lex_spec.rb +60 -0
- data/spec/unit/position_spec.rb +94 -0
- data/spec/unit/rule_spec.rb +63 -0
- data/spec/unit/state/clone_spec.rb +15 -0
- data/spec/unit/states_spec.rb +194 -0
- data/spec/unit/tokens_spec.rb +32 -0
- data/tasks/console.rake +10 -0
- data/tasks/coverage.rake +11 -0
- data/tasks/spec.rake +29 -0
- metadata +104 -0
# coding: utf-8

require 'spec_helper'

RSpec.describe Lex::Lexer, 'position' do
  it "calculates line number and position info from input" do
    # Token tuples below look like [name, value, line, column]; columns restart
    # at 1 on each new line tracked via advance_line. -- NOTE(review): inferred
    # from the expectations, confirm against Lex::Token#to_ary.
    code = unindent(<<-EOS)
      x = 1
      y = 2
      s = x + y
    EOS

    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens(
        :NUMBER,
        :PLUS,
        :IDENTIFIER,
        :EQUALS
      )

      rule(:PLUS, /\+/)
      rule(:EQUALS, /=/)
      rule(:IDENTIFIER, /\A[_\$a-zA-Z][_\$0-9a-zA-Z]*/)

      # Action converts the matched string to an Integer and returns the token
      # so it is emitted.
      rule(:NUMBER, /[0-9]+/) do |lexer, token|
        token.value = token.value.to_i
        token
      end

      ignore " \t"

      # No token returned: newlines only bump the line counter.
      rule(:newline, /\n+/) do |lexer, token|
        lexer.advance_line(token.value.length)
      end
    end)

    my_lexer = MyLexer.new
    expect(my_lexer.lex(code).map(&:to_ary)).to eq([
      [:IDENTIFIER, 'x', 1, 1],
      [:EQUALS, '=', 1, 3],
      [:NUMBER, 1, 1, 5],
      [:IDENTIFIER, 'y', 2, 1],
      [:EQUALS, '=', 2, 3],
      [:NUMBER, 2, 2, 5],
      [:IDENTIFIER, 's', 3, 1],
      [:EQUALS, '=', 3, 3],
      [:IDENTIFIER, 'x', 3, 5],
      [:PLUS, '+', 3, 7],
      [:IDENTIFIER, 'y', 3, 9]
    ])
  end

  it "correctly tracks multiline content" do
    code = unindent(<<-EOS)
      This is
      <b>webpage!</b>
    EOS
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens(
        :WORD,
        :LANGLE,
        :RANGLE,
        :LANGLESLASH
      )

      rule(:WORD, /[^ <>\n]+/)
      rule(:LANGLE, /</)
      rule(:RANGLE, />/)
      rule(:LANGLESLASH, /<\//)

      # Track line breaks; no token emitted for newlines.
      rule(:newline, /\n/) do |lexer, token|
        lexer.advance_line(token.value.size)
      end

      ignore " "

      # Swallow lexing errors silently for this example.
      error do |lexer, token|
      end
    end)

    my_lexer = MyLexer.new
    expect(my_lexer.lex(code).map(&:to_ary)).to eq([
      [:WORD, 'This', 1, 1],
      [:WORD, 'is', 1, 6],
      [:LANGLE, '<', 2, 4],
      [:WORD, 'b', 2, 5],
      [:RANGLE, '>', 2, 6],
      [:WORD, 'webpage!', 2, 7],
      [:LANGLESLASH, '</', 2, 15],
      [:WORD, 'b', 2, 17],
      [:RANGLE, '>', 2, 18]
    ])
  end
end
# coding: utf-8

require 'spec_helper'

# Specs for the Lex::Lexer.rule class-level DSL: rule definition, linting of
# missing/duplicate rules, and rules referencing undeclared tokens.
RSpec.describe Lex::Lexer, '#rule' do

  it "raises error with no rules" do
    expect {
      stub_const('MyLexer', Class.new(Lex::Lexer) do
        tokens(:ID)
      end)
      MyLexer.new
    }.to raise_error(Lex::Linter::Failure, /No rules of the form/)
  end

  it "skips rule that has action but doesn't return token" do
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens(
        :IDENTIFIER,
        :LBRACE,
        :RBRACE
      )

      rule(:IDENTIFIER, /a|b/)

      # Action returns nil, so no LBRACE token should appear in the output.
      rule(:LBRACE, /{/) do |lexer, token|
      end

      rule(:RBRACE, /}/) do |lexer, token|
        token
      end
    end)
    my_lexer = MyLexer.new
    expect(my_lexer.lex("a{b}a").map(&:to_ary)).to eq([
      [:IDENTIFIER, 'a', 1, 1],
      [:IDENTIFIER, 'b', 1, 3],
      [:RBRACE, '}', 1, 4],
      [:IDENTIFIER, 'a', 1, 5]
    ])
  end

  # Fixed typo in the description: "uniquness" -> "uniqueness".
  it "validates uniqueness" do
    expect {
      Class.new(Lex::Lexer) do
        tokens( :WORD )

        rule(:WORD, /\w+/)

        rule(:WORD, /\w+/)
      end
    }.to raise_error(Lex::LexerError, /Rule 'WORD' redefined./)
  end

  it "throws error if using token in rule without prior specifying" do
    expect {
      Class.new(Lex::Lexer) do
        tokens(:ID)

        rule(:UNKNOWN, /a/)
      end
    }.to raise_error(Lex::LexerError, /Rule 'UNKNOWN' defined for an unspecified token UNKNOWN/)
  end
end
# coding: utf-8

require 'spec_helper'

# Cloning a Lex::State must produce a distinct object whose lexeme list is
# also a distinct object (a deep-enough copy for the collection itself).
RSpec.describe Lex::State, '.clone' do
  it "clones state instance" do
    lexeme = double(:lexeme)
    state  = Lex::State.new(:initial, [lexeme, lexeme])

    copy = state.clone

    expect(copy).not_to eql(state)
    expect(copy.lexemes).not_to eql(state.lexemes)
  end
end
# coding: utf-8

require 'spec_helper'

# Specs for lexer states: declaration linting, :exclusive vs :inclusive
# behaviour, and the error/ignore rules required per state.
RSpec.describe Lex::Lexer, '#states' do

  it "checks states" do
    expect {
      stub_const('MyLexer', Class.new(Lex::Lexer) do
        tokens(:IDENTIFIER)

        states(foo: :unknown)
      end)
      MyLexer.new
    }.to raise_error(Lex::Linter::Failure, /State type for state foo must be/)
  end

  it "lexes ignoring :exclusive state tokens" do
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens(
        :IDENTIFIER,
        :LBRACE,
        :RBRACE
      )
      states( brace: :exclusive )

      rule(:IDENTIFIER, /a|b/)

      # Entering '{' switches to the :brace state and still emits the token.
      rule(:LBRACE, /{/) do |lexer, token|
        lexer.push_state(:brace)
        token
      end

      # A state-prefixed rule (brace_RBRACE) only fires inside :brace.
      rule(:brace_RBRACE, /}/) do |lexer, token|
        lexer.pop_state
        token
      end

      error(:brace) do |lexer, token|
      end

      ignore(:brace, " \t")
    end)
    my_lexer = MyLexer.new
    # The 'bb' between braces is dropped: :exclusive suppresses default rules.
    expect(my_lexer.lex("a{bb}a").map(&:to_ary)).to eq([
      [:IDENTIFIER, 'a', 1, 1],
      [:LBRACE, '{', 1, 2],
      [:RBRACE, '}', 1, 5],
      [:IDENTIFIER, 'a', 1, 6]
    ])
  end

  it "lexes in :exclusive state" do
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens( :WORD )

      states( htmlcomment: :exclusive )

      rule(:WORD, /\w+/)

      rule(:htmlcomment, /<!--/) do |lexer, token|
        lexer.push_state(:htmlcomment)
      end

      rule(:htmlcomment_end, /-->/) do |lexer, token|
        lexer.pop_state
      end

      error(:htmlcomment) do |lexer, token|
      end

      ignore(:htmlcomment, " \t")

      ignore " \t"
    end)
    my_lexer = MyLexer.new
    # 'comment' is swallowed while inside the exclusive htmlcomment state.
    expect(my_lexer.lex("hello <!-- comment --> world").map(&:to_ary)).to eq([
      [:WORD, 'hello', 1, 1],
      [:WORD, 'world', 1, 24]
    ])
  end

  it "warns about lack of error condition in :exclusive state" do
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens( :WORD )

      states( htmlcomment: :exclusive )

      rule(:WORD, /\w+/)

      rule(:htmlcomment_WORD, /\w+/)

      ignore " "
    end)
    expect {
      MyLexer.new
    }.to output(/No error rule is defined for exclusive state 'htmlcomment'/).
      to_stderr_from_any_process
  end

  # Fixed description: this example declares an :exclusive state (and asserts
  # the exclusive-state warning); it previously claimed ":inclusive".
  it "warns about lack of ignore condition in :exclusive state" do
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens( :WORD )

      states( htmlcomment: :exclusive )

      rule(:WORD, /\w+/)

      rule(:htmlcomment_WORD, /\w+/)

      error(:htmlcomment)
    end)
    expect {
      MyLexer.new
    }.to output(/No ignore rule is defined for exclusive state 'htmlcomment'/).
      to_stderr_from_any_process
  end

  it "lexes in :inclusive state" do
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens( :WORD )

      states( htmlcomment: :inclusive )

      rule(:WORD, /\w+/)

      rule(:htmlcomment, /<!--/) do |lexer, token|
        lexer.push_state(:htmlcomment)
      end

      rule(:htmlcomment_end, /-->/) do |lexer, token|
        lexer.pop_state
      end

      error(:htmlcomment) do |lexer, token|
      end

      ignore(:htmlcomment, " \t")

      ignore " \t"
    end)
    my_lexer = MyLexer.new
    # :inclusive keeps the default rules active, so 'comment' is tokenized too.
    expect(my_lexer.lex("hello <!-- comment --> world").map(&:to_ary)).to eq([
      [:WORD, 'hello', 1, 1],
      [:WORD, 'comment', 1, 12],
      [:WORD, 'world', 1, 24]
    ])
  end

  it "includes error condition in :inclusive state" do
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens( :WORD )

      states( htmlcomment: :inclusive )

      rule(:WORD, /\w+/)

      rule(:htmlcomment, /<!--/) do |lexer, token|
        lexer.push_state(:htmlcomment)
      end

      rule(:htmlcomment_end, /-->/) do |lexer, token|
        lexer.pop_state
      end

      # Default error handler is shared with the inclusive state.
      error do |lexer, token| end

      ignore " \t"
    end)
    my_lexer = MyLexer.new
    expect(my_lexer.lex("hello <!-- comment --> world").map(&:to_ary)).to eq([
      [:WORD, 'hello', 1, 1],
      [:WORD, 'comment', 1, 12],
      [:WORD, 'world', 1, 24]
    ])
  end

  it "complains if there are no rules for state" do
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens( :WORD )

      states( htmlcomment: :inclusive )

      rule(:WORD, /\w+/)

      error do |lexer, token| end

      ignore " \t"
    end)
    expect {
      MyLexer.new
    }.to raise_error(Lex::Linter::Failure, /No rules defined for state 'htmlcomment'/)
  end
end
# coding: utf-8

require 'spec_helper'

# Specs for the Lex::Lexer.tokens DSL: the token list must be present,
# well-formed, and free of duplicates.
RSpec.describe Lex::Lexer, '#tokens' do
  # Fixed wording: "non-empty list tokens" -> "non-empty list of tokens".
  it "requires a non-empty list of tokens" do
    expect {
      stub_const('MyLexer', Class.new(Lex::Lexer) do
        tokens()
      end)
      MyLexer.new
    }.to raise_error(Lex::Linter::Failure, /No token list defined/)
  end

  it "requires a list of valid tokens" do
    expect {
      stub_const('MyLexer', Class.new(Lex::Lexer) do
        tokens(:"#token")
      end)
      MyLexer.new
    }.to raise_error(Lex::Linter::Failure, /Bad token name `#token`/)
  end

  it "doesn't allow for multiple same tokens" do
    expect {
      stub_const('MyLexer', Class.new(Lex::Lexer) do
        tokens(:token, :token)
      end)
      MyLexer.new
    }.to raise_error(Lex::Linter::Failure, /Token `token` already defined/)
  end
end
data/tasks/console.rake
ADDED
data/tasks/coverage.rake
ADDED
data/tasks/spec.rake
ADDED
# encoding: utf-8

# Defines `spec`, `spec:unit` and `spec:integration` rake tasks. When RSpec
# is not installed, each task is replaced by a stub that explains how to
# install it instead of failing with a LoadError.
begin
  require 'rspec/core/rake_task'

  desc 'Run all specs'
  RSpec::Core::RakeTask.new(:spec) do |t|
    t.pattern = 'spec/{unit,integration}{,/*/**}/*_spec.rb'
  end

  namespace :spec do
    desc 'Run unit specs'
    RSpec::Core::RakeTask.new(:unit) do |t|
      t.pattern = 'spec/unit{,/*/**}/*_spec.rb'
    end

    desc 'Run integration specs'
    RSpec::Core::RakeTask.new(:integration) do |t|
      t.pattern = 'spec/integration{,/*/**}/*_spec.rb'
    end
  end
rescue LoadError
  %w[spec spec:unit spec:integration].each do |task_name|
    task task_name do
      $stderr.puts "In order to run #{task_name}, do `gem install rspec`"
    end
  end
end