lex 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.rspec +3 -0
- data/.ruby-version +1 -0
- data/.travis.yml +22 -0
- data/Gemfile +19 -0
- data/LICENSE.txt +22 -0
- data/README.md +423 -0
- data/Rakefile +8 -0
- data/lex.gemspec +22 -0
- data/lib/lex.rb +22 -0
- data/lib/lex/lexeme.rb +27 -0
- data/lib/lex/lexer.rb +210 -0
- data/lib/lex/lexer/dsl.rb +49 -0
- data/lib/lex/lexer/rule_dsl.rb +165 -0
- data/lib/lex/lexers.rb +11 -0
- data/lib/lex/lexers/html.rb +8 -0
- data/lib/lex/linter.rb +114 -0
- data/lib/lex/logger.rb +21 -0
- data/lib/lex/source_line.rb +13 -0
- data/lib/lex/state.rb +37 -0
- data/lib/lex/token.rb +47 -0
- data/lib/lex/version.rb +5 -0
- data/spec/spec_helper.rb +50 -0
- data/spec/unit/error_spec.rb +42 -0
- data/spec/unit/keyword_spec.rb +34 -0
- data/spec/unit/lex_spec.rb +60 -0
- data/spec/unit/position_spec.rb +94 -0
- data/spec/unit/rule_spec.rb +63 -0
- data/spec/unit/state/clone_spec.rb +15 -0
- data/spec/unit/states_spec.rb +194 -0
- data/spec/unit/tokens_spec.rb +32 -0
- data/tasks/console.rake +10 -0
- data/tasks/coverage.rake +11 -0
- data/tasks/spec.rake +29 -0
- metadata +104 -0
# coding: utf-8

require 'spec_helper'

RSpec.describe Lex::Lexer, 'position' do
  it "calculates line number and position info from input" do
    code = unindent(<<-EOS)
      x = 1
      y = 2
      s = x + y
    EOS

    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens(:NUMBER, :PLUS, :IDENTIFIER, :EQUALS)

      rule(:PLUS, /\+/)
      rule(:EQUALS, /=/)
      rule(:IDENTIFIER, /\A[_\$a-zA-Z][_\$0-9a-zA-Z]*/)

      # Coerce the matched digits to an Integer before emitting the token.
      rule(:NUMBER, /[0-9]+/) do |_lexer, number_token|
        number_token.value = number_token.value.to_i
        number_token
      end

      ignore " \t"

      # Advance the line counter on newlines; returning nil emits no token.
      rule(:newline, /\n+/) do |lexer, newline_token|
        lexer.advance_line(newline_token.value.length)
      end
    end)

    my_lexer = MyLexer.new
    expect(my_lexer.lex(code).map(&:to_ary)).to eq([
      [:IDENTIFIER, 'x', 1, 1],
      [:EQUALS, '=', 1, 3],
      [:NUMBER, 1, 1, 5],
      [:IDENTIFIER, 'y', 2, 1],
      [:EQUALS, '=', 2, 3],
      [:NUMBER, 2, 2, 5],
      [:IDENTIFIER, 's', 3, 1],
      [:EQUALS, '=', 3, 3],
      [:IDENTIFIER, 'x', 3, 5],
      [:PLUS, '+', 3, 7],
      [:IDENTIFIER, 'y', 3, 9]
    ])
  end

  it "correctly tracks multiline content" do
    code = unindent(<<-EOS)
      This is
      <b>webpage!</b>
    EOS
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens(:WORD, :LANGLE, :RANGLE, :LANGLESLASH)

      rule(:WORD, /[^ <>\n]+/)
      rule(:LANGLE, /</)
      rule(:RANGLE, />/)
      rule(:LANGLESLASH, /<\//)

      # Keep line/column bookkeeping in sync across line breaks.
      rule(:newline, /\n/) do |lexer, newline_token|
        lexer.advance_line(newline_token.value.size)
      end

      ignore " "

      # Swallow unmatched input silently for this example.
      error do |_lexer, _token|
      end
    end)

    my_lexer = MyLexer.new
    expect(my_lexer.lex(code).map(&:to_ary)).to eq([
      [:WORD, 'This', 1, 1],
      [:WORD, 'is', 1, 6],
      [:LANGLE, '<', 2, 4],
      [:WORD, 'b', 2, 5],
      [:RANGLE, '>', 2, 6],
      [:WORD, 'webpage!', 2, 7],
      [:LANGLESLASH, '</', 2, 15],
      [:WORD, 'b', 2, 17],
      [:RANGLE, '>', 2, 18]
    ])
  end
end
# coding: utf-8

require 'spec_helper'

RSpec.describe Lex::Lexer, '#rule' do

  it "raises error with no rules" do
    expect {
      stub_const('MyLexer', Class.new(Lex::Lexer) do
        tokens(:ID)
      end)
      MyLexer.new
    }.to raise_error(Lex::Linter::Failure, /No rules of the form/)
  end

  it "skips rule that has action but doesn't return token" do
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens(
        :IDENTIFIER,
        :LBRACE,
        :RBRACE
      )

      rule(:IDENTIFIER, /a|b/)

      # Action returns nil, so the matched '{' is dropped from the output.
      rule(:LBRACE, /{/) do |_lexer, _token|
      end

      # Action returns the token, so '}' is emitted normally.
      rule(:RBRACE, /}/) do |_lexer, token|
        token
      end
    end)
    my_lexer = MyLexer.new
    expect(my_lexer.lex("a{b}a").map(&:to_ary)).to eq([
      [:IDENTIFIER, 'a', 1, 1],
      [:IDENTIFIER, 'b', 1, 3],
      [:RBRACE, '}', 1, 4],
      [:IDENTIFIER, 'a', 1, 5]
    ])
  end

  # Fixed typo in description: "uniquness" -> "uniqueness".
  it "validates uniqueness" do
    expect {
      Class.new(Lex::Lexer) do
        tokens( :WORD )

        rule(:WORD, /\w+/)

        rule(:WORD, /\w+/)
      end
    }.to raise_error(Lex::LexerError, /Rule 'WORD' redefined./)
  end

  it "throws error if using token in rule without prior specifying" do
    expect {
      Class.new(Lex::Lexer) do
        tokens(:ID)

        rule(:UNKNOWN, /a/)
      end
    }.to raise_error(Lex::LexerError, /Rule 'UNKNOWN' defined for an unspecified token UNKNOWN/)
  end
end
# coding: utf-8

require 'spec_helper'

RSpec.describe Lex::State, '.clone' do
  it "clones state instance" do
    lexeme = double(:lexeme)
    state = Lex::State.new(:initial, [lexeme, lexeme])
    copy = state.clone

    # A clone is a distinct object with its own copy of the lexeme list.
    expect(copy).not_to eql(state)
    expect(copy.lexemes).not_to eql(state.lexemes)
  end
end
# coding: utf-8

require 'spec_helper'

RSpec.describe Lex::Lexer, '#states' do

  it "checks states" do
    expect {
      stub_const('MyLexer', Class.new(Lex::Lexer) do
        tokens(:IDENTIFIER)

        states(foo: :unknown)
      end)
      MyLexer.new
    }.to raise_error(Lex::Linter::Failure, /State type for state foo must be/)
  end

  it "lexes ignoring :exclusive state tokens" do
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens(
        :IDENTIFIER,
        :LBRACE,
        :RBRACE
      )
      states( brace: :exclusive )

      rule(:IDENTIFIER, /a|b/)

      # Entering '{' switches the lexer into the exclusive :brace state.
      rule(:LBRACE, /{/) do |lexer, token|
        lexer.push_state(:brace)
        token
      end

      # State-prefixed rule: only active while in the :brace state.
      rule(:brace_RBRACE, /}/) do |lexer, token|
        lexer.pop_state
        token
      end

      # Silently drop anything unmatched inside :brace (e.g. 'bb').
      error(:brace) do |_lexer, _token|
      end

      ignore(:brace, " \t")
    end)
    my_lexer = MyLexer.new
    expect(my_lexer.lex("a{bb}a").map(&:to_ary)).to eq([
      [:IDENTIFIER, 'a', 1, 1],
      [:LBRACE, '{', 1, 2],
      [:RBRACE, '}', 1, 5],
      [:IDENTIFIER, 'a', 1, 6]
    ])
  end

  it "lexes in :exclusive state" do
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens( :WORD )

      states( htmlcomment: :exclusive )

      rule(:WORD, /\w+/)

      rule(:htmlcomment, /<!--/) do |lexer, _token|
        lexer.push_state(:htmlcomment)
      end

      rule(:htmlcomment_end, /-->/) do |lexer, _token|
        lexer.pop_state
      end

      error(:htmlcomment) do |_lexer, _token|
      end

      ignore(:htmlcomment, " \t")

      ignore " \t"
    end)
    my_lexer = MyLexer.new
    # The comment body is dropped entirely because :htmlcomment is exclusive.
    expect(my_lexer.lex("hello <!-- comment --> world").map(&:to_ary)).to eq([
      [:WORD, 'hello', 1, 1],
      [:WORD, 'world', 1, 24]
    ])
  end

  it "warns about lack of error condition in :exclusive state" do
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens( :WORD )

      states( htmlcomment: :exclusive )

      rule(:WORD, /\w+/)

      rule(:htmlcomment_WORD, /\w+/)

      ignore " "
    end)
    expect {
      MyLexer.new
    }.to output(/No error rule is defined for exclusive state 'htmlcomment'/).
      to_stderr_from_any_process
  end

  # Fixed description: the lexer under test declares an :exclusive state
  # and the assertion checks the "exclusive state" warning message.
  it "warns about lack of ignore condition in :exclusive state" do
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens( :WORD )

      states( htmlcomment: :exclusive )

      rule(:WORD, /\w+/)

      rule(:htmlcomment_WORD, /\w+/)

      error(:htmlcomment)
    end)
    expect {
      MyLexer.new
    }.to output(/No ignore rule is defined for exclusive state 'htmlcomment'/).
      to_stderr_from_any_process
  end

  it "lexes in :inclusive state" do
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens( :WORD )

      states( htmlcomment: :inclusive )

      rule(:WORD, /\w+/)

      rule(:htmlcomment, /<!--/) do |lexer, _token|
        lexer.push_state(:htmlcomment)
      end

      rule(:htmlcomment_end, /-->/) do |lexer, _token|
        lexer.pop_state
      end

      error(:htmlcomment) do |_lexer, _token|
      end

      ignore(:htmlcomment, " \t")

      ignore " \t"
    end)
    my_lexer = MyLexer.new
    # Inclusive state keeps the default rules active, so 'comment' is lexed.
    expect(my_lexer.lex("hello <!-- comment --> world").map(&:to_ary)).to eq([
      [:WORD, 'hello', 1, 1],
      [:WORD, 'comment', 1, 12],
      [:WORD, 'world', 1, 24]
    ])
  end

  it "includes error condition in :inclusive state" do
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens( :WORD )

      states( htmlcomment: :inclusive )

      rule(:WORD, /\w+/)

      rule(:htmlcomment, /<!--/) do |lexer, _token|
        lexer.push_state(:htmlcomment)
      end

      rule(:htmlcomment_end, /-->/) do |lexer, _token|
        lexer.pop_state
      end

      # The default error handler is inherited by the inclusive state.
      error do |_lexer, _token| end

      ignore " \t"
    end)
    my_lexer = MyLexer.new
    expect(my_lexer.lex("hello <!-- comment --> world").map(&:to_ary)).to eq([
      [:WORD, 'hello', 1, 1],
      [:WORD, 'comment', 1, 12],
      [:WORD, 'world', 1, 24]
    ])
  end

  it "complains if there are no rules for state" do
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens( :WORD )

      states( htmlcomment: :inclusive )

      rule(:WORD, /\w+/)

      error do |_lexer, _token| end

      ignore " \t"
    end)
    expect {
      MyLexer.new
    }.to raise_error(Lex::Linter::Failure, /No rules defined for state 'htmlcomment'/)
  end
end
# coding: utf-8

require 'spec_helper'

RSpec.describe Lex::Lexer, '#tokens' do
  # Fixed description grammar: "list tokens" -> "list of tokens".
  it "requires a non-empty list of tokens" do
    expect {
      stub_const('MyLexer', Class.new(Lex::Lexer) do
        tokens()
      end)
      MyLexer.new
    }.to raise_error(Lex::Linter::Failure, /No token list defined/)
  end

  it "requires a list of valid tokens" do
    expect {
      stub_const('MyLexer', Class.new(Lex::Lexer) do
        tokens(:"#token")
      end)
      MyLexer.new
    }.to raise_error(Lex::Linter::Failure, /Bad token name `#token`/)
  end

  it "doesn't allow for multiple same tokens" do
    expect {
      stub_const('MyLexer', Class.new(Lex::Lexer) do
        tokens(:token, :token)
      end)
      MyLexer.new
    }.to raise_error(Lex::Linter::Failure, /Token `token` already defined/)
  end
end
data/tasks/console.rake
ADDED
data/tasks/coverage.rake
ADDED
data/tasks/spec.rake
ADDED
# encoding: utf-8

begin
  require 'rspec/core/rake_task'

  desc 'Run all specs'
  RSpec::Core::RakeTask.new(:spec) do |task|
    task.pattern = 'spec/{unit,integration}{,/*/**}/*_spec.rb'
  end

  namespace :spec do
    # Generate the per-suite tasks from one table to avoid repetition.
    { unit: 'Run unit specs', integration: 'Run integration specs' }.each do |suite, description|
      desc description
      RSpec::Core::RakeTask.new(suite) do |task|
        task.pattern = "spec/#{suite}{,/*/**}/*_spec.rb"
      end
    end
  end

rescue LoadError
  # Without rspec installed, define stub tasks that explain how to get it.
  %w[spec spec:unit spec:integration].each do |name|
    task name do
      $stderr.puts "In order to run #{name}, do `gem install rspec`"
    end
  end
end