lex 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.rspec +3 -0
- data/.ruby-version +1 -0
- data/.travis.yml +22 -0
- data/Gemfile +19 -0
- data/LICENSE.txt +22 -0
- data/README.md +423 -0
- data/Rakefile +8 -0
- data/lex.gemspec +22 -0
- data/lib/lex.rb +22 -0
- data/lib/lex/lexeme.rb +27 -0
- data/lib/lex/lexer.rb +210 -0
- data/lib/lex/lexer/dsl.rb +49 -0
- data/lib/lex/lexer/rule_dsl.rb +165 -0
- data/lib/lex/lexers.rb +11 -0
- data/lib/lex/lexers/html.rb +8 -0
- data/lib/lex/linter.rb +114 -0
- data/lib/lex/logger.rb +21 -0
- data/lib/lex/source_line.rb +13 -0
- data/lib/lex/state.rb +37 -0
- data/lib/lex/token.rb +47 -0
- data/lib/lex/version.rb +5 -0
- data/spec/spec_helper.rb +50 -0
- data/spec/unit/error_spec.rb +42 -0
- data/spec/unit/keyword_spec.rb +34 -0
- data/spec/unit/lex_spec.rb +60 -0
- data/spec/unit/position_spec.rb +94 -0
- data/spec/unit/rule_spec.rb +63 -0
- data/spec/unit/state/clone_spec.rb +15 -0
- data/spec/unit/states_spec.rb +194 -0
- data/spec/unit/tokens_spec.rb +32 -0
- data/tasks/console.rake +10 -0
- data/tasks/coverage.rake +11 -0
- data/tasks/spec.rake +29 -0
- metadata +104 -0
data/lib/lex/lexers.rb
ADDED
@@ -0,0 +1,11 @@
# coding: utf-8

require 'lex'

# Lexer implementations
#
# @note This file is not normally available. You must require
#   `lex/lexers` to load it.

# Put the bundled lexer implementations on the load path exactly once.
lexers_dir = ::File.expand_path('../lexers', __FILE__)
$LOAD_PATH.unshift(lexers_dir) unless $LOAD_PATH.include?(lexers_dir)
data/lib/lex/linter.rb
ADDED
@@ -0,0 +1,114 @@
# coding: utf-8

module Lex
  # A class responsible for checking lexer definitions
  #
  # @api public
  class Linter
    # Valid token-name pattern. Anchored with \A/\z rather than ^/$ so
    # that names containing embedded newlines cannot pass the check.
    IDENTIFIER_RE = /\A[a-zA-Z0-9]+\z/.freeze

    # Failure raised by +complain+
    Failure = Class.new(StandardError)

    # Run linting of lexer
    #
    # @param [Lex::Lexer] lexer
    #   the lexer definition to validate
    #
    # @raise [Lex::Linter::Failure]
    #
    # @api public
    def lint(lexer)
      validate_tokens(lexer)
      validate_states(lexer)
      validate_rules(lexer)
    end

    private

    # Check if token has valid name
    #
    # @param [Symbol,String] value
    #   token to check
    #
    # @return [Boolean]
    #
    # @api private
    def identifier?(value)
      # Coerce to String first: Symbols have no #=~, so `value =~ re`
      # fell through to Object#=~ and always returned nil, wrongly
      # rejecting every Symbol token name.
      !!(value.to_s =~ IDENTIFIER_RE)
    end

    # Validate provided tokens
    #
    # @api private
    def validate_tokens(lexer)
      # Check the collection contract before calling #empty? — on a
      # non-collection #empty? would raise NoMethodError before the
      # friendly complaint could be produced.
      if !lexer.lex_tokens.respond_to?(:to_ary)
        complain("Tokens must be a list or enumerable")
      end
      if lexer.lex_tokens.empty?
        complain("No token list defined")
      end

      terminals = []
      lexer.lex_tokens.each do |token|
        if !identifier?(token)
          complain("Bad token name `#{token}`")
        end
        if terminals.include?(token)
          complain("Token `#{token}` already defined")
        end
        terminals << token
      end
    end

    # Validate provided state names
    #
    # @api private
    def validate_states(lexer)
      if !lexer.state_info.respond_to?(:each_pair)
        complain("States must be defined as a hash")
      end

      lexer.state_info.each do |state_name, state_type|
        if ![:inclusive, :exclusive].include?(state_type)
          complain("State type for state #{state_name}" \
                   " must be :inclusive or :exclusive")
        end

        # Exclusive states ignore the default rules, so missing error
        # or ignore handlers deserve a warning (not a hard failure).
        if state_type == :exclusive
          if !lexer.state_error.key?(state_name)
            lexer.logger.warn("No error rule is defined " \
                              "for exclusive state '#{state_name}'")
          end
          if !lexer.state_ignore.key?(state_name)
            lexer.logger.warn("No ignore rule is defined " \
                              "for exclusive state '#{state_name}'")
          end
        end
      end
    end

    # Validate rules
    #
    # @api private
    def validate_rules(lexer)
      if lexer.state_re.empty?
        complain("No rules of the form rule(name, pattern) are defined")
      end

      lexer.state_info.each do |state_name, _state_type|
        if !lexer.state_re.key?(state_name.to_sym)
          complain("No rules defined for state '#{state_name}'")
        end
      end
    end

    # Raise a failure if validation of a lexer fails
    #
    # @raise [Lex::Linter::Failure]
    #
    # @api private
    def complain(*args)
      raise Failure, *args
    end
  end # Linter
end # Lex
data/lib/lex/logger.rb
ADDED
@@ -0,0 +1,21 @@
# coding: utf-8

module Lex
  # Thin wrapper around a logging backend used by lexer components.
  class Logger
    # @param [::Logger] logger
    #   an injected logging backend; defaults to a ::Logger on STDERR
    def initialize(logger = nil)
      # Honour the injected backend. Previously the argument was
      # silently ignored and a STDERR logger was always created.
      @logger = logger || ::Logger.new(STDERR)
    end

    # Log an informational message
    def info(message)
      @logger.info(message)
    end

    # Log an error message
    def error(message)
      @logger.error(message)
    end

    # Log a warning message
    def warn(message)
      @logger.warn(message)
    end
  end # Logger
end # Lex
data/lib/lex/state.rb
ADDED
@@ -0,0 +1,37 @@
# coding: utf-8

module Lex
  # A named, enumerable collection of lexemes for a single lexer state.
  class State
    include Enumerable

    attr_reader :name, :lexemes

    # @param [Symbol] name
    #   the state's name
    # @param [Array] lexemes
    #   initial lexemes held by this state
    def initialize(name, lexemes = [])
      @name    = name
      @lexemes = lexemes
    end

    # Yield each lexeme in turn (Enumerable contract).
    def each(&block)
      lexemes.each(&block)
    end

    # Append a lexeme unconditionally.
    def <<(lexeme)
      lexemes << lexeme
    end

    # Merge in lexemes, skipping any already present. Checking against
    # the growing list also dedupes repeats inside +values+ itself.
    def update(values)
      values.each do |lexeme|
        self << lexeme unless lexemes.include?(lexeme)
      end
    end

    # Two states are equal when both name and lexemes match.
    def ==(other)
      name == other.name && lexemes == other.lexemes
    end

    # Copy with a fresh lexeme list whose members are themselves cloned.
    def clone
      self.class.new(name, lexemes.map(&:clone))
    end
  end # State
end # Lex
data/lib/lex/token.rb
ADDED
@@ -0,0 +1,47 @@
# coding: utf-8

require 'forwardable'

module Lex
  # Used to represent the tokens produced
  class Token
    extend Forwardable

    attr_accessor :name, :value

    attr_reader :action

    # Position accessors are forwarded to the backing SourceLine.
    def_delegators :@source_line, :line, :column

    # @param [Symbol] name
    # @param [String] value
    # @param [Proc] action
    #   optional block run when the token is matched
    def initialize(name, value, &action)
      @name        = name
      @value       = value
      @action      = action
      @source_line = SourceLine.new
    end

    # Record the position at which this token was matched.
    def update_line(line, column)
      @source_line.line   = line
      @source_line.column = column
    end

    # Return this token as array of values
    #
    # @return [Symbol, String, Integer, Integer]
    #
    # @api public
    def to_ary
      [name, value, line, column]
    end

    # Return a string representation
    #
    # @return String
    #
    # @api public
    def to_s
      format('Lex::Token(%s)', to_ary.join(','))
    end
    alias_method :inspect, :to_s
  end # Token
end # Lex
data/lib/lex/version.rb
ADDED
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,50 @@
# coding: utf-8

# Enable coverage reporting only when explicitly requested (COVERAGE)
# or when running on CI (TRAVIS), and only on Rubies new enough for
# SimpleCov.
if RUBY_VERSION > '1.9' and (ENV['COVERAGE'] || ENV['TRAVIS'])
  require 'simplecov'
  require 'coveralls'

  # Report both locally (HTML) and to the Coveralls service.
  SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter[
    SimpleCov::Formatter::HTMLFormatter,
    Coveralls::SimpleCov::Formatter
  ]

  SimpleCov.start do
    command_name 'spec'
    # Exclude the specs themselves from coverage figures.
    add_filter 'spec'
  end
end

require 'lex'

RSpec.configure do |config|
  config.expect_with :rspec do |expectations|
    expectations.include_chain_clauses_in_custom_matcher_descriptions = true
  end

  config.mock_with :rspec do |mocks|
    # Fail fast when a stubbed method does not exist on the real object.
    mocks.verify_partial_doubles = true
  end

  # Limits the available syntax to the non-monkey patched syntax that is recommended.
  config.disable_monkey_patching!

  # This setting enables warnings. It's recommended, but in some cases may
  # be too noisy due to issues in dependencies.
  config.warnings = true

  # Use the verbose "doc" formatter when a single spec file is run.
  if config.files_to_run.one?
    config.default_formatter = 'doc'
  end

  # Print the two slowest examples to help spot slow specs.
  config.profile_examples = 2

  # Randomize ordering to surface order dependencies; the seed printed
  # by RSpec can be passed back via --seed to reproduce a given order.
  config.order = :random

  Kernel.srand config.seed
end

# Strip the common leading indentation from a heredoc-style string.
def unindent(string)
  prefix = string.scan(/^[ \t]+(?=\S)/).min
  string.gsub(/^#{prefix}/, '').chomp
end
@@ -0,0 +1,42 @@
# coding: utf-8

require 'spec_helper'

RSpec.describe Lex::Lexer, '#error' do

  it "registers error handler" do
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens(:IDENTIFIER)

      rule(:IDENTIFIER, /a|b/)

      # The handler receives each unmatched character as an :error
      # token; returning it keeps the token in the output stream.
      error do |lexer, token|
        token
      end

      ignore " \t"
    end)
    my_lexer = MyLexer.new
    # `(` and `)` match no rule, so they surface as :error tokens.
    expect(my_lexer.lex("a(b)a").map(&:to_ary)).to eq([
      [:IDENTIFIER, 'a', 1, 1],
      [:error, '(', 1, 2],
      [:IDENTIFIER, 'b', 1, 3],
      [:error, ')', 1, 4],
      [:IDENTIFIER, 'a', 1, 5]
    ])
  end

  it "raises error without error handler" do
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens(:IDENTIFIER)

      rule(:IDENTIFIER, /a|b/)

      ignore " \t"
    end)
    my_lexer = MyLexer.new
    # Without an error handler, any unmatched character is fatal.
    expect {
      my_lexer.lex("a(b)a").to_a
    }.to raise_error(Lex::LexerError, /Illegal character `\(`/)
  end
end
@@ -0,0 +1,34 @@
# coding: utf-8

require 'spec_helper'

RSpec.describe Lex::Lexer, 'keywords' do
  it "allows to easily create keyword tokens" do
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      # Map of reserved words to their dedicated token names.
      def self.keywords
        {
          if: :IF,
          then: :THEN,
          else: :ELSE,
          while: :WHILE
        }
      end

      tokens(:IDENTIFIER, *keywords.values)

      # Identifiers that collide with a keyword are renamed to the
      # keyword token; everything else stays :IDENTIFIER.
      rule(:IDENTIFIER, /\w[\w\d]*/) do |lexer, token|
        token.name = lexer.class.keywords.fetch(token.value.to_sym, :IDENTIFIER)
        token
      end

      ignore(' ')
    end)
    my_lexer = MyLexer.new

    expect(my_lexer.lex("if then else").map(&:to_ary)).to eq([
      [:IF, 'if', 1, 1],
      [:THEN, 'then', 1, 4],
      [:ELSE, 'else', 1, 9]
    ])
  end
end
@@ -0,0 +1,60 @@
# coding: utf-8

require 'spec_helper'

RSpec.describe Lex::Lexer, 'lex' do

  it "tokenizes simple input" do
    code = unindent(<<-EOS)
      x = 5 + 44 * (s - t)
    EOS

    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens(
        :NUMBER,
        :PLUS,
        :MINUS,
        :TIMES,
        :DIVIDE,
        :LPAREN,
        :RPAREN,
        :EQUALS,
        :IDENTIFIER
      )

      rule(:PLUS, /\+/)
      rule(:MINUS, /\-/)
      rule(:TIMES, /\*/)
      rule(:DIVIDE, /\//)
      rule(:LPAREN, /\(/)
      rule(:RPAREN, /\)/)
      rule(:EQUALS, /=/)
      rule(:IDENTIFIER, /\A[_\$a-zA-Z][_\$0-9a-zA-Z]*/)

      # Convert the matched digit run into an Integer value.
      rule(:NUMBER, /[0-9]+/) do |lexer, token|
        token.value = token.value.to_i
        token
      end

      # Advance the line counter so positions stay accurate; returning
      # nil (no token) discards the newline from the output.
      rule(:newline, /\n+/) do |lexer, token|
        lexer.advance_line(token.value.length)
      end

      ignore " \t"
    end)
    my_lexer = MyLexer.new
    # Each tuple is [name, value, line, column].
    expect(my_lexer.lex(code).map(&:to_ary)).to eq([
      [:IDENTIFIER, 'x', 1, 1],
      [:EQUALS, '=', 1, 3],
      [:NUMBER, 5, 1, 5],
      [:PLUS, '+', 1, 7],
      [:NUMBER, 44, 1, 9],
      [:TIMES, '*', 1, 12],
      [:LPAREN, '(', 1, 14],
      [:IDENTIFIER, 's', 1, 15],
      [:MINUS, '-', 1, 17],
      [:IDENTIFIER, 't', 1, 19],
      [:RPAREN, ')', 1, 20]
    ])
  end
end