lex 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.rspec +3 -0
- data/.ruby-version +1 -0
- data/.travis.yml +22 -0
- data/Gemfile +19 -0
- data/LICENSE.txt +22 -0
- data/README.md +423 -0
- data/Rakefile +8 -0
- data/lex.gemspec +22 -0
- data/lib/lex.rb +22 -0
- data/lib/lex/lexeme.rb +27 -0
- data/lib/lex/lexer.rb +210 -0
- data/lib/lex/lexer/dsl.rb +49 -0
- data/lib/lex/lexer/rule_dsl.rb +165 -0
- data/lib/lex/lexers.rb +11 -0
- data/lib/lex/lexers/html.rb +8 -0
- data/lib/lex/linter.rb +114 -0
- data/lib/lex/logger.rb +21 -0
- data/lib/lex/source_line.rb +13 -0
- data/lib/lex/state.rb +37 -0
- data/lib/lex/token.rb +47 -0
- data/lib/lex/version.rb +5 -0
- data/spec/spec_helper.rb +50 -0
- data/spec/unit/error_spec.rb +42 -0
- data/spec/unit/keyword_spec.rb +34 -0
- data/spec/unit/lex_spec.rb +60 -0
- data/spec/unit/position_spec.rb +94 -0
- data/spec/unit/rule_spec.rb +63 -0
- data/spec/unit/state/clone_spec.rb +15 -0
- data/spec/unit/states_spec.rb +194 -0
- data/spec/unit/tokens_spec.rb +32 -0
- data/tasks/console.rake +10 -0
- data/tasks/coverage.rake +11 -0
- data/tasks/spec.rake +29 -0
- metadata +104 -0
data/lib/lex/lexers.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
# coding: utf-8

require 'lex'

# Lexer implementations
#
# @note This file is not normally available. You must require
#   `lex/lexers` to load it.

# Resolve the sibling `lexers/` directory relative to this file and
# put it at the front of the load path exactly once.
lexers_dir = ::File.expand_path(::File.join('..', 'lexers'), __FILE__)
unless $LOAD_PATH.include?(lexers_dir)
  $LOAD_PATH.unshift(lexers_dir)
end
|
data/lib/lex/linter.rb
ADDED
@@ -0,0 +1,114 @@
|
|
1
|
+
# coding: utf-8

module Lex
  # A class responsible for checking lexer definitions
  #
  # @api public
  class Linter
    # Valid token names are strictly alphanumeric. Anchored with \A and
    # \z so the whole value must match (^/$ are per-line anchors and
    # would accept names containing embedded newlines).
    IDENTIFIER_RE = /\A[a-zA-Z0-9]+\z/.freeze

    # Failure raised by +complain+
    Failure = Class.new(StandardError)

    # Run linting of lexer
    #
    # @param [Lex::Lexer] lexer
    #   the lexer definition to validate
    #
    # @raise [Lex::Linter::Failure]
    #   when the lexer definition is invalid
    #
    # @api public
    def lint(lexer)
      validate_tokens(lexer)
      validate_states(lexer)
      validate_rules(lexer)
    end

    private

    # Check if token has valid name
    #
    # @param [Symbol,String] value
    #   token to check
    #
    # @return [Boolean]
    #
    # @api private
    def identifier?(value)
      # Coerce to String first: matching a Symbol with `=~` falls back
      # to Object#=~, which always returns nil (and is removed in
      # Ruby 3.2), so every symbolic token name would be rejected.
      !(value.to_s =~ IDENTIFIER_RE).nil?
    end

    # Validate provided tokens
    #
    # @param [Lex::Lexer] lexer
    #
    # @raise [Lex::Linter::Failure]
    #
    # @api private
    def validate_tokens(lexer)
      # Check the shape before querying it so a non-enumerable token
      # set reports a lint failure rather than a NoMethodError.
      if !lexer.lex_tokens.respond_to?(:to_ary)
        complain("Tokens must be a list or enumerable")
      end
      if lexer.lex_tokens.empty?
        complain("No token list defined")
      end

      terminals = []
      lexer.lex_tokens.each do |token|
        if !identifier?(token)
          complain("Bad token name `#{token}`")
        end
        if terminals.include?(token)
          complain("Token `#{token}` already defined")
        end
        terminals << token
      end
    end

    # Validate provided state names
    #
    # State types must be :inclusive or :exclusive. Missing error or
    # ignore rules in exclusive states are reported as warnings via
    # the lexer's logger, not as failures.
    #
    # @param [Lex::Lexer] lexer
    #
    # @raise [Lex::Linter::Failure]
    #
    # @api private
    def validate_states(lexer)
      if !lexer.state_info.respond_to?(:each_pair)
        complain("States must be defined as a hash")
      end

      lexer.state_info.each do |state_name, state_type|
        if ![:inclusive, :exclusive].include?(state_type)
          complain("State type for state #{state_name}" \
                   " must be :inclusive or :exclusive")
        end

        if state_type == :exclusive
          if !lexer.state_error.key?(state_name)
            lexer.logger.warn("No error rule is defined " \
                              "for exclusive state '#{state_name}'")
          end
          if !lexer.state_ignore.key?(state_name)
            lexer.logger.warn("No ignore rule is defined " \
                              "for exclusive state '#{state_name}'")
          end
        end
      end
    end

    # Validate rules
    #
    # At least one rule must exist, and every declared state must have
    # rules attached to it.
    #
    # @param [Lex::Lexer] lexer
    #
    # @raise [Lex::Linter::Failure]
    #
    # @api private
    def validate_rules(lexer)
      if lexer.state_re.empty?
        complain("No rules of the form rule(name, pattern) are defined")
      end

      lexer.state_info.each do |state_name, _state_type|
        if !lexer.state_re.key?(state_name.to_sym)
          complain("No rules defined for state '#{state_name}'")
        end
      end
    end

    # Raise a failure if validation of a lexer fails
    #
    # @raise [Lex::Linter::Failure]
    #
    # @api private
    def complain(*args)
      raise Failure, *args
    end
  end # Linter
end # Lex
|
data/lib/lex/logger.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# coding: utf-8

require 'logger'

module Lex
  # Diagnostics logger that delegates to a stdlib ::Logger by default
  class Logger
    # @param [::Logger] logger
    #   an object responding to #info, #error and #warn to delegate to;
    #   defaults to a ::Logger writing to STDERR
    def initialize(logger = nil)
      # Honour an injected logger; previously the argument was accepted
      # but silently ignored, always logging to STDERR.
      @logger = logger || ::Logger.new(STDERR)
    end

    # Log an informational message
    def info(message)
      @logger.info(message)
    end

    # Log an error message
    def error(message)
      @logger.error(message)
    end

    # Log a warning message
    def warn(message)
      @logger.warn(message)
    end
  end # Logger
end # Lex
|
data/lib/lex/state.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# coding: utf-8

module Lex
  # Holds the lexemes that belong to a single lexer state
  class State
    include Enumerable

    attr_reader :name, :lexemes

    # @param [Symbol] name
    #   the state name
    # @param [Array] lexemes
    #   initial lexemes for this state
    def initialize(name, lexemes = [])
      @name    = name
      @lexemes = lexemes
    end

    # Yield each lexeme in this state (powers Enumerable)
    def each(&block)
      @lexemes.each(&block)
    end

    # Append a lexeme to this state
    def <<(lexeme)
      @lexemes << lexeme
    end

    # Merge in lexemes, skipping any already present
    def update(values)
      values.each do |lexeme|
        lexemes << lexeme unless lexemes.include?(lexeme)
      end
    end

    # Compare by name and lexeme list
    #
    # Guarded with respond_to? so comparing against an arbitrary
    # object returns false instead of raising NoMethodError.
    def ==(other)
      other.respond_to?(:name) && other.respond_to?(:lexemes) &&
        @name == other.name && @lexemes == other.lexemes
    end

    # Deep copy: both the lexeme list and each lexeme are cloned so
    # the copy can be mutated independently of the original.
    def clone
      self.class.new(@name, @lexemes.map(&:clone))
    end
  end # State
end # Lex
|
data/lib/lex/token.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
# coding: utf-8

require 'forwardable'

module Lex
  # Used to represent the tokens produced
  class Token
    extend Forwardable

    attr_accessor :name, :value

    attr_reader :action

    # Position accessors are delegated to the backing source line.
    def_delegators :@source_line, :line, :column

    def initialize(name, value, &action)
      @name        = name
      @value       = value
      @action      = action
      @source_line = SourceLine.new
    end

    # Record where in the input this token was found
    def update_line(line, column)
      @source_line.line   = line
      @source_line.column = column
    end

    # Return this token as array of values
    #
    # @return [Symbol, String, Integer, Integer]
    #
    # @api public
    def to_ary
      [name, value, line, column]
    end

    # Return a string representation
    #
    # @return String
    #
    # @api public
    def to_s
      fields = to_ary
      "Lex::Token(#{fields.join(',')})"
    end
    alias_method :inspect, :to_s
  end # Token
end # Lex
|
data/lib/lex/version.rb
ADDED
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
# coding: utf-8

# Enable coverage reporting when requested explicitly or on CI.
if RUBY_VERSION > '1.9' and (ENV['COVERAGE'] || ENV['TRAVIS'])
  require 'simplecov'
  require 'coveralls'

  # Emit both a local HTML report and a Coveralls upload.
  SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter[
    SimpleCov::Formatter::HTMLFormatter,
    Coveralls::SimpleCov::Formatter
  ]

  SimpleCov.start do
    command_name 'spec'
    # Exclude the specs themselves from the coverage report.
    add_filter 'spec'
  end
end

require 'lex'

RSpec.configure do |config|
  config.expect_with :rspec do |expectations|
    expectations.include_chain_clauses_in_custom_matcher_descriptions = true
  end

  config.mock_with :rspec do |mocks|
    # Verified partial doubles: stubbing a method that does not exist
    # on the real object raises instead of passing silently.
    mocks.verify_partial_doubles = true
  end

  # Limits the available syntax to the non-monkey patched syntax that is recommended.
  config.disable_monkey_patching!

  # This setting enables warnings. It's recommended, but in some cases may
  # be too noisy due to issues in dependencies.
  config.warnings = true

  # Use the verbose "doc" formatter when running a single spec file.
  if config.files_to_run.one?
    config.default_formatter = 'doc'
  end

  # Report the two slowest examples after each run.
  config.profile_examples = 2

  # Randomize ordering to surface order-dependent specs.
  config.order = :random

  Kernel.srand config.seed
end
|
46
|
+
|
47
|
+
# Strip the common leading whitespace margin from every line of
# +string+ and drop the trailing newline.
def unindent(string)
  margin = string.scan(/^[ \t]+(?=\S)/).min
  stripped = string.gsub(/^#{margin}/, '')
  stripped.chomp
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# coding: utf-8

require 'spec_helper'

RSpec.describe Lex::Lexer, '#error' do

  it "registers error handler" do
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens(:IDENTIFIER)

      rule(:IDENTIFIER, /a|b/)

      # The error handler receives the offending token; returning it
      # makes the lexer emit it (as :error) and continue scanning.
      error do |lexer, token|
        token
      end

      ignore " \t"
    end)
    my_lexer = MyLexer.new
    # Unmatched characters `(` and `)` surface as :error tokens with
    # their 1-based line/column positions preserved.
    expect(my_lexer.lex("a(b)a").map(&:to_ary)).to eq([
      [:IDENTIFIER, 'a', 1, 1],
      [:error, '(', 1, 2],
      [:IDENTIFIER, 'b', 1, 3],
      [:error, ')', 1, 4],
      [:IDENTIFIER, 'a', 1, 5]
    ])
  end

  it "raises error without error handler" do
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens(:IDENTIFIER)

      rule(:IDENTIFIER, /a|b/)

      ignore " \t"
    end)
    my_lexer = MyLexer.new
    # Without a registered handler the first illegal character aborts
    # lexing with Lex::LexerError.
    expect {
      my_lexer.lex("a(b)a").to_a
    }.to raise_error(Lex::LexerError, /Illegal character `\(`/)
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# coding: utf-8

require 'spec_helper'

RSpec.describe Lex::Lexer, 'keywords' do
  it "allows to easily create keyword tokens" do
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      # Map of reserved words to their dedicated token names.
      def self.keywords
        {
          if: :IF,
          then: :THEN,
          else: :ELSE,
          while: :WHILE
        }
      end

      tokens(:IDENTIFIER, *keywords.values)

      # A single identifier rule; the action promotes matches that are
      # reserved words to their keyword token name, leaving everything
      # else as :IDENTIFIER.
      rule(:IDENTIFIER, /\w[\w\d]*/) do |lexer, token|
        token.name = lexer.class.keywords.fetch(token.value.to_sym, :IDENTIFIER)
        token
      end

      ignore(' ')
    end)
    my_lexer = MyLexer.new

    # Each keyword is reported under its own token name with 1-based
    # line/column positions.
    expect(my_lexer.lex("if then else").map(&:to_ary)).to eq([
      [:IF, 'if', 1, 1],
      [:THEN, 'then', 1, 4],
      [:ELSE, 'else', 1, 9]
    ])
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# coding: utf-8

require 'spec_helper'

RSpec.describe Lex::Lexer, 'lex' do

  it "tokenizes simple input" do
    # `unindent` (from spec_helper) normalizes the heredoc margin.
    code = unindent(<<-EOS)
      x = 5 + 44 * (s - t)
    EOS

    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens(
        :NUMBER,
        :PLUS,
        :MINUS,
        :TIMES,
        :DIVIDE,
        :LPAREN,
        :RPAREN,
        :EQUALS,
        :IDENTIFIER
      )

      # Single-character operator rules.
      rule(:PLUS, /\+/)
      rule(:MINUS, /\-/)
      rule(:TIMES, /\*/)
      rule(:DIVIDE, /\//)
      rule(:LPAREN, /\(/)
      rule(:RPAREN, /\)/)
      rule(:EQUALS, /=/)
      rule(:IDENTIFIER, /\A[_\$a-zA-Z][_\$0-9a-zA-Z]*/)

      # Convert numeric lexemes to Integer values.
      rule(:NUMBER, /[0-9]+/) do |lexer, token|
        token.value = token.value.to_i
        token
      end

      # Track line numbers; the action does not return the token, so
      # newlines produce no output tokens (see expectation below).
      rule(:newline, /\n+/) do |lexer, token|
        lexer.advance_line(token.value.length)
      end

      ignore " \t"
    end)
    my_lexer = MyLexer.new
    # Tokens carry [name, value, line, column] with 1-based positions.
    expect(my_lexer.lex(code).map(&:to_ary)).to eq([
      [:IDENTIFIER, 'x', 1, 1],
      [:EQUALS, '=', 1, 3],
      [:NUMBER, 5, 1, 5],
      [:PLUS, '+', 1, 7],
      [:NUMBER, 44, 1, 9],
      [:TIMES, '*', 1, 12],
      [:LPAREN, '(', 1, 14],
      [:IDENTIFIER, 's', 1, 15],
      [:MINUS, '-', 1, 17],
      [:IDENTIFIER, 't', 1, 19],
      [:RPAREN, ')', 1, 20]
    ])
  end
end
|