lex 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,11 @@
1
# coding: utf-8

require 'lex'

# Lexer implementations
#
# @note This file is not normally available. You must require
#   `lex/lexers` to load it.

# Make the sibling `lexers` directory requirable, but never add it twice.
lexers_dir = ::File.expand_path('../lexers', __FILE__)
$LOAD_PATH.unshift(lexers_dir) unless $LOAD_PATH.include?(lexers_dir)
@@ -0,0 +1,8 @@
1
# coding: utf-8

module Lex
  module Lexers
    # Lexer for HTML input.
    #
    # Currently an empty subclass: all behaviour comes from {Lex::Lexer};
    # no HTML-specific rules are defined yet.
    class HTML < Lexer
    end
  end # Lexers
end # Lex
@@ -0,0 +1,114 @@
1
# coding: utf-8

module Lex
  # A class responsible for checking lexer definitions
  #
  # @api public
  class Linter
    # Token names must be plain alphanumeric identifiers.
    # \A and \z anchor the whole string (^ and $ only anchor single lines,
    # so a multi-line value could slip past them).
    IDENTIFIER_RE = /\A[a-zA-Z0-9]+\z/.freeze

    # Failure raised by +complain+
    Failure = Class.new(StandardError)

    # Run linting of lexer
    #
    # @param [Lex::Lexer] lexer
    #   the lexer definition to validate
    #
    # @raise [Lex::Linter::Failure]
    #   when tokens, states or rules are invalid
    #
    # @api public
    def lint(lexer)
      validate_tokens(lexer)
      validate_states(lexer)
      validate_rules(lexer)
    end

    private

    # Check if token has valid name
    #
    # @param [Symbol,String] value
    #   token to check
    #
    # @return [Boolean]
    #
    # @api private
    def identifier?(value)
      # Coerce to String first: Symbol does not implement #=~, so matching
      # a Symbol directly falls back to Object#=~ which always returns nil
      # and every symbol token would be rejected. The double negation turns
      # the match index (or nil) into a true Boolean per the docs above.
      !!(value.to_s =~ IDENTIFIER_RE)
    end

    # Validate provided tokens
    #
    # Checks that a token list exists, is enumerable, and contains only
    # well-formed, unique names.
    #
    # @api private
    def validate_tokens(lexer)
      if lexer.lex_tokens.empty?
        complain("No token list defined")
      end
      if !lexer.lex_tokens.respond_to?(:to_ary)
        complain("Tokens must be a list or enumerable")
      end

      terminals = []
      lexer.lex_tokens.each do |token|
        if !identifier?(token)
          complain("Bad token name `#{token}`")
        end
        if terminals.include?(token)
          complain("Token `#{token}` already defined")
        end
        terminals << token
      end
    end

    # Validate provided state names
    #
    # Every state must be :inclusive or :exclusive; exclusive states
    # without error/ignore rules only produce warnings, not failures.
    #
    # @api private
    def validate_states(lexer)
      if !lexer.state_info.respond_to?(:each_pair)
        complain("States must be defined as a hash")
      end

      lexer.state_info.each do |state_name, state_type|
        if ![:inclusive, :exclusive].include?(state_type)
          complain("State type for state #{state_name}" \
                   " must be :inclusive or :exclusive")
        end

        if state_type == :exclusive
          if !lexer.state_error.key?(state_name)
            lexer.logger.warn("No error rule is defined " \
                              "for exclusive state '#{state_name}'")
          end
          if !lexer.state_ignore.key?(state_name)
            lexer.logger.warn("No ignore rule is defined " \
                              "for exclusive state '#{state_name}'")
          end
        end
      end
    end

    # Validate rules
    #
    # Ensures at least one rule exists and every declared state has rules.
    #
    # @api private
    def validate_rules(lexer)
      if lexer.state_re.empty?
        complain("No rules of the form rule(name, pattern) are defined")
      end

      # Only the state name is needed here; each_key avoids the unused
      # state_type block parameter the each version carried.
      lexer.state_info.each_key do |state_name|
        if !lexer.state_re.key?(state_name.to_sym)
          complain("No rules defined for state '#{state_name}'")
        end
      end
    end

    # Raise a failure if validation of a lexer fails
    #
    # @raise [Lex::Linter::Failure]
    #
    # @api private
    def complain(*args)
      raise Failure, *args
    end
  end # Linter
end # Lex
@@ -0,0 +1,21 @@
1
# coding: utf-8

require 'logger'

module Lex
  # Thin wrapper around a stdlib ::Logger used for lexer diagnostics.
  class Logger
    # Create a logger
    #
    # @param [::Logger] logger
    #   custom logger to delegate to; when nil a ::Logger writing to
    #   STDERR is created
    def initialize(logger = nil)
      # Honour an injected logger: previously the argument was accepted
      # but ignored, so every instance logged to STDERR unconditionally.
      @logger = logger || ::Logger.new(STDERR)
    end

    # Log a message at INFO severity.
    def info(message)
      @logger.info(message)
    end

    # Log a message at ERROR severity.
    def error(message)
      @logger.error(message)
    end

    # Log a message at WARN severity.
    def warn(message)
      @logger.warn(message)
    end
  end # Logger
end # Lex
@@ -0,0 +1,13 @@
1
# coding: utf-8

module Lex
  # Lexer tokens' source line
  #
  # Records a mutable, 1-based line/column position that tokens
  # point back to.
  class SourceLine
    attr_accessor :line
    attr_accessor :column

    # @param [Integer] line
    #   starting line, defaults to 1
    # @param [Integer] column
    #   starting column, defaults to 1
    def initialize(line = 1, column = 1)
      @line   = line
      @column = column
    end
  end # SourceLine
end # Lex
@@ -0,0 +1,37 @@
1
# coding: utf-8

module Lex
  # A named, enumerable collection of lexemes belonging to one lexer state.
  class State
    include Enumerable

    attr_reader :name, :lexemes

    # @param [Symbol] name
    #   the state name
    # @param [Array] lexemes
    #   initial lexemes, defaults to an empty list
    def initialize(name, lexemes = [])
      @name    = name
      @lexemes = lexemes
    end

    # Yield every lexeme in definition order.
    def each(&block)
      lexemes.each(&block)
    end

    # Append a lexeme unconditionally.
    def <<(lexeme)
      lexemes << lexeme
    end

    # Merge +values+ in, skipping lexemes already present.
    def update(values)
      values.each do |candidate|
        self << candidate unless lexemes.include?(candidate)
      end
    end

    # States compare equal when both name and lexeme list match.
    def ==(other)
      name == other.name && lexemes == other.lexemes
    end

    # Build a copy whose lexeme list holds clones of each lexeme.
    def clone
      copies = lexemes.map(&:clone)
      self.class.new(name, copies)
    end
  end # State
end # Lex
@@ -0,0 +1,47 @@
1
# coding: utf-8

require 'forwardable'

module Lex
  # Used to represent the tokens produced
  class Token
    extend Forwardable

    attr_accessor :name, :value

    attr_reader :action

    def_delegators :@source_line, :line, :column

    # @param [Symbol] name
    #   the token name
    # @param [String] value
    #   the matched text
    # @param [Proc] action
    #   optional block attached to this token
    def initialize(name, value, &action)
      @name        = name
      @value       = value
      @action      = action
      @source_line = SourceLine.new
    end

    # Move this token's recorded source position.
    def update_line(line, column)
      @source_line.line   = line
      @source_line.column = column
    end

    # Return this token as array of values
    #
    # @return [Symbol, String, Integer, Integer]
    #
    # @api public
    def to_ary
      [name, value, line, column]
    end

    # Return a string representation
    #
    # @return String
    #
    # @api public
    def to_s
      "Lex::Token(#{to_ary.join(',')})"
    end
    alias_method :inspect, :to_s
  end # Token
end # Lex
@@ -0,0 +1,5 @@
1
# coding: utf-8

module Lex
  # Current gem release, following Semantic Versioning.
  VERSION = "0.1.0"
end # Lex
@@ -0,0 +1,50 @@
1
# coding: utf-8

# Enable code coverage reporting when explicitly requested (COVERAGE)
# or when running on CI (TRAVIS). Uses `&&` rather than `and`: the
# low-precedence keyword form invites precedence surprises in
# conditions and is reserved for control flow.
if RUBY_VERSION > '1.9' && (ENV['COVERAGE'] || ENV['TRAVIS'])
  require 'simplecov'
  require 'coveralls'

  # Report both locally (HTML) and to the Coveralls service.
  SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter[
    SimpleCov::Formatter::HTMLFormatter,
    Coveralls::SimpleCov::Formatter
  ]

  SimpleCov.start do
    command_name 'spec'
    add_filter 'spec'
  end
end
17
+
18
require 'lex'

RSpec.configure do |config|
  config.expect_with :rspec do |expectations|
    expectations.include_chain_clauses_in_custom_matcher_descriptions = true
  end

  config.mock_with :rspec do |mocks|
    mocks.verify_partial_doubles = true
  end

  # Limits the available syntax to the non-monkey patched syntax that is recommended.
  config.disable_monkey_patching!

  # This setting enables warnings. It's recommended, but in some cases may
  # be too noisy due to issues in dependencies.
  config.warnings = true

  # Use verbose documentation output when running a single spec file.
  config.default_formatter = 'doc' if config.files_to_run.one?

  # Report the two slowest examples after each run.
  config.profile_examples = 2

  # Randomize example order; seed is printed so failures can be replayed.
  config.order = :random
  Kernel.srand config.seed
end
46
+
47
# Strip the common leading whitespace margin from every line of +string+
# and drop the trailing newline.
def unindent(string)
  margin = string.scan(/^[ \t]+(?=\S)/).min
  return string.chomp unless margin
  string.gsub(/^#{margin}/, '').chomp
end
@@ -0,0 +1,42 @@
1
# coding: utf-8

require 'spec_helper'

RSpec.describe Lex::Lexer, '#error' do
  it "registers error handler" do
    # Lexer with an error handler that keeps illegal characters as
    # :error tokens instead of raising.
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens(:IDENTIFIER)

      rule(:IDENTIFIER, /a|b/)

      error do |lexer, token|
        token
      end

      ignore " \t"
    end)

    lexer = MyLexer.new
    actual = lexer.lex("a(b)a").map(&:to_ary)
    expect(actual).to eq([
      [:IDENTIFIER, 'a', 1, 1],
      [:error, '(', 1, 2],
      [:IDENTIFIER, 'b', 1, 3],
      [:error, ')', 1, 4],
      [:IDENTIFIER, 'a', 1, 5]
    ])
  end

  it "raises error without error handler" do
    # Same lexer, no error handler: illegal characters must raise.
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens(:IDENTIFIER)

      rule(:IDENTIFIER, /a|b/)

      ignore " \t"
    end)

    lexer = MyLexer.new
    expect {
      lexer.lex("a(b)a").to_a
    }.to raise_error(Lex::LexerError, /Illegal character `\(`/)
  end
end
@@ -0,0 +1,34 @@
1
# coding: utf-8

require 'spec_helper'

RSpec.describe Lex::Lexer, 'keywords' do
  it "allows to easily create keyword tokens" do
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      # Map of reserved words to their token names.
      def self.keywords
        {
          if: :IF,
          then: :THEN,
          else: :ELSE,
          while: :WHILE
        }
      end

      tokens(:IDENTIFIER, *keywords.values)

      # Promote an identifier to a keyword token when its text is reserved.
      rule(:IDENTIFIER, /\w[\w\d]*/) do |lexer, token|
        token.name = lexer.class.keywords.fetch(token.value.to_sym, :IDENTIFIER)
        token
      end

      ignore(' ')
    end)

    lexer = MyLexer.new
    expected = [
      [:IF, 'if', 1, 1],
      [:THEN, 'then', 1, 4],
      [:ELSE, 'else', 1, 9]
    ]
    expect(lexer.lex("if then else").map(&:to_ary)).to eq(expected)
  end
end
@@ -0,0 +1,60 @@
1
# coding: utf-8

require 'spec_helper'

RSpec.describe Lex::Lexer, 'lex' do
  it "tokenizes simple input" do
    code = unindent(<<-EOS)
      x = 5 + 44 * (s - t)
    EOS

    # Minimal arithmetic-expression lexer.
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens(
        :NUMBER,
        :PLUS,
        :MINUS,
        :TIMES,
        :DIVIDE,
        :LPAREN,
        :RPAREN,
        :EQUALS,
        :IDENTIFIER
      )

      rule(:PLUS, /\+/)
      rule(:MINUS, /\-/)
      rule(:TIMES, /\*/)
      rule(:DIVIDE, /\//)
      rule(:LPAREN, /\(/)
      rule(:RPAREN, /\)/)
      rule(:EQUALS, /=/)
      rule(:IDENTIFIER, /\A[_\$a-zA-Z][_\$0-9a-zA-Z]*/)

      # Convert matched digits to an Integer value.
      rule(:NUMBER, /[0-9]+/) do |lexer, token|
        token.value = token.value.to_i
        token
      end

      # Track line numbers; returning nil drops the newline token.
      rule(:newline, /\n+/) do |lexer, token|
        lexer.advance_line(token.value.length)
      end

      ignore " \t"
    end)

    lexer = MyLexer.new
    expected = [
      [:IDENTIFIER, 'x', 1, 1],
      [:EQUALS, '=', 1, 3],
      [:NUMBER, 5, 1, 5],
      [:PLUS, '+', 1, 7],
      [:NUMBER, 44, 1, 9],
      [:TIMES, '*', 1, 12],
      [:LPAREN, '(', 1, 14],
      [:IDENTIFIER, 's', 1, 15],
      [:MINUS, '-', 1, 17],
      [:IDENTIFIER, 't', 1, 19],
      [:RPAREN, ')', 1, 20]
    ]
    expect(lexer.lex(code).map(&:to_ary)).to eq(expected)
  end
end