lex 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
# coding: utf-8

require 'lex'

# Lexer implementations
#
# @note This file is not normally available. You must require
#   `lex/lexers` to load it.

# Prepend the sibling `lexers` directory to the load path (once) so
# individual lexer files can be required by bare name.
lexers = ::File.expand_path(::File.join('..', 'lexers'), __FILE__)
$LOAD_PATH.unshift(lexers) unless $LOAD_PATH.include?(lexers)
# coding: utf-8

module Lex
  module Lexers
    # HTML lexer definition — currently an empty placeholder that
    # inherits all behavior from Lex::Lexer; rules are expected to be
    # added here as the lexer is fleshed out.
    class HTML < Lexer
    end
  end # Lexers
end # Lex
# coding: utf-8

module Lex
  # A class responsible for checking lexer definitions
  #
  # @api public
  class Linter
    # Anchored with \A/\z so the WHOLE name must match; ^/$ only anchor
    # per line and would accept names containing newlines.
    IDENTIFIER_RE = /\A[a-zA-Z0-9]+\z/.freeze

    # Failure raised by +complain+
    Failure = Class.new(StandardError)

    # Run linting of lexer
    #
    # @param [Lex::Lexer] lexer
    #   the lexer definition to validate
    #
    # @raise [Lex::Linter::Failure]
    #   when any part of the lexer definition is invalid
    #
    # @api public
    def lint(lexer)
      validate_tokens(lexer)
      validate_states(lexer)
      validate_rules(lexer)
    end

    private

    # Check if token has valid name
    #
    # @param [Symbol,String] value
    #   token to check
    #
    # @return [Boolean]
    #
    # @api private
    def identifier?(value)
      # Tokens are commonly Symbols, which don't support =~, so convert
      # to String first; match? also returns a real Boolean as documented.
      IDENTIFIER_RE.match?(value.to_s)
    end

    # Validate provided tokens
    #
    # @api private
    def validate_tokens(lexer)
      # Type-check first: calling empty? on a non-collection would raise
      # NoMethodError before the lint message could be produced.
      if !lexer.lex_tokens.respond_to?(:to_ary)
        complain("Tokens must be a list or enumerable")
      end
      if lexer.lex_tokens.empty?
        complain("No token list defined")
      end

      terminals = []
      lexer.lex_tokens.each do |token|
        if !identifier?(token)
          complain("Bad token name `#{token}`")
        end
        if terminals.include?(token)
          complain("Token `#{token}` already defined")
        end
        terminals << token
      end
    end

    # Validate provided state names
    #
    # @api private
    def validate_states(lexer)
      if !lexer.state_info.respond_to?(:each_pair)
        complain("States must be defined as a hash")
      end

      lexer.state_info.each do |state_name, state_type|
        if ![:inclusive, :exclusive].include?(state_type)
          complain("State type for state #{state_name}" \
                   " must be :inclusive or :exclusive")
        end

        # Exclusive states stand alone, so missing error/ignore rules are
        # worth a warning (not a failure) — the lexer still works.
        if state_type == :exclusive
          if !lexer.state_error.key?(state_name)
            lexer.logger.warn("No error rule is defined " \
                              "for exclusive state '#{state_name}'")
          end
          if !lexer.state_ignore.key?(state_name)
            lexer.logger.warn("No ignore rule is defined " \
                              "for exclusive state '#{state_name}'")
          end
        end
      end
    end

    # Validate rules
    #
    # @api private
    def validate_rules(lexer)
      if lexer.state_re.empty?
        complain("No rules of the form rule(name, pattern) are defined")
      end

      lexer.state_info.each do |state_name, _state_type|
        if !lexer.state_re.key?(state_name.to_sym)
          complain("No rules defined for state '#{state_name}'")
        end
      end
    end

    # Raise a failure if validation of a lexer fails
    #
    # @raise [Lex::Linter::Failure]
    #
    # @api private
    def complain(*args)
      raise Failure, *args
    end
  end # Linter
end # Lex
# coding: utf-8

require 'logger'

module Lex
  # Thin wrapper around Ruby's ::Logger used for lexer diagnostics.
  class Logger
    # @param [::Logger] logger
    #   a custom logger to delegate to; defaults to one writing to STDERR
    #
    # @api public
    def initialize(logger = nil)
      # Honour an injected logger; previously the argument was silently
      # ignored and every instance logged to STDERR.
      @logger = logger || ::Logger.new(STDERR)
    end

    # Log a message at INFO severity
    def info(message)
      @logger.info(message)
    end

    # Log a message at ERROR severity
    def error(message)
      @logger.error(message)
    end

    # Log a message at WARN severity
    def warn(message)
      @logger.warn(message)
    end
  end # Logger
end # Lex
# coding: utf-8

module Lex
  # Lexer tokens' source line
  #
  # Records the position (line and column, both 1-based) at which a
  # token was matched in the input.
  class SourceLine
    attr_accessor :line, :column

    # @param [Integer] line   starting line number, defaults to 1
    # @param [Integer] column starting column number, defaults to 1
    def initialize(line = 1, column = 1)
      @line   = line
      @column = column
    end
  end # SourceLine
end # Lex
# coding: utf-8

module Lex
  # A named lexer state holding an ordered collection of lexemes.
  class State
    include Enumerable

    attr_reader :name, :lexemes

    # @param [Symbol] name
    #   the state's identifier
    # @param [Array] lexemes
    #   initial lexemes, defaults to an empty list
    def initialize(name, lexemes = [])
      @name    = name
      @lexemes = lexemes
    end

    # Yield each lexeme in insertion order.
    def each(&block)
      lexemes.each(&block)
    end

    # Append a lexeme unconditionally.
    def <<(lexeme)
      lexemes << lexeme
    end

    # Merge in values, skipping any lexeme already present.
    def update(values)
      values.each { |value| self << value unless lexemes.include?(value) }
    end

    # Two states compare equal when both name and lexemes match.
    def ==(other)
      @name == other.name && @lexemes == other.lexemes
    end

    # Produce a deep copy that owns clones of every lexeme.
    def clone
      self.class.new(@name, @lexemes.map(&:clone))
    end
  end # State
end # Lex
# coding: utf-8

require 'forwardable'

module Lex
  # Used to represent the tokens produced
  class Token
    extend Forwardable

    attr_accessor :name, :value

    attr_reader :action

    def_delegators :@source_line, :line, :column

    # @param [Symbol] name   the token type
    # @param [Object] value  the matched text (or converted value)
    # @param [Proc]   action optional handler attached to the rule
    def initialize(name, value, &action)
      @source_line = SourceLine.new
      @name        = name
      @value       = value
      @action      = action
    end

    # Record where in the input this token was matched.
    def update_line(line, column)
      @source_line.line   = line
      @source_line.column = column
    end

    # Return this token as array of values
    #
    # @return [Symbol, String, Integer, Integer]
    #
    # @api public
    def to_ary
      [name, value, line, column]
    end

    # Return a string representation
    #
    # @return String
    #
    # @api public
    def to_s
      fields = to_ary.join(',')
      "Lex::Token(#{fields})"
    end
    alias_method :inspect, :to_s
  end # Token
end # Lex
# coding: utf-8

module Lex
  # Gem version string; frozen so callers cannot mutate the shared constant.
  VERSION = "0.1.0".freeze
end # Lex
# coding: utf-8

# Compare with Gem::Version rather than plain strings: a lexicographic
# comparison misorders versions such as '1.10' vs '1.9'. Also use &&
# instead of the low-precedence `and` for boolean logic.
if Gem::Version.new(RUBY_VERSION) > Gem::Version.new('1.9') &&
    (ENV['COVERAGE'] || ENV['TRAVIS'])
  require 'simplecov'
  require 'coveralls'

  SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter[
    SimpleCov::Formatter::HTMLFormatter,
    Coveralls::SimpleCov::Formatter
  ]

  SimpleCov.start do
    command_name 'spec'
    add_filter 'spec'
  end
end

require 'lex'

RSpec.configure do |config|
  config.expect_with :rspec do |expectations|
    expectations.include_chain_clauses_in_custom_matcher_descriptions = true
  end

  config.mock_with :rspec do |mocks|
    mocks.verify_partial_doubles = true
  end

  # Limits the available syntax to the non-monkey patched syntax that is recommended.
  config.disable_monkey_patching!

  # This setting enables warnings. It's recommended, but in some cases may
  # be too noisy due to issues in dependencies.
  config.warnings = true

  if config.files_to_run.one?
    config.default_formatter = 'doc'
  end

  config.profile_examples = 2

  config.order = :random

  Kernel.srand config.seed
end
47
+ def unindent(string)
48
+ prefix = string.scan(/^[ \t]+(?=\S)/).min
49
+ string.gsub(/^#{prefix}/, '').chomp
50
+ end
# coding: utf-8

require 'spec_helper'

RSpec.describe Lex::Lexer, '#error' do

  it "registers error handler" do
    # With an error block, each unmatched character is handed to the
    # handler; returning the token re-emits it with the :error name.
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens(:IDENTIFIER)

      rule(:IDENTIFIER, /a|b/)

      error do |lexer, token|
        token
      end

      ignore " \t"
    end)
    my_lexer = MyLexer.new
    expect(my_lexer.lex("a(b)a").map(&:to_ary)).to eq([
      [:IDENTIFIER, 'a', 1, 1],
      [:error, '(', 1, 2],
      [:IDENTIFIER, 'b', 1, 3],
      [:error, ')', 1, 4],
      [:IDENTIFIER, 'a', 1, 5]
    ])
  end

  it "raises error without error handler" do
    # Without an error block, an unmatched character aborts lexing.
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens(:IDENTIFIER)

      rule(:IDENTIFIER, /a|b/)

      ignore " \t"
    end)
    my_lexer = MyLexer.new
    expect {
      my_lexer.lex("a(b)a").to_a
    }.to raise_error(Lex::LexerError, /Illegal character `\(`/)
  end
end
# coding: utf-8

require 'spec_helper'

RSpec.describe Lex::Lexer, 'keywords' do
  it "allows to easily create keyword tokens" do
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      # Keyword table mapping a matched word to its token name.
      def self.keywords
        {
          if: :IF,
          then: :THEN,
          else: :ELSE,
          while: :WHILE
        }
      end

      tokens(:IDENTIFIER, *keywords.values)

      # Words match as IDENTIFIER first, then are reclassified when
      # they appear in the keyword table.
      rule(:IDENTIFIER, /\w[\w\d]*/) do |lexer, token|
        token.name = lexer.class.keywords.fetch(token.value.to_sym, :IDENTIFIER)
        token
      end

      ignore(' ')
    end)
    my_lexer = MyLexer.new

    expect(my_lexer.lex("if then else").map(&:to_ary)).to eq([
      [:IF, 'if', 1, 1],
      [:THEN, 'then', 1, 4],
      [:ELSE, 'else', 1, 9]
    ])
  end
end
# coding: utf-8

require 'spec_helper'

RSpec.describe Lex::Lexer, 'lex' do

  it "tokenizes simple input" do
    code = unindent(<<-EOS)
      x = 5 + 44 * (s - t)
    EOS

    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens(
        :NUMBER,
        :PLUS,
        :MINUS,
        :TIMES,
        :DIVIDE,
        :LPAREN,
        :RPAREN,
        :EQUALS,
        :IDENTIFIER
      )

      rule(:PLUS, /\+/)
      rule(:MINUS, /\-/)
      rule(:TIMES, /\*/)
      rule(:DIVIDE, /\//)
      rule(:LPAREN, /\(/)
      rule(:RPAREN, /\)/)
      rule(:EQUALS, /=/)
      rule(:IDENTIFIER, /\A[_\$a-zA-Z][_\$0-9a-zA-Z]*/)

      # Rule blocks may transform the token; here NUMBER values are
      # converted from String to Integer before being emitted.
      rule(:NUMBER, /[0-9]+/) do |lexer, token|
        token.value = token.value.to_i
        token
      end

      # Returning nil (no token) swallows the newline while the lexer
      # advances its line counter.
      rule(:newline, /\n+/) do |lexer, token|
        lexer.advance_line(token.value.length)
      end

      ignore " \t"
    end)
    my_lexer = MyLexer.new
    expect(my_lexer.lex(code).map(&:to_ary)).to eq([
      [:IDENTIFIER, 'x', 1, 1],
      [:EQUALS, '=', 1, 3],
      [:NUMBER, 5, 1, 5],
      [:PLUS, '+', 1, 7],
      [:NUMBER, 44, 1, 9],
      [:TIMES, '*', 1, 12],
      [:LPAREN, '(', 1, 14],
      [:IDENTIFIER, 's', 1, 15],
      [:MINUS, '-', 1, 17],
      [:IDENTIFIER, 't', 1, 19],
      [:RPAREN, ')', 1, 20]
    ])
  end
end