lexr 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +42 -0
- data/lexr.rb +1 -0
- data/lib/lexr.rb +117 -0
- metadata +83 -0
data/README.md
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
## Lexr
|
2
|
+
|
3
|
+
Lexr is a lightweight lexical analyser written in Ruby. It has no dependencies, has good test coverage, looks pretty and reads well.
|
4
|
+
|
5
|
+
# An example: Expressions
|
6
|
+
|
7
|
+
ExpressionLexer = Lexr.that {
|
8
|
+
ignores /\s+/ => :whitespace
|
9
|
+
matches /[-+]?[0-9]*\.?[0-9]+/ => :number, :convert_with => lambda { |v| Float(v) }
|
10
|
+
matches "+" => :addition
|
11
|
+
matches "-" => :subtraction
|
12
|
+
matches "*" => :multiplication
|
13
|
+
matches "/" => :division
|
14
|
+
matches "(" => :left_parenthesis
|
15
|
+
matches ")" => :right_parenthesis
|
16
|
+
}
|
17
|
+
|
18
|
+
lexer = ExpressionLexer.new("1 * 12.5 / (55 + 2 - 56)")
|
19
|
+
|
20
|
+
while (token = lexer.next) != Lexr::Token.end
|
21
|
+
puts token
|
22
|
+
end
|
23
|
+
|
24
|
+
results in an output of
|
25
|
+
|
26
|
+
number(1.0)
|
27
|
+
multiplication(*)
|
28
|
+
number(12.5)
|
29
|
+
division(/)
|
30
|
+
left_parenthesis(()
|
31
|
+
number(55.0)
|
32
|
+
addition(+)
|
33
|
+
number(2.0)
|
34
|
+
subtraction(-)
|
35
|
+
number(56.0)
|
36
|
+
right_parenthesis())
|
37
|
+
|
38
|
+
If you added a % in there somewhere, you'd get a Lexr::UnmatchableTextError with a message like this:
|
39
|
+
|
40
|
+
=> Unexpected character '%' at position 5
|
41
|
+
|
42
|
+
And that is pretty much every feature so far. Please let me know of any bugs or additions that you'd like to see!
|
data/lexr.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
# Convenience entry point: loads lib/lexr.rb, resolved relative to the
# directory containing this file.
require File.expand_path('lib/lexr', File.dirname(__FILE__))
|
data/lib/lexr.rb
ADDED
@@ -0,0 +1,117 @@
|
|
1
|
+
# Lexr is a small rule-driven lexical analyser.
#
# A lexer is described with the DSL returned by Lexr.that and is then
# instantiated against a piece of text:
#
#   ExpressionLexer = Lexr.that {
#     ignores(/\s+/ => :whitespace)
#     matches(/[0-9]+/ => :number, :convert_with => lambda { |v| Integer(v) })
#     matches("+" => :addition)
#   }
#
#   lexer = ExpressionLexer.new("1 + 2")
#   lexer.next # => number(1)
#
# Rules are tried in the order they were declared; the first rule that
# matches at the current position wins.
class Lexr
  # Builds a lexer definition from the given block.
  #
  # A block taking one parameter receives the Dsl object; a block taking no
  # parameters is evaluated in the context of the Dsl object.
  #
  # Returns the Lexr::Dsl; call #new(text) on it to obtain a lexer.
  # Raises ArgumentError when no block is given.
  def self.that(&block)
    raise ArgumentError, "Lexr.that requires a block" unless block

    dsl = Lexr::Dsl.new
    block.arity == 1 ? block[dsl] : dsl.instance_eval(&block)
    dsl
  end

  # text  - the String to tokenise.
  # rules - an ordered Array of Lexr::Rule objects.
  def initialize(text, rules)
    @text, @rules = text, rules
    @position = 0
  end

  # Returns the next Lexr::Token, skipping over text matched by ignore rules.
  #
  # Returns Lexr::Token.end once the whole text has been consumed.
  # Raises Lexr::UnmatchableTextError when no rule matches at the current
  # position.
  def next
    # A loop (rather than recursion) is used so that long runs of ignored
    # tokens cannot exhaust the stack.
    while @position < @text.length
      rule, value = match_next_rule
      raise Lexr::UnmatchableTextError.new(@text[@position, 1], @position) if rule.nil?

      # The converter runs for ignored rules as well, as it always has.
      value = rule.converter[value] if rule.converter
      return Lexr::Token.new(value, rule.symbol) unless rule.ignore?
    end

    Lexr::Token.end
  end

  private

  # Tries each rule in declaration order at the current position.
  # Returns [rule, matched_text] for the first match, or nil when none match.
  def match_next_rule
    @rules.each do |rule|
      matched = rule.pattern.instance_of?(Regexp) ? regexp_match(rule.pattern) : literal_match(rule.pattern)
      return [rule, matched] if matched
    end
    nil
  end

  # The part of the text that has not been consumed yet.
  def unprocessed_text
    @text[@position..-1]
  end

  # Matches regex at the current position and consumes the matched text.
  # Returns the matched String, or nil when there is no match.
  def regexp_match(regex)
    # \A anchors at the very start of the remaining text. '^' would also
    # match after any newline, letting a rule match on a later line and
    # silently skip everything before it.
    m = unprocessed_text.match(/\A#{regex}/)

    # A zero-width match consumes nothing, so accepting it would yield the
    # same token forever; treat it as "no match".
    return nil if m.nil? || m.end(0).zero?

    @position += m.end(0)
    m[0]
  end

  # Matches the literal string lit at the current position and consumes it.
  # Returns lit, or nil when the text does not start with it.
  def literal_match(lit)
    return nil if lit.empty? || @text[@position, lit.length] != lit

    @position += lit.length
    lit
  end

  # A lexed token: a type (the rule's symbol) and the matched value.
  class Token
    attr_reader :value, :type

    def initialize(value, type = nil)
      @value, @type = value, type
    end

    # Any unknown class method builds a token of that type, e.g.
    # Token.number(1) or Token.end.
    def self.method_missing(sym, *args)
      self.new(args.first, sym)
    end

    def to_s
      "#{type}(#{value})"
    end

    # Tokens are equal when both type and value are equal. Objects that do
    # not expose #type and #value (nil, for instance) are never equal.
    def ==(other)
      other.respond_to?(:type) && other.respond_to?(:value) &&
        @type == other.type && @value == other.value
    end
  end

  # A single lexing rule: a pattern (Regexp or literal String), the symbol
  # used as the token type, and options (:convert_with, :ignore).
  class Rule
    attr_reader :pattern, :symbol

    # The callable used to convert matched text, or nil.
    def converter ; @opts[:convert_with] ; end

    # Truthy when text matched by this rule should be skipped.
    def ignore? ; @opts[:ignore] ; end

    def initialize(pattern, symbol, opts = {})
      @pattern, @symbol, @opts = pattern, symbol, opts
    end

    # Rules are equal when pattern, symbol, converter and ignore flag are
    # all equal. Objects that do not look like a rule are never equal.
    def ==(other)
      [:pattern, :symbol, :converter, :ignore?].all? { |name| other.respond_to?(name) } &&
        @pattern == other.pattern &&
        @symbol == other.symbol &&
        @opts[:convert_with] == other.converter &&
        @opts[:ignore] == other.ignore?
    end
  end

  # Collects rules and acts as a factory for lexers that use them.
  class Dsl
    def initialize
      @rules = []
    end

    # Declares a rule. rule_hash holds one non-Symbol key (the pattern)
    # mapped to the token type, plus optional Symbol-keyed options:
    #
    #   matches(/[0-9]+/ => :number, :convert_with => lambda { |v| Integer(v) })
    #
    # Raises ArgumentError when rule_hash contains no pattern.
    def matches(rule_hash)
      pattern = rule_hash.keys.find { |key| !key.is_a?(Symbol) }
      raise ArgumentError, "no pattern given in #{rule_hash.inspect}" if pattern.nil?

      symbol = rule_hash[pattern]
      # reject (not delete_if) so the caller's hash is left untouched.
      opts = rule_hash.reject { |key, _value| !key.is_a?(Symbol) }
      @rules << Rule.new(pattern, symbol, opts)
    end

    # Declares a rule whose matches are consumed but never returned as tokens.
    def ignores(rule_hash)
      matches rule_hash.merge(:ignore => true)
    end

    # Creates a lexer over str using the rules declared so far.
    def new(str)
      Lexr.new(str, @rules)
    end
  end

  # Raised when no rule matches the text at the current position.
  class UnmatchableTextError < StandardError
    attr_reader :character, :position

    # character - the offending character.
    # position  - its zero-based offset in the text (reported one-based in
    #             the message).
    def initialize(character, position)
      @character, @position = character, position
      # Hand the message to StandardError so #message and #to_s agree.
      super("Unexpected character '#{character}' at position #{position + 1}")
    end

    def inspect
      message
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: lexr
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 27
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
- 0
|
10
|
+
version: 0.1.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Michael Baldry
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2010-11-12 00:00:00 +00:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: rspec
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 3
|
30
|
+
segments:
|
31
|
+
- 0
|
32
|
+
version: "0"
|
33
|
+
type: :development
|
34
|
+
version_requirements: *id001
|
35
|
+
description:
|
36
|
+
email: michael.baldry@uswitch.com
|
37
|
+
executables: []
|
38
|
+
|
39
|
+
extensions: []
|
40
|
+
|
41
|
+
extra_rdoc_files:
|
42
|
+
- README.md
|
43
|
+
files:
|
44
|
+
- lexr.rb
|
45
|
+
- README.md
|
46
|
+
- lib/lexr.rb
|
47
|
+
has_rdoc: true
|
48
|
+
homepage: http://www.forwardtechnology.co.uk
|
49
|
+
licenses: []
|
50
|
+
|
51
|
+
post_install_message:
|
52
|
+
rdoc_options:
|
53
|
+
- --main
|
54
|
+
- README.md
|
55
|
+
require_paths:
|
56
|
+
- lib
|
57
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
58
|
+
none: false
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
hash: 3
|
63
|
+
segments:
|
64
|
+
- 0
|
65
|
+
version: "0"
|
66
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
67
|
+
none: false
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
hash: 3
|
72
|
+
segments:
|
73
|
+
- 0
|
74
|
+
version: "0"
|
75
|
+
requirements: []
|
76
|
+
|
77
|
+
rubyforge_project:
|
78
|
+
rubygems_version: 1.3.7
|
79
|
+
signing_key:
|
80
|
+
specification_version: 3
|
81
|
+
summary: A lightweight and pretty lexical analyser
|
82
|
+
test_files: []
|
83
|
+
|