lexr 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +42 -0
- data/lexr.rb +1 -0
- data/lib/lexr.rb +117 -0
- metadata +83 -0
data/README.md
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
## Lexr
|
2
|
+
|
3
|
+
Lexr is a lightweight lexical analyser written in Ruby. It has no dependencies, has good test coverage, looks pretty and reads well.
|
4
|
+
|
5
|
+
# An example: Expressions
|
6
|
+
|
7
|
+
ExpressionLexer = Lexr.that {
|
8
|
+
ignores /\s+/ => :whitespace
|
9
|
+
matches /[-+]?[0-9]*\.?[0-9]+/ => :number, :convert_with => lambda { |v| Float(v) }
|
10
|
+
matches "+" => :addition
|
11
|
+
matches "-" => :subtraction
|
12
|
+
matches "*" => :multiplication
|
13
|
+
matches "/" => :division
|
14
|
+
matches "(" => :left_parenthesis
|
15
|
+
matches ")" => :right_parenthesis
|
16
|
+
}
|
17
|
+
|
18
|
+
lexer = ExpressionLexer.new("1 * 12.5 / (55 + 2 - 56)")
|
19
|
+
|
20
|
+
while (token = lexer.next) != Lexr::Token.end
|
21
|
+
puts token
|
22
|
+
end
|
23
|
+
|
24
|
+
results in an output of
|
25
|
+
|
26
|
+
number(1.0)
|
27
|
+
multiplication(*)
|
28
|
+
number(12.5)
|
29
|
+
division(/)
|
30
|
+
left_parenthesis(()
|
31
|
+
number(55.0)
|
32
|
+
addition(+)
|
33
|
+
number(2.0)
|
34
|
+
subtraction(-)
|
35
|
+
number(56.0)
|
36
|
+
right_parenthesis())
|
37
|
+
|
38
|
+
If you added a % in there somewhere, you'd get a Lexr::UnmatchableTextError with a message like this:
|
39
|
+
|
40
|
+
=> Unexpected character '%' at position 5
|
41
|
+
|
42
|
+
And that is pretty much every feature so far. Please let me know of any bugs or additions that you'd like to see!
|
data/lexr.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'lib', 'lexr'))
|
data/lib/lexr.rb
ADDED
@@ -0,0 +1,117 @@
|
|
1
|
+
# Lexr is a small rule-driven lexical analyser.
#
# A lexer definition is built with Lexr.that, which returns a Lexr::Dsl holding
# an ordered list of rules. Calling #new(text) on that definition yields a Lexr
# instance whose #next method returns one Lexr::Token at a time.
class Lexr
  # Builds a lexer definition from a DSL block.
  #
  # If the block declares exactly one parameter the Lexr::Dsl is passed to it;
  # otherwise the block is instance_eval'd on the Dsl so `matches` / `ignores`
  # can be called bare.
  #
  # Returns the populated Lexr::Dsl.
  def self.that(&block)
    dsl = Lexr::Dsl.new
    block.arity == 1 ? block[dsl] : dsl.instance_eval(&block)
    dsl
  end

  # text  - the String to tokenise.
  # rules - an ordered Array of Lexr::Rule; earlier rules win.
  def initialize(text, rules)
    @text, @rules = text, rules
    @position = 0
    # Cache of start-anchored versions of each rule's Regexp, so they are
    # compiled once per lexer rather than once per match attempt.
    @anchored_patterns = {}
  end

  # Returns the next non-ignored Lexr::Token, or Lexr::Token.end once the
  # whole text has been consumed.
  #
  # Raises Lexr::UnmatchableTextError when no rule matches at the current
  # position.
  def next
    # Loop (rather than recurse) over ignored tokens so long runs of ignored
    # text cannot exhaust the stack.
    while @position < @text.length
      rule, value = first_match
      raise Lexr::UnmatchableTextError.new(unprocessed_text[0..0], @position) unless rule

      # The converter runs before the ignore check, so it is also invoked for
      # ignored rules that declare one.
      value = rule.converter[value] if rule.converter
      return Lexr::Token.new(value, rule.symbol) unless rule.ignore?
    end
    Lexr::Token.end
  end

  private

  # Tries each rule in order at the current position.
  # Returns [rule, matched_text] for the first rule that matches, or nil.
  def first_match
    @rules.each do |rule|
      pattern = rule.pattern
      matched = pattern.instance_of?(Regexp) ? regexp_match(pattern) : literal_match(pattern)
      return [rule, matched] if matched
    end
    nil
  end

  # The part of the text that has not been consumed yet.
  def unprocessed_text
    @text[@position..-1]
  end

  # Matches regex at the very start of the unprocessed text and advances the
  # position past the match. Returns the matched String or nil.
  def regexp_match(regex)
    # \A (start of string) is required here: ^ also matches after any newline,
    # which would let a rule match on a later line and skip unmatched text.
    anchored = (@anchored_patterns[regex] ||= /\A#{regex}/)
    m = anchored.match(unprocessed_text)
    # A zero-length match consumes nothing and would stall the lexer, so it is
    # treated as "no match" and the remaining rules get a chance.
    return nil unless m && m.end(0) > 0
    @position += m.end(0)
    m[0]
  end

  # Matches the literal String lit at the current position and advances past
  # it. Returns lit or nil. An empty literal never matches.
  def literal_match(lit)
    return nil if lit.empty?
    return nil unless @text[@position, lit.length] == lit
    @position += lit.length
    lit
  end

  # A lexed token: a value tagged with a type symbol.
  class Token
    attr_reader :value, :type

    def initialize(value, type = nil)
      @value, @type = value, type
    end

    # Any unknown class method builds a token whose type is the method name
    # and whose value is the first argument, e.g. Token.number(1.0) or
    # Token.end.
    def self.method_missing(sym, *args)
      self.new(args.first, sym)
    end

    # Every method name is accepted by method_missing above, so report that.
    def self.respond_to_missing?(_sym, _include_private = false)
      true
    end

    # Renders as "type(value)".
    def to_s
      "#{type}(#{value})"
    end

    # Tokens are equal when both type and value are equal; anything that is
    # not a Token is never equal.
    def ==(other)
      other.is_a?(Token) && @type == other.type && @value == other.value
    end
  end

  # A single lexing rule: a pattern (Regexp or literal), the token type symbol
  # it produces, and options (:convert_with, :ignore).
  class Rule
    attr_reader :pattern, :symbol

    # Callable applied to the matched text, or nil when none was given.
    def converter ; @opts[:convert_with] ; end
    # Truthy when tokens produced by this rule should be skipped.
    def ignore? ; @opts[:ignore] ; end

    def initialize(pattern, symbol, opts = {})
      @pattern, @symbol, @opts = pattern, symbol, opts
    end

    # Rules are equal when pattern, symbol, converter and ignore flag all
    # match; anything that is not a Rule is never equal.
    def ==(other)
      other.is_a?(Rule) &&
        @pattern == other.pattern &&
        @symbol == other.symbol &&
        @opts[:convert_with] == other.converter &&
        @opts[:ignore] == other.ignore?
    end
  end

  # Collects rules declared inside a Lexr.that block and acts as the factory
  # for lexers over a given text.
  class Dsl
    def initialize
      @rules = []
    end

    # Declares a rule from a hash of the form
    #   pattern => :symbol, :option => value, ...
    # The first non-Symbol key is the pattern; Symbol keys are options.
    # The hash passed in is left untouched.
    #
    # Raises ArgumentError when the hash has no pattern key.
    def matches(rule_hash)
      pattern = rule_hash.keys.find { |key| !key.is_a?(Symbol) }
      raise ArgumentError, "expected a pattern => symbol pair" if pattern.nil?

      symbol = rule_hash[pattern]
      # reject returns a new Hash, so the caller's hash is not mutated.
      opts = rule_hash.reject { |key, _| !key.is_a?(Symbol) }
      @rules << Rule.new(pattern, symbol, opts)
    end

    # Declares a rule whose matches are consumed but never returned as tokens.
    def ignores(rule_hash)
      matches rule_hash.merge(:ignore => true)
    end

    # Creates a lexer over str using the rules declared so far.
    def new(str)
      Lexr.new(str, @rules)
    end
  end

  # Raised when no rule matches the text at the current position.
  # `position` is zero-based; the message reports it one-based.
  class UnmatchableTextError < StandardError
    attr_reader :character, :position

    def initialize(character, position)
      @character, @position = character, position
      # Hand the text to StandardError so #message, #to_s and string
      # interpolation all agree.
      super("Unexpected character '#{character}' at position #{position + 1}")
    end

    def inspect
      message
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: lexr
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 27
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
- 0
|
10
|
+
version: 0.1.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Michael Baldry
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2010-11-12 00:00:00 +00:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: rspec
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 3
|
30
|
+
segments:
|
31
|
+
- 0
|
32
|
+
version: "0"
|
33
|
+
type: :development
|
34
|
+
version_requirements: *id001
|
35
|
+
description:
|
36
|
+
email: michael.baldry@uswitch.com
|
37
|
+
executables: []
|
38
|
+
|
39
|
+
extensions: []
|
40
|
+
|
41
|
+
extra_rdoc_files:
|
42
|
+
- README.md
|
43
|
+
files:
|
44
|
+
- lexr.rb
|
45
|
+
- README.md
|
46
|
+
- lib/lexr.rb
|
47
|
+
has_rdoc: true
|
48
|
+
homepage: http://www.forwardtechnology.co.uk
|
49
|
+
licenses: []
|
50
|
+
|
51
|
+
post_install_message:
|
52
|
+
rdoc_options:
|
53
|
+
- --main
|
54
|
+
- README.md
|
55
|
+
require_paths:
|
56
|
+
- lib
|
57
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
58
|
+
none: false
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
hash: 3
|
63
|
+
segments:
|
64
|
+
- 0
|
65
|
+
version: "0"
|
66
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
67
|
+
none: false
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
hash: 3
|
72
|
+
segments:
|
73
|
+
- 0
|
74
|
+
version: "0"
|
75
|
+
requirements: []
|
76
|
+
|
77
|
+
rubyforge_project:
|
78
|
+
rubygems_version: 1.3.7
|
79
|
+
signing_key:
|
80
|
+
specification_version: 3
|
81
|
+
summary: A lightweight and pretty lexical analyser
|
82
|
+
test_files: []
|
83
|
+
|