lexr 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. data/README.md +42 -0
  2. data/lexr.rb +1 -0
  3. data/lib/lexr.rb +117 -0
  4. metadata +83 -0
data/README.md ADDED
@@ -0,0 +1,42 @@
1
+ ## Lexr
2
+
3
+ Lexr is a lightweight lexical analyser written in Ruby. It has no dependencies, has good test coverage, looks pretty and reads well.
4
+
5
+ # An example: Expressions
6
+
7
+ ExpressionLexer = Lexr.that {
8
+ ignores /\s+/ => :whitespace
9
+ matches /[-+]?[0-9]*\.?[0-9]+/ => :number, :convert_with => lambda { |v| Float(v) }
10
+ matches "+" => :addition
11
+ matches "-" => :subtraction
12
+ matches "*" => :multiplication
13
+ matches "/" => :division
14
+ matches "(" => :left_parenthesis
15
+ matches ")" => :right_parenthesis
16
+ }
17
+
18
+ lexer = ExpressionLexer.new("1 * 12.5 / (55 + 2 - 56)")
19
+
20
+ while (token = lexer.next) != Lexr::Token.end
21
+ puts token
22
+ end
23
+
24
+ results in an output of
25
+
26
+ number(1.0)
27
+ multiplication(*)
28
+ number(12.5)
29
+ division(/)
30
+ left_parenthesis(()
31
+ number(55.0)
32
+ addition(+)
33
+ number(2.0)
34
+ subtraction(-)
35
+ number(56.0)
36
+ right_parenthesis())
37
+
38
+ if you added a % in there somewhere, you'd get a Lexr::UnmatchableTextError with a message like this:
39
+
40
+ => Unexpected character '%' at position 5
41
+
42
+ and that is pretty much every feature so far. Please let me know of any bugs, or of any additions that you'd like to see!
data/lexr.rb ADDED
@@ -0,0 +1 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), 'lib', 'lexr'))
data/lib/lexr.rb ADDED
@@ -0,0 +1,117 @@
1
# Lexr is a small rule-driven lexical analyser.
#
# A lexer is described with the DSL returned by Lexr.that and then
# instantiated over a piece of text:
#
#   lexer = Lexr.that { matches "+" => :addition }.new("+")
#   lexer.next # => addition(+)
#
# Rules are tried in the order they were declared; the first rule whose
# pattern matches at the current position wins.
class Lexr
  # Builds a lexer definition.
  #
  # The block is either evaluated in the context of the DSL object (no block
  # parameter) or is handed the DSL object (one block parameter).
  #
  # @return [Lexr::Dsl] the populated definition; call #new(text) on it
  # @raise [ArgumentError] when no block is given
  def self.that(&block)
    raise ArgumentError, 'Lexr.that requires a block' unless block

    dsl = Lexr::Dsl.new
    block.arity == 1 ? block[dsl] : dsl.instance_eval(&block)
    dsl
  end

  # @param text  [String] the input to tokenise
  # @param rules [Array<Lexr::Rule>] rules in priority order
  def initialize(text, rules)
    @text, @rules = text, rules
    @position = 0
    # Cache of start-anchored regexps, keyed by the rule's own regexp, so the
    # anchored form is compiled once instead of on every match attempt.
    @anchored = {}
  end

  # Returns the next non-ignored token, or Lexr::Token.end once the input is
  # exhausted.
  #
  # @return [Lexr::Token]
  # @raise [Lexr::UnmatchableTextError] when no rule matches at the current
  #   position
  def next
    # Iterate (rather than recurse) over ignored matches so that long runs of
    # ignored tokens cannot exhaust the stack.
    until @position >= @text.length
      rule, matched = first_match
      raise Lexr::UnmatchableTextError.new(unprocessed_text[0..0], @position) unless rule

      matched = rule.converter[matched] if rule.converter
      return Lexr::Token.new(matched, rule.symbol) unless rule.ignore?
    end
    Lexr::Token.end
  end

  private

  # Tries each rule in order at the current position.
  #
  # @return [Array(Lexr::Rule, String), nil] the winning rule and the text it
  #   consumed, or nil when nothing matches
  def first_match
    @rules.each do |rule|
      pattern = rule.pattern
      matched = pattern.instance_of?(Regexp) ? regexp_match(pattern) : literal_match(pattern)
      return [rule, matched] if matched
    end
    nil
  end

  # The part of the input that has not been consumed yet.
  def unprocessed_text
    @text[@position..-1]
  end

  # Matches +regex+ at the current position only, consuming the match.
  #
  # The pattern is anchored with \A (start of string), not ^ (start of any
  # line): with ^ a match could be found at the beginning of a *later* line and
  # everything before it would be skipped silently.
  #
  # @return [String, nil] the matched text, or nil when there is no match
  def regexp_match(regex)
    anchored = (@anchored[regex] ||= /\A#{regex}/)
    m = unprocessed_text.match(anchored)
    # A zero-length match consumes nothing, so accepting it would never make
    # progress; treat it as "no match" and let later rules have a go.
    return nil if m.nil? || m.end(0).zero?

    @position += m.end(0)
    m[0]
  end

  # Matches the literal +lit+ at the current position, consuming it.
  #
  # @return [String, nil] +lit+ when matched, otherwise nil
  def literal_match(lit)
    length = lit.length
    # An empty literal can never make progress, so it never matches.
    return nil if length.zero? || @text[@position, length] != lit

    @position += length
    lit
  end

  # A lexed token: a (possibly converted) value tagged with a type symbol.
  class Token
    attr_reader :value, :type

    def initialize(value, type = nil)
      @value, @type = value, type
    end

    # Token factory: Token.some_type(value) builds a token of type :some_type.
    # In particular Token.end is the end-of-input marker, type :end with a nil
    # value.
    def self.method_missing(sym, *args)
      new(args.first, sym)
    end

    def to_s
      "#{type}(#{value})"
    end

    # Tokens are equal when both type and value are equal. Comparing with
    # anything that is not a Token (nil included) is false rather than an error.
    def ==(other)
      other.is_a?(Token) && type == other.type && value == other.value
    end
  end

  # A single lexing rule: a pattern (Regexp or literal), the token type it
  # produces, and options (:convert_with, :ignore).
  class Rule
    attr_reader :pattern, :symbol

    def initialize(pattern, symbol, opts = {})
      @pattern, @symbol, @opts = pattern, symbol, opts
    end

    # @return [#[], nil] callable applied to the matched text, if any
    def converter
      @opts[:convert_with]
    end

    # @return [Boolean, nil] truthy when matches of this rule are discarded
    def ignore?
      @opts[:ignore]
    end

    # Rules are equal when pattern, symbol, converter and ignore flag all
    # agree. Comparing with a non-Rule is false rather than an error.
    def ==(other)
      other.is_a?(Rule) &&
        pattern == other.pattern &&
        symbol == other.symbol &&
        converter == other.converter &&
        ignore? == other.ignore?
    end
  end

  # Collects rules and acts as a factory for lexers that use them.
  class Dsl
    def initialize
      @rules = []
    end

    # Declares a rule. The hash carries one non-Symbol key (the pattern)
    # mapped to the token type, plus optional Symbol-keyed options, e.g.
    #
    #   matches /\d+/ => :number, :convert_with => lambda { |v| Integer(v) }
    #
    # The hash passed in is left untouched.
    def matches(rule_hash)
      pattern = rule_hash.keys.find { |key| !key.is_a?(Symbol) }
      symbol = rule_hash[pattern]
      opts = rule_hash.select { |key, _| key.is_a?(Symbol) }
      @rules << Rule.new(pattern, symbol, opts)
    end

    # Declares a rule whose matches are consumed but never returned.
    def ignores(rule_hash)
      matches rule_hash.merge(:ignore => true)
    end

    # Creates a lexer over +str+ using the rules declared so far.
    #
    # @return [Lexr]
    def new(str)
      Lexr.new(str, @rules)
    end
  end

  # Raised when no rule matches the input at the current position.
  class UnmatchableTextError < StandardError
    attr_reader :character, :position

    # @param character [String] the offending character
    # @param position  [Integer] zero-based offset of that character
    def initialize(character, position)
      @character, @position = character, position
      # Hand the text to StandardError as well so #to_s agrees with #message.
      super(message)
    end

    # Human-readable description; the position is reported one-based.
    def message
      "Unexpected character '#{character}' at position #{position + 1}"
    end

    def inspect
      message
    end
  end
end
metadata ADDED
@@ -0,0 +1,83 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: lexr
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - Michael Baldry
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-11-12 00:00:00 +00:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: rspec
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 3
30
+ segments:
31
+ - 0
32
+ version: "0"
33
+ type: :development
34
+ version_requirements: *id001
35
+ description:
36
+ email: michael.baldry@uswitch.com
37
+ executables: []
38
+
39
+ extensions: []
40
+
41
+ extra_rdoc_files:
42
+ - README.md
43
+ files:
44
+ - lexr.rb
45
+ - README.md
46
+ - lib/lexr.rb
47
+ has_rdoc: true
48
+ homepage: http://www.forwardtechnology.co.uk
49
+ licenses: []
50
+
51
+ post_install_message:
52
+ rdoc_options:
53
+ - --main
54
+ - README.md
55
+ require_paths:
56
+ - lib
57
+ required_ruby_version: !ruby/object:Gem::Requirement
58
+ none: false
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ hash: 3
63
+ segments:
64
+ - 0
65
+ version: "0"
66
+ required_rubygems_version: !ruby/object:Gem::Requirement
67
+ none: false
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ hash: 3
72
+ segments:
73
+ - 0
74
+ version: "0"
75
+ requirements: []
76
+
77
+ rubyforge_project:
78
+ rubygems_version: 1.3.7
79
+ signing_key:
80
+ specification_version: 3
81
+ summary: A lightweight and pretty lexical analyser
82
+ test_files: []
83
+