simple_lexer 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +2 -2
- data/lib/simple_lexer.rb +62 -45
- data/lib/simple_lexer/version.rb +1 -1
- metadata +6 -5
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# SimpleLexer
|
2
2
|
|
3
|
-
|
3
|
+
A very basic toy Lexer implemented with Regular Expressions.
|
4
4
|
|
5
5
|
## Installation
|
6
6
|
|
@@ -18,7 +18,7 @@ Or install it yourself as:
|
|
18
18
|
|
19
19
|
## Usage
|
20
20
|
|
21
|
-
|
21
|
+
See docs for SimpleLexer::Lexer.
|
22
22
|
|
23
23
|
## Contributing
|
24
24
|
|
data/lib/simple_lexer.rb
CHANGED
@@ -2,18 +2,31 @@ require_relative "simple_lexer/version"
|
|
2
2
|
|
3
3
|
module SimpleLexer
|
4
4
|
|
5
|
+
|
6
|
+
# An Exception that is raised when Lexer encounters text for which
|
7
|
+
# there is no rule to match.
|
5
8
|
class NoMatchError < Exception
|
6
|
-
# unable to match
|
7
9
|
end
|
8
10
|
|
11
|
+
# Exception that is raised when Lexer is finished tokenizing the
|
12
|
+
# input string.
|
9
13
|
class EndOfStreamException < Exception
|
10
|
-
# when the Lexer is finished
|
11
14
|
end
|
12
15
|
|
16
|
+
# Object defined with certain rules that takes text as input and
|
17
|
+
# outputs Tokens based on the rules.
|
18
|
+
# @!attribute [r] rules
|
19
|
+
# @return [Array<Regexp>] A list of the rules for the Lexer.
|
20
|
+
# @!attribute [rw] pos
|
21
|
+
# @return [Fixnum] The current position of the input pointer.
|
13
22
|
class Lexer
|
14
23
|
|
15
|
-
attr_reader :rules
|
24
|
+
attr_reader :rules
|
25
|
+
attr_accessor :pos
|
16
26
|
|
27
|
+
# Creates a new instance of Lexer.
|
28
|
+
# @yield [] Some rules passed to instance_eval.
|
29
|
+
# @see #tok An example of a number Lexer using <code>tok</code>.
|
17
30
|
def initialize(&rules)
|
18
31
|
@rules = [] # list of {:rule => Regexp, :token => :token_id}
|
19
32
|
@ignore = [] # list of Regexp
|
@@ -21,24 +34,37 @@ module SimpleLexer
|
|
21
34
|
instance_eval &rules
|
22
35
|
end
|
23
36
|
|
37
|
+
# Defines a new Token rule for the Lexer to match.
|
38
|
+
# @param [Regexp] rule Regular expression that defines the token
|
39
|
+
# @param [Symbol] token Token class
|
40
|
+
# @yield [text] The expression will give the Token its value.
|
41
|
+
# @example Rule for numbers
|
42
|
+
# my_lexer = SimpleLexer::Lexer.new do
|
43
|
+
# tok /-?\d+(\.\d+)?/, :number do |text| text.to_f end
|
44
|
+
# end
|
45
|
+
# my_lexer.load = "-435.234"
|
46
|
+
# puts my_lexer.next_token[:value] # -435.234
|
24
47
|
def tok(rule, token, &action)
|
25
|
-
# defining a new rule:
|
26
|
-
#
|
27
|
-
# my_lexer = SimpleLexer::Lexer.new do
|
28
|
-
# tok /\w+/, :identifier
|
29
|
-
# end
|
30
|
-
|
31
48
|
@rules << {:rule => Regexp.new('\A' + rule.source), :token => token, :action => action}
|
32
49
|
end
|
33
50
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
51
|
+
# Defines rules of input classes to ignore (consume and not output any
|
52
|
+
# tokens.)
|
53
|
+
# @param [Regexp, Symbol] rule Regular expression that defines ignored
|
54
|
+
# characters.
|
55
|
+
# @note You can set _rule_ to <code>:whitespace</code> to ignore whitespace
|
56
|
+
# characters.
|
57
|
+
# @example Ignoring parentheses
|
58
|
+
# my_lexer = SimpleLexer::Lexer.new do
|
59
|
+
# tok /\w+/, :identifier
|
60
|
+
# ign /[\(\)]/
|
61
|
+
# end
|
62
|
+
# @example Ignoring whitespace
|
63
|
+
# my_lexer = SimpleLexer::Lexer.new do
|
64
|
+
# tok /\w+/, :identifier
|
65
|
+
# ign :whitespace
|
66
|
+
# end
|
67
|
+
def ign(rule)
|
42
68
|
if rule == :whitespace
|
43
69
|
rule = /\s+/
|
44
70
|
end
|
@@ -46,21 +72,26 @@ module SimpleLexer
|
|
46
72
|
@ignore << Regexp.new('\A' + rule.source)
|
47
73
|
end
|
48
74
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
@load = string
|
75
|
+
# Give the Lexer some text to tokenize.
|
76
|
+
# @param [String] input Text for the Lexer to tokenize.
|
77
|
+
def load=(input)
|
78
|
+
@load = input
|
54
79
|
@pos = 0
|
55
80
|
end
|
56
|
-
|
81
|
+
|
82
|
+
# What still remains to be processed.
|
83
|
+
# @return [String] Substring of the input starting from input pointer.
|
57
84
|
def load
|
58
|
-
# what the lexer currently sees
|
59
|
-
# my_lexer.load ...
|
60
|
-
|
61
85
|
@load[@pos..-1]
|
62
86
|
end
|
63
87
|
|
88
|
+
# Gets the next Token in the input and advances the input pointer.
|
89
|
+
# @return [Hash{Symbol=>Values}]
|
90
|
+
# - <code>:token</code> Token class
|
91
|
+
# - <code>:text</code> Matched text
|
92
|
+
# - <code>:value</code> Value as defined by passed block, if applicable.
|
93
|
+
# @raise [NoMatchError] If load contains a sequence for which the Lexer has
|
94
|
+
# no rule.
|
64
95
|
def next_token
|
65
96
|
# get the next token
|
66
97
|
# my_lexer.next_token -> [ :token => :token_id, :text => matched ]
|
@@ -85,10 +116,9 @@ module SimpleLexer
|
|
85
116
|
raise NoMatchError, "Unable to match, unexpected characters: '#{load[0..10]}...'"
|
86
117
|
end
|
87
118
|
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
119
|
+
# Tokenize the entire input stream.
|
120
|
+
# @return [Array<Hash>] An Array of Tokens processed by the Lexer
|
121
|
+
def all_tokens
|
92
122
|
tokens = []
|
93
123
|
loop do
|
94
124
|
tokens << next_token
|
@@ -97,24 +127,11 @@ module SimpleLexer
|
|
97
127
|
tokens
|
98
128
|
end
|
99
129
|
|
130
|
+
# Checks if the Lexer has finished Tokenizing the entire input stream.
|
131
|
+
# @return [Boolean] Whether Lexer has reached the end of input.
|
100
132
|
def finished?
|
101
133
|
return @pos >= @load.length
|
102
134
|
end
|
103
135
|
|
104
136
|
end
|
105
137
|
end
|
106
|
-
|
107
|
-
my_lexer = SimpleLexer::Lexer.new do
|
108
|
-
tok /-?\d+(\.\d+)?/, :number do |t| t.to_f end
|
109
|
-
tok /\+/, :plus
|
110
|
-
tok /-/, :minus
|
111
|
-
tok /\//, :div
|
112
|
-
tok /\*/, :mult
|
113
|
-
tok /\(/, :lparen
|
114
|
-
tok /\)/, :rparen
|
115
|
-
|
116
|
-
ign :whitespace
|
117
|
-
end
|
118
|
-
|
119
|
-
my_lexer.load = "321.32 + -432.388 - 33/4.3 - 4.228 * 5 - (32*632)"
|
120
|
-
p my_lexer.all_tokens
|
data/lib/simple_lexer/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple_lexer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2013-11-16 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
16
|
-
requirement: &
|
16
|
+
requirement: &10265840 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '1.3'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *10265840
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rake
|
27
|
-
requirement: &
|
27
|
+
requirement: &10264520 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,7 +32,7 @@ dependencies:
|
|
32
32
|
version: '0'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *10264520
|
36
36
|
description: A simple toy lexer for Ruby
|
37
37
|
email:
|
38
38
|
- wchen298@gmail.com
|
@@ -74,3 +74,4 @@ signing_key:
|
|
74
74
|
specification_version: 3
|
75
75
|
summary: Rudimentary lexer for Ruby
|
76
76
|
test_files: []
|
77
|
+
has_rdoc:
|