simple_lexer 0.0.2 → 0.0.3
This diff shows the content of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
- data/README.md +2 -2
- data/lib/simple_lexer.rb +62 -45
- data/lib/simple_lexer/version.rb +1 -1
- metadata +6 -5
data/README.md
CHANGED
@@ -1,6 +1,6 @@
 # SimpleLexer
 
-
+A very basic toy Lexer implemented with Regular Expressions.
 
 ## Installation
 
@@ -18,7 +18,7 @@ Or install it yourself as:
 
 ## Usage
 
-
+See docs for SimpleLexer::Lexer.
 
 ## Contributing
 
data/lib/simple_lexer.rb
CHANGED
@@ -2,18 +2,31 @@ require_relative "simple_lexer/version"
 
 module SimpleLexer
 
+
+  # An Exception that is raised when Lexer encounters text for which
+  # there is no rule to match.
   class NoMatchError < Exception
-    # unable to match
   end
 
+  # Exception that is raised when Lexer is finished tokenizing the
+  # input string.
   class EndOfStreamException < Exception
-    # when the Lexer is finished
   end
 
+  # Object defined with certain rules that takes text as input and
+  # outputs Tokens based on the rules.
+  # @!attribute [r] rules
+  #   @return [Array<Regexp>] A list of the rules for the Lexer.
+  # @!attribute [rw] pos
+  #   @return [Fixnum] The current position of the input pointer.
   class Lexer
 
-    attr_reader :rules
+    attr_reader :rules
+    attr_accessor :pos
 
+    # Creates a new instance of Lexer.
+    # @yield [] Some rules passed to instance_eval.
+    # @see #tok An example of a number Lexer using <code>tok</code>.
     def initialize(&rules)
       @rules = [] # list of {:rule => Regexp, :token => :token_id}
       @ignore = [] # list of Regexp
@@ -21,24 +34,37 @@ module SimpleLexer
       instance_eval &rules
     end
 
+    # Defines a new Token rule for the Lexer to match.
+    # @param [Regexp] rule Regular expression that defines the token
+    # @param [Symbol] token Token class
+    # @yield [text] The expression will give the Token its value.
+    # @example Rule for numbers
+    #   my_lexer = SimpleLexer::Lexer.new do
+    #     tok /-?\d+(\.\d+)?/, :number do |text| text.to_f end
+    #   end
+    #   my_lexer.load = "-435.234"
+    #   puts my_lexer.next_token[:value] # -435.234
     def tok(rule, token, &action)
-      # defining a new rule:
-      #
-      #   my_lexer = SimpleLexer::Lexer.new do
-      #     tok /\w+/, :identifier
-      #   end
-
       @rules << {:rule => Regexp.new('\A' + rule.source), :token => token, :action => action}
     end
 
-
-
-
-
-
-
-
-
+    # Defines rules of input classes to ignore (consume and not output any
+    # tokens.)
+    # @param [Regexp, Symbol] rule Regular expression that defines ignored
+    #   characters.
+    # @note You can set _rule_ to <code>:whitespace</code> to ignore whitespace
+    #   characters.
+    # @example Ignoring parentheses
+    #   my_lexer = SimpleLexer::Lexer.new do
+    #     tok /\w+/, :identifier
+    #     ign /[\(\)]/
+    #   end
+    # @example Ignoring whitespace
+    #   my_lexer = SimpleLexer::Lexer.new do
+    #     tok /\w+/, :identifier
+    #     ign :whitespace
+    #   end
+    def ign(rule)
       if rule == :whitespace
         rule = /\s+/
       end
@@ -46,21 +72,26 @@ module SimpleLexer
       @ignore << Regexp.new('\A' + rule.source)
     end
 
-
-
-
-
-      @load = string
+    # Give the Lexer some text to tokenize.
+    # @param [String] input Text for the Lexer to tokenize.
+    def load=(input)
+      @load = input
       @pos = 0
     end
-
+
+    # What still remains to be processed.
+    # @return [String] Substring of the input starting from input pointer.
     def load
-      # what the lexer currently sees
-      # my_lexer.load ...
-
       @load[@pos..-1]
     end
 
+    # Gets the next Token in the input and advances the input pointer.
+    # @return [Hash{Symbol=>Values}]
+    #   - <code>:token</code> Token class
+    #   - <code>:text</code> Matched text
+    #   - <code>:value</code> Value as defined by passed block, if applicable.
+    # @raise [NoMatchError] If load contains a sequence for which the Lexer has
+    #   no rule.
     def next_token
       # get the next token
       # my_lexer.next_token -> [ :token => :token_id, :text => matched ]
@@ -85,10 +116,9 @@ module SimpleLexer
       raise NoMatchError, "Unable to match, unexpected characters: '#{load[0..10]}...'"
     end
 
-
-
-
-
+    # Tokenize the entire input stream.
+    # @return [Array<Hash>] An Array of Tokens processed by the Lexer
+    def all_tokens
       tokens = []
       loop do
         tokens << next_token
@@ -97,24 +127,11 @@ module SimpleLexer
       tokens
     end
 
+    # Checks if the Lexer has finished Tokenizing the entire input stream.
+    # @return [Boolean] Whether Lexer has reached the end of input.
    def finished?
      return @pos >= @load.length
    end
 
  end
 end
-
-my_lexer = SimpleLexer::Lexer.new do
-  tok /-?\d+(\.\d+)?/, :number do |t| t.to_f end
-  tok /\+/, :plus
-  tok /-/, :minus
-  tok /\//, :div
-  tok /\*/, :mult
-  tok /\(/, :lparen
-  tok /\)/, :rparen
-
-  ign :whitespace
-end
-
-my_lexer.load = "321.32 + -432.388 - 33/4.3 - 4.228 * 5 - (32*632)"
-p my_lexer.all_tokens
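The YARD comments added above document the public API (tok, ign, load=, load, next_token, all_tokens, finished?), while the driver script that used to sit at the bottom of the file is removed in 0.0.3. For reference, here is that usage as a standalone sketch, adapted from the removed script and the new @example tags; the token Hash shown in the trailing comment is inferred from the documented next_token return value rather than copied from the diff:

# Standalone usage sketch of the 0.0.3 API, adapted from the demo script
# removed above and the new @example comments.
require "simple_lexer"

my_lexer = SimpleLexer::Lexer.new do
  tok /-?\d+(\.\d+)?/, :number do |t| t.to_f end   # block supplies the token's :value
  tok /\+/, :plus
  tok /-/, :minus

  ign :whitespace   # consume whitespace without emitting tokens
end

my_lexer.load = "321.32 + -432.388"
p my_lexer.all_tokens
# => an Array of Hashes such as {:token => :number, :text => "321.32", :value => 321.32},
#    with keys as described by the next_token documentation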
data/lib/simple_lexer/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: simple_lexer
 version: !ruby/object:Gem::Version
-  version: 0.0.2
+  version: 0.0.3
 prerelease:
 platform: ruby
 authors:
@@ -13,7 +13,7 @@ date: 2013-11-16 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
-  requirement: &
+  requirement: &10265840 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ~>
@@ -21,10 +21,10 @@ dependencies:
         version: '1.3'
   type: :development
   prerelease: false
-  version_requirements: *
+  version_requirements: *10265840
 - !ruby/object:Gem::Dependency
   name: rake
-  requirement: &
+  requirement: &10264520 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -32,7 +32,7 @@ dependencies:
         version: '0'
   type: :development
   prerelease: false
-  version_requirements: *
+  version_requirements: *10264520
 description: A simple toy lexer for Ruby
 email:
 - wchen298@gmail.com
@@ -74,3 +74,4 @@ signing_key:
 specification_version: 3
 summary: Rudimentary lexer for Ruby
 test_files: []
+has_rdoc:
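The requirement entries above are the YAML serialization of the gem's development dependencies (bundler pinned to ~> 1.3, rake left at >= 0); the &10265840 / *10265840 pairs are ordinary YAML anchors and aliases that let requirement and version_requirements share one object. A gemspec producing metadata like this would look roughly like the sketch below; the file name, field order, and version-constant path are assumptions, not taken from the diff:

# simple_lexer.gemspec -- hypothetical sketch reconstructed from the metadata above
require_relative "lib/simple_lexer/version"

Gem::Specification.new do |spec|
  spec.name        = "simple_lexer"
  spec.version     = SimpleLexer::VERSION            # "0.0.3" for this release
  spec.summary     = "Rudimentary lexer for Ruby"
  spec.description = "A simple toy lexer for Ruby"
  spec.email       = ["wchen298@gmail.com"]

  # These become the Gem::Dependency entries shown in the metadata diff.
  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
end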