rlex 0.5.3 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/.yardopts +1 -1
- data/CHANGELOG.md +22 -0
- data/lib/rlex/lexer.rb +27 -6
- data/lib/rlex/token.rb +4 -2
- data/lib/rlex/version.rb +1 -1
- data/rlex.gemspec +2 -0
- data/spec/rlex/lexer_spec.rb +23 -13
- metadata +15 -4
data/.gitignore
CHANGED
data/.yardopts
CHANGED
@@ -1 +1 @@
|
|
1
|
-
--no-private lib - LICENSE
|
1
|
+
--no-private lib - LICENSE CHANGELOG.md
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# Changes
|
2
|
+
|
3
|
+
## Version 0.6.0
|
4
|
+
|
5
|
+
Produced tokens now include line number and column position of the matched text
|
6
|
+
|
7
|
+
## Version 0.5.4
|
8
|
+
|
9
|
+
Update gem spec to reflect development dependency on `rspec`
|
10
|
+
|
11
|
+
## Version 0.5.3
|
12
|
+
|
13
|
+
Add `require` statements internally so `require 'rlex'` is sufficient to use
|
14
|
+
the gem
|
15
|
+
|
16
|
+
## Version 0.5.2
|
17
|
+
|
18
|
+
YARD docs generated correctly
|
19
|
+
|
20
|
+
## Version 0.5.0
|
21
|
+
|
22
|
+
Initial version
|
data/lib/rlex/lexer.rb
CHANGED
@@ -100,10 +100,11 @@ module Rlex
|
|
100
100
|
#
|
101
101
|
def keyword(name = nil, kword)
|
102
102
|
# @todo Validate the keyword name
|
103
|
-
|
104
|
-
|
103
|
+
kword_str = kword.to_s
|
104
|
+
name = kword.to_sym if name == nil
|
105
|
+
pattern = Regexp.new(Regexp.escape kword_str)
|
105
106
|
rule name, pattern
|
106
|
-
@keywords[
|
107
|
+
@keywords[kword_str] = Token.new name.to_sym, kword_str
|
107
108
|
return name.to_sym
|
108
109
|
end
|
109
110
|
|
@@ -117,6 +118,8 @@ module Rlex
|
|
117
118
|
# @return [String] The specified input
|
118
119
|
#
|
119
120
|
def start(input)
|
121
|
+
@line = 1
|
122
|
+
@col = 0
|
120
123
|
@scanner = StringScanner.new input
|
121
124
|
return input
|
122
125
|
end
|
@@ -134,9 +137,11 @@ module Rlex
|
|
134
137
|
return next_token if ignore_prefix?
|
135
138
|
rule = greediest_rule
|
136
139
|
if rule
|
137
|
-
prefix =
|
140
|
+
prefix = fetch_prefix_and_update_pos(rule.pattern)
|
138
141
|
keyword = @keywords[prefix]
|
139
|
-
|
142
|
+
type = keyword ? keyword.type : rule.name
|
143
|
+
token = keyword ? keyword.value : prefix
|
144
|
+
return Token.new(type, token, @line, @col - token.size)
|
140
145
|
end
|
141
146
|
raise "unexpected input <#{@scanner.peek(5)}>"
|
142
147
|
end
|
@@ -149,7 +154,7 @@ module Rlex
|
|
149
154
|
# @private
|
150
155
|
def ignore_prefix?
|
151
156
|
@ignored.each do |pattern|
|
152
|
-
prefix =
|
157
|
+
prefix = fetch_prefix_and_update_pos(pattern)
|
153
158
|
return true if prefix
|
154
159
|
end
|
155
160
|
return false
|
@@ -168,5 +173,21 @@ module Rlex
|
|
168
173
|
end
|
169
174
|
return r
|
170
175
|
end
|
176
|
+
|
177
|
+
# @private
|
178
|
+
def fetch_prefix_and_update_pos(pattern)
|
179
|
+
prefix = @scanner.scan(pattern)
|
180
|
+
return nil if not prefix
|
181
|
+
parts = prefix.split("\n", -1) # arg -1 allows empty lines
|
182
|
+
if parts.count == 1
|
183
|
+
# Staying on the same line
|
184
|
+
@col += prefix.length
|
185
|
+
else
|
186
|
+
# On a new line
|
187
|
+
@line += parts.count - 1
|
188
|
+
@col = parts.last.length
|
189
|
+
end
|
190
|
+
return prefix
|
191
|
+
end
|
171
192
|
end
|
172
193
|
end
|
data/lib/rlex/token.rb
CHANGED
@@ -6,11 +6,13 @@ module Rlex
|
|
6
6
|
# @attr_reader [Symbol] type Type of the token, such as the name of
|
7
7
|
# the rule used to match it
|
8
8
|
# @attr_reader [String] value Text matched from the input
|
9
|
+
# @attr_reader [Integer] line Line number of the matched text
|
10
|
+
# @attr_reader [Integer] col Column position of the matched text
|
9
11
|
#
|
10
|
-
Token = Struct.new :type, :value
|
12
|
+
Token = Struct.new :type, :value, :line, :col
|
11
13
|
|
12
14
|
# Special token used when the lexer has reached the end of the
|
13
15
|
# specified input.
|
14
16
|
#
|
15
|
-
EOF_TOKEN = Token.new :eof, ""
|
17
|
+
EOF_TOKEN = Token.new :eof, "", -1, -1
|
16
18
|
end
|
data/lib/rlex/version.rb
CHANGED
data/rlex.gemspec
CHANGED
data/spec/rlex/lexer_spec.rb
CHANGED
@@ -32,10 +32,9 @@ describe Lexer do
|
|
32
32
|
@lexer.ignore /\s+/
|
33
33
|
@lexer.keyword :special
|
34
34
|
@lexer.start " \t\nspecialspecial   special "
|
35
|
-
|
36
|
-
@lexer.next_token.should eq special
|
37
|
-
@lexer.next_token.should eq special
|
38
|
-
@lexer.next_token.should eq special
|
35
|
+
@lexer.next_token.should eq Token.new(:special, "special", 2, 0)
|
36
|
+
@lexer.next_token.should eq Token.new(:special, "special", 2, 7)
|
37
|
+
@lexer.next_token.should eq Token.new(:special, "special", 2, 17)
|
39
38
|
@lexer.next_token.should eq EOF_TOKEN
|
40
39
|
end
|
41
40
|
|
@@ -43,10 +42,10 @@ describe Lexer do
|
|
43
42
|
@lexer.ignore /\s+/
|
44
43
|
@lexer.rule :word, /\w+/
|
45
44
|
@lexer.start "sentence with four tokens"
|
46
|
-
@lexer.next_token.should eq Token.new
|
47
|
-
@lexer.next_token.should eq Token.new
|
48
|
-
@lexer.next_token.should eq Token.new
|
49
|
-
@lexer.next_token.should eq Token.new
|
45
|
+
@lexer.next_token.should eq Token.new(:word, "sentence", 1, 0)
|
46
|
+
@lexer.next_token.should eq Token.new(:word, "with", 1, 9)
|
47
|
+
@lexer.next_token.should eq Token.new(:word, "four", 1, 14)
|
48
|
+
@lexer.next_token.should eq Token.new(:word, "tokens", 1, 19)
|
50
49
|
@lexer.next_token.should eq EOF_TOKEN
|
51
50
|
end
|
52
51
|
|
@@ -57,11 +56,22 @@ describe Lexer do
|
|
57
56
|
@lexer.keyword :rparen, ")"
|
58
57
|
@lexer.rule :word, /\w+/
|
59
58
|
@lexer.start "ifu ( if ) ifu"
|
60
|
-
@lexer.next_token.should eq Token.new
|
61
|
-
@lexer.next_token.should eq Token.new
|
62
|
-
@lexer.next_token.should eq Token.new
|
63
|
-
@lexer.next_token.should eq Token.new
|
64
|
-
@lexer.next_token.should eq Token.new
|
59
|
+
@lexer.next_token.should eq Token.new(:word, "ifu", 1, 0)
|
60
|
+
@lexer.next_token.should eq Token.new(:lparen, "(", 1, 4)
|
61
|
+
@lexer.next_token.should eq Token.new(:if, "if", 1, 6)
|
62
|
+
@lexer.next_token.should eq Token.new(:rparen, ")", 1, 9)
|
63
|
+
@lexer.next_token.should eq Token.new(:word, "ifu", 1, 11)
|
64
|
+
@lexer.next_token.should eq EOF_TOKEN
|
65
|
+
end
|
66
|
+
|
67
|
+
it "should recognize keywords even if declared after rules which also match" do
|
68
|
+
@lexer.ignore /\s+/
|
69
|
+
@lexer.rule :word, /\w+/
|
70
|
+
@lexer.keyword :keyword
|
71
|
+
@lexer.start "word keyword keywordmore"
|
72
|
+
@lexer.next_token.should eq Token.new(:word, "word", 1, 0)
|
73
|
+
@lexer.next_token.should eq Token.new(:keyword, "keyword", 1, 5)
|
74
|
+
@lexer.next_token.should eq Token.new(:word, "keywordmore", 1, 13)
|
65
75
|
@lexer.next_token.should eq EOF_TOKEN
|
66
76
|
end
|
67
77
|
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: rlex
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.5.3
|
5
|
+
version: 0.6.0
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Rasmus Borgsmidt
|
@@ -10,9 +10,19 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2012-
|
14
|
-
dependencies:
|
15
|
-
|
13
|
+
date: 2012-05-16 00:00:00 Z
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: rspec
|
17
|
+
prerelease: false
|
18
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
19
|
+
none: false
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: "0"
|
24
|
+
type: :development
|
25
|
+
version_requirements: *id001
|
16
26
|
description: Implements a simple lexer using a StringScanner
|
17
27
|
email:
|
18
28
|
- rasmus@borgsmidt.dk
|
@@ -26,6 +36,7 @@ files:
|
|
26
36
|
- .gitignore
|
27
37
|
- .rspec
|
28
38
|
- .yardopts
|
39
|
+
- CHANGELOG.md
|
29
40
|
- Gemfile
|
30
41
|
- LICENSE
|
31
42
|
- README.md
|