oedipus_lex 2.3.2 → 2.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +0 -0
- data/History.rdoc +47 -0
- data/README.rdoc +4 -1
- data/Rakefile +9 -2
- data/lib/oedipus_lex.rb +287 -32
- data/lib/oedipus_lex.rex +2 -0
- data/lib/oedipus_lex.rex.rb +104 -23
- data/test/test_oedipus_lex.rb +123 -7
- metadata +32 -42
- metadata.gz.sig +1 -3
- data/.gemtest +0 -0
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
|
-
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 9822a393543024bafcbb4c490751008d5ae468d3348d3397df454b622e5f8fb1
|
|
4
|
+
data.tar.gz: 5910e4bf0720dc2724bf01dde8629f02616958a1d8eab65005bdfd2d5da35690
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: b1c26b081d86155d0bade889a37a80e98acd35dc5dfc8bec0009fe38f18064e7d7a0c61e11a6ba037dd024fedda76597bab9787b21fed1a87d80ceaf666fb22f
|
|
7
|
+
data.tar.gz: e9ad55f6f9d155af3a64af6edd81ffa41c33c70f9e55148a9a002b3dfa3ac7441c819c9bfa6c885903425549926f2a2f3e823791557facb49ded71f16a2bf1df
|
checksums.yaml.gz.sig
CHANGED
|
Binary file
|
data.tar.gz.sig
CHANGED
|
Binary file
|
data/History.rdoc
CHANGED
|
@@ -1,3 +1,50 @@
|
|
|
1
|
+
=== 2.5.2 / 2020-06-14
|
|
2
|
+
|
|
3
|
+
* 1 minor enhancement:
|
|
4
|
+
|
|
5
|
+
* Speedup of column position computation. It went from roughly 10s to 2s for a big file! (vdbijl)
|
|
6
|
+
|
|
7
|
+
=== 2.5.1 / 2019-06-03
|
|
8
|
+
|
|
9
|
+
* 1 minor enhancement:
|
|
10
|
+
|
|
11
|
+
* Added full rdoc and re-bootstrapped.
|
|
12
|
+
|
|
13
|
+
* 1 bug fix:
|
|
14
|
+
|
|
15
|
+
* Fixed a deprecation warning in ruby 2.6+.
|
|
16
|
+
|
|
17
|
+
=== 2.5.0 / 2016-11-30
|
|
18
|
+
|
|
19
|
+
* 5 minor enhancements:
|
|
20
|
+
|
|
21
|
+
* Added #location to generated template, provides file:line:column per options.
|
|
22
|
+
* Added LexerError and made ScanError subclass it.
|
|
23
|
+
* Added column option.
|
|
24
|
+
* Errors try to provide location now.
|
|
25
|
+
* Re-bootstrapped.
|
|
26
|
+
|
|
27
|
+
* 2 bug fixes:
|
|
28
|
+
|
|
29
|
+
* Fixed some whitespace generation when using :column.
|
|
30
|
+
* Fixed wiring on column. (steakknife)
|
|
31
|
+
|
|
32
|
+
=== 2.4.1 / 2016-01-21
|
|
33
|
+
|
|
34
|
+
* 1 minor enhancement:
|
|
35
|
+
|
|
36
|
+
* Use `skip` and `match?` instead of `scan` and `check`. Better on GC. (presidentbeef)
|
|
37
|
+
|
|
38
|
+
=== 2.4.0 / 2014-08-29
|
|
39
|
+
|
|
40
|
+
* 1 minor enhancement:
|
|
41
|
+
|
|
42
|
+
* Added column option & accessor.
|
|
43
|
+
|
|
44
|
+
* 1 bug fix:
|
|
45
|
+
|
|
46
|
+
* lineno shouldn't be visible at all if the option isn't on.
|
|
47
|
+
|
|
1
48
|
=== 2.3.2 / 2014-08-06
|
|
2
49
|
|
|
3
50
|
* 1 bug fix:
|
data/README.rdoc
CHANGED
|
@@ -19,7 +19,7 @@ At the very least, you need to add slashes to all your regexps.
|
|
|
19
19
|
|
|
20
20
|
Oedipus, like rexical, is based primarily on generating code much like
|
|
21
21
|
you would a hand-written lexer. It is _not_ a table or hash driven
|
|
22
|
-
lexer. It
|
|
22
|
+
lexer. It uses StringScanner within a multi-level case statement. As such,
|
|
23
23
|
Oedipus matches on the _first_ match, not the longest (like lex and
|
|
24
24
|
its ilk).
|
|
25
25
|
|
|
@@ -49,6 +49,7 @@ resource for CS learning. Books... books are good. I like books.
|
|
|
49
49
|
| /debug/i
|
|
50
50
|
| /do_parse/i
|
|
51
51
|
| /lineno/i
|
|
52
|
+
| /column/i
|
|
52
53
|
|
|
53
54
|
inner_section = /inner/ NL (misc_line)*
|
|
54
55
|
|
|
@@ -129,6 +130,8 @@ Specify `lineno` to generate automatic line number handling at the
|
|
|
129
130
|
beginning of `next_token`. This was the default in 1.0.0 and you must
|
|
130
131
|
now activate it.
|
|
131
132
|
|
|
133
|
+
Specify `column` to generate automatic column number handling.
|
|
134
|
+
|
|
132
135
|
==== Inner
|
|
133
136
|
|
|
134
137
|
The inner section is just code, like header or footer, but inner gets
|
data/Rakefile
CHANGED
|
@@ -3,8 +3,6 @@
|
|
|
3
3
|
require "rubygems"
|
|
4
4
|
require "hoe"
|
|
5
5
|
|
|
6
|
-
Hoe.plugin :debugging
|
|
7
|
-
Hoe.plugin :git
|
|
8
6
|
Hoe.plugin :isolate
|
|
9
7
|
Hoe.plugin :seattlerb
|
|
10
8
|
|
|
@@ -16,6 +14,10 @@ Hoe.spec "oedipus_lex" do
|
|
|
16
14
|
self.history_file = "History.rdoc"
|
|
17
15
|
end
|
|
18
16
|
|
|
17
|
+
Hoe.bad_plugins.each do |bad|
|
|
18
|
+
warn "BAD: Hoe.plugin :#{bad}"
|
|
19
|
+
end
|
|
20
|
+
|
|
19
21
|
task :bootstrap do
|
|
20
22
|
ruby "-Ilib lib/oedipus_lex.rb lib/oedipus_lex.rex > lib/oedipus_lex.rex.rb.new"
|
|
21
23
|
system "diff -uw lib/oedipus_lex.rex.rb lib/oedipus_lex.rex.rb.new"
|
|
@@ -78,4 +80,9 @@ task :debug do
|
|
|
78
80
|
puts rex.generate
|
|
79
81
|
end
|
|
80
82
|
|
|
83
|
+
task :wtf => :isolate do
|
|
84
|
+
puts `~/.rbenv/versions/2.2.0/bin/ruby -S gem env`
|
|
85
|
+
puts `~/.rbenv/versions/2.2.0/bin/ruby -S gem list`
|
|
86
|
+
end
|
|
87
|
+
|
|
81
88
|
# vim: syntax=ruby
|
data/lib/oedipus_lex.rb
CHANGED
|
@@ -3,71 +3,160 @@ require 'strscan'
|
|
|
3
3
|
require "erb"
|
|
4
4
|
require "oedipus_lex.rex"
|
|
5
5
|
|
|
6
|
+
##
|
|
7
|
+
# Oedipus Lex is a lexer generator in the same family as Rexical and
|
|
8
|
+
# Rex. Oedipus Lex is my independent lexer fork of Rexical. Rexical
|
|
9
|
+
# was in turn a fork of Rex. We've been unable to contact the author
|
|
10
|
+
# of rex in order to take it over, fix it up, extend it, and relicense
|
|
11
|
+
# it to MIT. So, Oedipus was written clean-room in order to bypass
|
|
12
|
+
# licensing constraints (and because bootstrapping is fun).
|
|
13
|
+
#
|
|
14
|
+
# Oedipus brings a lot of extras to the table and at this point is
|
|
15
|
+
# only historically related to rexical. The syntax has changed enough
|
|
16
|
+
# that any rexical lexer will have to be tweaked to work inside of
|
|
17
|
+
# oedipus. At the very least, you need to add slashes to all your
|
|
18
|
+
# regexps.
|
|
19
|
+
#
|
|
20
|
+
# Oedipus, like rexical, is based primarily on generating code much
|
|
21
|
+
# like you would a hand-written lexer. It is _not_ a table or hash
|
|
22
|
+
# driven lexer. It uses StringScanner within a multi-level case
|
|
23
|
+
# statement. As such, Oedipus matches on the _first_ match, not the
|
|
24
|
+
# longest (like lex and its ilk).
|
|
25
|
+
#
|
|
26
|
+
# This documentation is not meant to bypass any prerequisite knowledge
|
|
27
|
+
# on lexing or parsing. If you'd like to study the subject in further
|
|
28
|
+
# detail, please try [TIN321] or the [LLVM Tutorial] or some other
|
|
29
|
+
# good resource for CS learning. Books... books are good. I like
|
|
30
|
+
# books.
|
|
31
|
+
|
|
6
32
|
class OedipusLex
|
|
7
|
-
VERSION = "2.3.2"
|
|
33
|
+
VERSION = "2.5.2" # :nodoc:
|
|
34
|
+
|
|
35
|
+
##
|
|
36
|
+
# The class name to generate.
|
|
8
37
|
|
|
9
38
|
attr_accessor :class_name
|
|
39
|
+
|
|
40
|
+
##
|
|
41
|
+
# An array of header lines to have before the lexer class.
|
|
42
|
+
|
|
10
43
|
attr_accessor :header
|
|
44
|
+
|
|
45
|
+
##
|
|
46
|
+
# An array of lines to have after the lexer class.
|
|
47
|
+
|
|
11
48
|
attr_accessor :ends
|
|
49
|
+
|
|
50
|
+
##
|
|
51
|
+
# An array of lines to have inside (but at the bottom of) the lexer
|
|
52
|
+
# class.
|
|
53
|
+
|
|
12
54
|
attr_accessor :inners
|
|
55
|
+
|
|
56
|
+
##
|
|
57
|
+
# An array of name/regexp pairs to generate constants inside the
|
|
58
|
+
# lexer class.
|
|
59
|
+
|
|
13
60
|
attr_accessor :macros
|
|
61
|
+
|
|
62
|
+
##
|
|
63
|
+
# A hash of options for the code generator. See README.rdoc for
|
|
64
|
+
# supported options.
|
|
65
|
+
|
|
14
66
|
attr_accessor :option
|
|
67
|
+
|
|
68
|
+
##
|
|
69
|
+
# The rules for the lexer.
|
|
70
|
+
|
|
15
71
|
attr_accessor :rules
|
|
72
|
+
|
|
73
|
+
##
|
|
74
|
+
# An array of lines of code to generate into the top of the lexer
|
|
75
|
+
# (next_token) loop.
|
|
76
|
+
|
|
16
77
|
attr_accessor :starts
|
|
78
|
+
|
|
79
|
+
##
|
|
80
|
+
# An array of all the groups within the lexer rules.
|
|
81
|
+
|
|
17
82
|
attr_accessor :group
|
|
18
83
|
|
|
19
|
-
DEFAULTS = {
|
|
84
|
+
DEFAULTS = { # :nodoc:
|
|
20
85
|
:debug => false,
|
|
21
86
|
:do_parse => false,
|
|
22
87
|
:lineno => false,
|
|
88
|
+
:column => false,
|
|
23
89
|
:stub => false,
|
|
24
90
|
}
|
|
25
91
|
|
|
92
|
+
##
|
|
93
|
+
# A Rule represents the main component of Oedipus Lex. These are the
|
|
94
|
+
# things that "get stuff done" at the lexical level. They consist of:
|
|
95
|
+
#
|
|
96
|
+
# + an optional required start state symbol or predicate method name
|
|
97
|
+
# + a regexp to match on
|
|
98
|
+
# + an optional action method or block
|
|
99
|
+
|
|
26
100
|
class Rule < Struct.new :start_state, :regexp, :action
|
|
101
|
+
##
|
|
102
|
+
# What group this rule is in, if any.
|
|
103
|
+
|
|
27
104
|
attr_accessor :group
|
|
28
|
-
|
|
105
|
+
|
|
106
|
+
alias :group? :group # :nodoc:
|
|
107
|
+
|
|
108
|
+
##
|
|
109
|
+
# A simple constructor
|
|
29
110
|
|
|
30
111
|
def self.[] start, regexp, action
|
|
31
112
|
new start, regexp.inspect, action
|
|
32
113
|
end
|
|
33
114
|
|
|
34
|
-
def initialize start_state, regexp, action
|
|
115
|
+
def initialize start_state, regexp, action # :nodoc:
|
|
35
116
|
super
|
|
36
117
|
self.group = nil
|
|
37
118
|
end
|
|
38
119
|
|
|
39
120
|
undef_method :to_a
|
|
40
121
|
|
|
122
|
+
##
|
|
123
|
+
# Generate equivalent ruby code for the rule.
|
|
124
|
+
|
|
41
125
|
def to_ruby state, predicates, exclusive
|
|
42
126
|
return unless group? or
|
|
43
127
|
start_state == state or
|
|
44
128
|
(state.nil? and predicates.include? start_state)
|
|
45
129
|
|
|
46
|
-
|
|
47
|
-
if exclusive or not start_state then
|
|
48
|
-
"when text = ss.scan(#{regexp}) then"
|
|
49
|
-
elsif start_state =~ /^:/ then
|
|
50
|
-
"when (state == #{start_state}) && (text = ss.scan(#{regexp})) then"
|
|
51
|
-
else
|
|
52
|
-
"when #{start_state} && (text = ss.scan(#{regexp})) then"
|
|
53
|
-
end
|
|
130
|
+
uses_text = false
|
|
54
131
|
|
|
55
132
|
body =
|
|
56
133
|
case action
|
|
57
134
|
when nil, false then
|
|
58
135
|
" # do nothing"
|
|
59
136
|
when /^\{/ then
|
|
137
|
+
uses_text = action =~ /\btext\b/
|
|
60
138
|
" action #{action}"
|
|
61
139
|
when /^:/, "nil" then
|
|
62
140
|
" [:state, #{action}]"
|
|
63
|
-
else
|
|
141
|
+
else # plain method name
|
|
142
|
+
uses_text = true
|
|
64
143
|
" #{action} text"
|
|
65
144
|
end
|
|
66
145
|
|
|
67
|
-
|
|
146
|
+
check = uses_text ? "text = ss.scan(#{regexp})" : "ss.skip(#{regexp})"
|
|
147
|
+
|
|
148
|
+
cond = if exclusive or not start_state then
|
|
149
|
+
check
|
|
150
|
+
elsif /^:/.match?(start_state) then
|
|
151
|
+
"(state == #{start_state}) && (#{check})"
|
|
152
|
+
else # predicate method
|
|
153
|
+
"#{start_state} && (#{check})"
|
|
154
|
+
end
|
|
155
|
+
|
|
156
|
+
["when #{cond} then", body]
|
|
68
157
|
end
|
|
69
158
|
|
|
70
|
-
def pretty_print pp
|
|
159
|
+
def pretty_print pp # :nodoc:
|
|
71
160
|
pp.text "Rule"
|
|
72
161
|
pp.group 2, "[", "]" do
|
|
73
162
|
pp.pp start_state
|
|
@@ -79,27 +168,39 @@ class OedipusLex
|
|
|
79
168
|
end
|
|
80
169
|
end
|
|
81
170
|
|
|
171
|
+
##
|
|
172
|
+
# A group allows you to group up multiple rules under a single
|
|
173
|
+
# regular prefix expression, allowing optimized code to be generated
|
|
174
|
+
# that skips over all actions if the prefix isn't matched.
|
|
175
|
+
|
|
82
176
|
class Group < Struct.new :regex, :rules
|
|
83
177
|
alias :start_state :regex
|
|
84
178
|
|
|
179
|
+
##
|
|
180
|
+
# A convenience method to create a new group with a +start+ and
|
|
181
|
+
# given +subrules+.
|
|
182
|
+
|
|
85
183
|
def self.[] start, *subrules
|
|
86
184
|
r = new start.inspect
|
|
87
185
|
r.rules.concat subrules
|
|
88
186
|
r
|
|
89
187
|
end
|
|
90
188
|
|
|
91
|
-
def initialize start
|
|
189
|
+
def initialize start # :nodoc:
|
|
92
190
|
super(start, [])
|
|
93
191
|
end
|
|
94
192
|
|
|
193
|
+
##
|
|
194
|
+
# Add a rule to this group.
|
|
195
|
+
|
|
95
196
|
def << rule
|
|
96
197
|
rules << rule
|
|
97
198
|
nil
|
|
98
199
|
end
|
|
99
200
|
|
|
100
|
-
def to_ruby state, predicates, exclusive
|
|
201
|
+
def to_ruby state, predicates, exclusive # :nodoc:
|
|
101
202
|
[
|
|
102
|
-
"when ss.
|
|
203
|
+
"when ss.match?(#{regex}) then",
|
|
103
204
|
" case",
|
|
104
205
|
rules.map { |subrule|
|
|
105
206
|
s = subrule.to_ruby(state, predicates, exclusive)
|
|
@@ -109,7 +210,7 @@ class OedipusLex
|
|
|
109
210
|
]
|
|
110
211
|
end
|
|
111
212
|
|
|
112
|
-
def pretty_print pp
|
|
213
|
+
def pretty_print pp # :nodoc:
|
|
113
214
|
pp.text "Group"
|
|
114
215
|
pp.group 2, "[", "]" do
|
|
115
216
|
pp.seplist([regex] + rules, lambda { pp.comma_breakable }, :each) { |v|
|
|
@@ -119,6 +220,10 @@ class OedipusLex
|
|
|
119
220
|
end
|
|
120
221
|
end
|
|
121
222
|
|
|
223
|
+
##
|
|
224
|
+
# A convenience method to create a new lexer with a +name+ and given
|
|
225
|
+
# +rules+.
|
|
226
|
+
|
|
122
227
|
def self.[](name, *rules)
|
|
123
228
|
r = new
|
|
124
229
|
r.class_name = name
|
|
@@ -126,7 +231,7 @@ class OedipusLex
|
|
|
126
231
|
r
|
|
127
232
|
end
|
|
128
233
|
|
|
129
|
-
def initialize opts = {}
|
|
234
|
+
def initialize opts = {} # :nodoc:
|
|
130
235
|
self.option = DEFAULTS.merge opts
|
|
131
236
|
self.class_name = nil
|
|
132
237
|
|
|
@@ -139,7 +244,7 @@ class OedipusLex
|
|
|
139
244
|
self.group = nil
|
|
140
245
|
end
|
|
141
246
|
|
|
142
|
-
def == o
|
|
247
|
+
def == o # :nodoc:
|
|
143
248
|
(o.class == self.class and
|
|
144
249
|
o.class_name == self.class_name and
|
|
145
250
|
o.header == self.header and
|
|
@@ -150,7 +255,7 @@ class OedipusLex
|
|
|
150
255
|
o.starts == self.starts)
|
|
151
256
|
end
|
|
152
257
|
|
|
153
|
-
def pretty_print pp
|
|
258
|
+
def pretty_print pp # :nodoc:
|
|
154
259
|
commas = lambda { pp.comma_breakable }
|
|
155
260
|
|
|
156
261
|
pp.text "Lexer"
|
|
@@ -159,67 +264,109 @@ class OedipusLex
|
|
|
159
264
|
end
|
|
160
265
|
end
|
|
161
266
|
|
|
267
|
+
##
|
|
268
|
+
# Process a +class+ lexeme.
|
|
269
|
+
|
|
162
270
|
def lex_class prefix, name
|
|
163
271
|
header.concat prefix.split(/\n/)
|
|
164
272
|
self.class_name = name
|
|
165
273
|
end
|
|
166
274
|
|
|
275
|
+
##
|
|
276
|
+
# Process a +comment+ lexeme.
|
|
277
|
+
|
|
167
278
|
def lex_comment line
|
|
168
279
|
# do nothing
|
|
169
280
|
end
|
|
170
281
|
|
|
282
|
+
##
|
|
283
|
+
# Process an +end+ lexeme.
|
|
284
|
+
|
|
171
285
|
def lex_end line
|
|
172
286
|
ends << line
|
|
173
287
|
end
|
|
174
288
|
|
|
289
|
+
##
|
|
290
|
+
# Process an +inner+ lexeme.
|
|
291
|
+
|
|
175
292
|
def lex_inner line
|
|
176
293
|
inners << line
|
|
177
294
|
end
|
|
178
295
|
|
|
296
|
+
##
|
|
297
|
+
# Process a +start+ lexeme.
|
|
298
|
+
|
|
179
299
|
def lex_start line
|
|
180
300
|
starts << line.strip
|
|
181
301
|
end
|
|
182
302
|
|
|
303
|
+
##
|
|
304
|
+
# Process a +macro+ lexeme.
|
|
305
|
+
|
|
183
306
|
def lex_macro name, value
|
|
184
307
|
macros << [name, value]
|
|
185
308
|
end
|
|
186
309
|
|
|
310
|
+
##
|
|
311
|
+
# Process an +option+ lexeme.
|
|
312
|
+
|
|
187
313
|
def lex_option option
|
|
188
314
|
self.option[option.to_sym] = true
|
|
189
315
|
end
|
|
190
316
|
|
|
317
|
+
##
|
|
318
|
+
# Process a +rule+ lexeme.
|
|
319
|
+
|
|
191
320
|
def lex_rule start_state, regexp, action = nil
|
|
192
321
|
rules << Rule.new(start_state, regexp, action)
|
|
193
322
|
end
|
|
194
323
|
|
|
324
|
+
##
|
|
325
|
+
# Process a +group head+ lexeme.
|
|
326
|
+
|
|
195
327
|
def lex_grouphead re
|
|
196
328
|
end_group if group
|
|
197
329
|
self.state = :group
|
|
198
330
|
self.group = Group.new re
|
|
199
331
|
end
|
|
200
332
|
|
|
333
|
+
##
|
|
334
|
+
# Process a +group+ lexeme.
|
|
335
|
+
|
|
201
336
|
def lex_group start_state, regexp, action = nil
|
|
202
337
|
rule = Rule.new(start_state, regexp, action)
|
|
203
338
|
rule.group = group
|
|
204
339
|
self.group << rule
|
|
205
340
|
end
|
|
206
341
|
|
|
342
|
+
##
|
|
343
|
+
# End a group.
|
|
344
|
+
|
|
207
345
|
def end_group
|
|
208
346
|
rules << group
|
|
209
347
|
self.group = nil
|
|
210
348
|
self.state = :rule
|
|
211
349
|
end
|
|
212
350
|
|
|
351
|
+
##
|
|
352
|
+
# Process the end of a +group+ lexeme.
|
|
353
|
+
|
|
213
354
|
def lex_groupend start_state, regexp, action = nil
|
|
214
355
|
end_group
|
|
215
356
|
lex_rule start_state, regexp, action
|
|
216
357
|
end
|
|
217
358
|
|
|
218
|
-
|
|
359
|
+
##
|
|
360
|
+
# Process a +state+ lexeme.
|
|
361
|
+
|
|
362
|
+
def lex_state _new_state
|
|
219
363
|
end_group if group
|
|
220
364
|
# do nothing -- lexer switches state for us
|
|
221
365
|
end
|
|
222
366
|
|
|
367
|
+
##
|
|
368
|
+
# Generate the lexer.
|
|
369
|
+
|
|
223
370
|
def generate
|
|
224
371
|
filter = lambda { |r| Rule === r && r.start_state || nil }
|
|
225
372
|
_mystates = rules.map(&filter).flatten.compact.uniq
|
|
@@ -232,12 +379,20 @@ class OedipusLex
|
|
|
232
379
|
all_states = [[nil, *inclusives], # nil+incls # eg [[nil, :a],
|
|
233
380
|
*exclusives.map { |s| [s] }] # [excls] # [:A], [:B]]
|
|
234
381
|
|
|
235
|
-
encoding = header.shift if header.first
|
|
382
|
+
encoding = header.shift if /encoding:/.match?(header.first)
|
|
236
383
|
encoding ||= "# encoding: UTF-8"
|
|
237
384
|
|
|
238
|
-
|
|
385
|
+
erb = if RUBY_VERSION >= "2.6.0" then
|
|
386
|
+
ERB.new(TEMPLATE, trim_mode:"%")
|
|
387
|
+
else
|
|
388
|
+
ERB.new(TEMPLATE, nil, "%")
|
|
389
|
+
end
|
|
390
|
+
|
|
391
|
+
erb.result binding
|
|
239
392
|
end
|
|
240
393
|
|
|
394
|
+
# :stopdoc:
|
|
395
|
+
|
|
241
396
|
TEMPLATE = <<-'REX'.gsub(/^ {6}/, '')
|
|
242
397
|
<%= encoding %>
|
|
243
398
|
#--
|
|
@@ -254,36 +409,89 @@ class OedipusLex
|
|
|
254
409
|
% end
|
|
255
410
|
|
|
256
411
|
% end
|
|
412
|
+
|
|
413
|
+
##
|
|
414
|
+
# The generated lexer <%= class_name %>
|
|
415
|
+
|
|
257
416
|
class <%= class_name %>
|
|
258
417
|
require 'strscan'
|
|
259
418
|
|
|
260
419
|
% unless macros.empty? then
|
|
420
|
+
# :stopdoc:
|
|
261
421
|
% max = macros.map { |(k,_)| k.size }.max
|
|
262
422
|
% macros.each do |(k,v)|
|
|
263
423
|
<%= "%-#{max}s = %s" % [k, v] %>
|
|
264
424
|
% end
|
|
265
|
-
|
|
425
|
+
# :startdoc:
|
|
266
426
|
% end
|
|
267
|
-
|
|
427
|
+
# :stopdoc:
|
|
428
|
+
class LexerError < StandardError ; end
|
|
429
|
+
class ScanError < LexerError ; end
|
|
430
|
+
# :startdoc:
|
|
431
|
+
|
|
432
|
+
% if option[:lineno] then
|
|
433
|
+
##
|
|
434
|
+
# The current line number.
|
|
268
435
|
|
|
269
436
|
attr_accessor :lineno
|
|
437
|
+
% end
|
|
438
|
+
##
|
|
439
|
+
# The file name / path
|
|
440
|
+
|
|
270
441
|
attr_accessor :filename
|
|
442
|
+
|
|
443
|
+
##
|
|
444
|
+
# The StringScanner for this lexer.
|
|
445
|
+
|
|
271
446
|
attr_accessor :ss
|
|
447
|
+
|
|
448
|
+
##
|
|
449
|
+
# The current lexical state.
|
|
450
|
+
|
|
272
451
|
attr_accessor :state
|
|
273
452
|
|
|
274
453
|
alias :match :ss
|
|
275
454
|
|
|
455
|
+
##
|
|
456
|
+
# The match groups for the current scan.
|
|
457
|
+
|
|
276
458
|
def matches
|
|
277
459
|
m = (1..9).map { |i| ss[i] }
|
|
278
460
|
m.pop until m[-1] or m.empty?
|
|
279
461
|
m
|
|
280
462
|
end
|
|
281
463
|
|
|
464
|
+
##
|
|
465
|
+
# Yields on the current action.
|
|
466
|
+
|
|
282
467
|
def action
|
|
283
468
|
yield
|
|
284
469
|
end
|
|
285
470
|
|
|
471
|
+
% if option[:column] then
|
|
472
|
+
##
|
|
473
|
+
# The previous position. Only available if the :column option is on.
|
|
474
|
+
|
|
475
|
+
attr_accessor :old_pos
|
|
476
|
+
|
|
477
|
+
##
|
|
478
|
+
# The position of the start of the current line. Only available if the
|
|
479
|
+
# :column option is on.
|
|
480
|
+
|
|
481
|
+
attr_accessor :start_of_current_line_pos
|
|
482
|
+
|
|
483
|
+
##
|
|
484
|
+
# The current column, starting at 0. Only available if the
|
|
485
|
+
# :column option is on.
|
|
486
|
+
def column
|
|
487
|
+
old_pos - start_of_current_line_pos
|
|
488
|
+
end
|
|
489
|
+
|
|
490
|
+
% end
|
|
286
491
|
% if option[:do_parse] then
|
|
492
|
+
##
|
|
493
|
+
# Parse the file by getting all tokens and calling lex_+type+ on them.
|
|
494
|
+
|
|
287
495
|
def do_parse
|
|
288
496
|
while token = next_token do
|
|
289
497
|
type, *vals = token
|
|
@@ -293,18 +501,33 @@ class OedipusLex
|
|
|
293
501
|
end
|
|
294
502
|
|
|
295
503
|
% end
|
|
504
|
+
|
|
505
|
+
##
|
|
506
|
+
# The current scanner class. Must be overridden in subclasses.
|
|
507
|
+
|
|
296
508
|
def scanner_class
|
|
297
509
|
StringScanner
|
|
298
510
|
end unless instance_methods(false).map(&:to_s).include?("scanner_class")
|
|
299
511
|
|
|
512
|
+
##
|
|
513
|
+
# Parse the given string.
|
|
514
|
+
|
|
300
515
|
def parse str
|
|
301
516
|
self.ss = scanner_class.new str
|
|
517
|
+
% if option[:lineno] then
|
|
302
518
|
self.lineno = 1
|
|
519
|
+
% end
|
|
520
|
+
% if option[:column] then
|
|
521
|
+
self.start_of_current_line_pos = 0
|
|
522
|
+
% end
|
|
303
523
|
self.state ||= nil
|
|
304
524
|
|
|
305
525
|
do_parse
|
|
306
526
|
end
|
|
307
527
|
|
|
528
|
+
##
|
|
529
|
+
# Read in and parse the file at +path+.
|
|
530
|
+
|
|
308
531
|
def parse_file path
|
|
309
532
|
self.filename = path
|
|
310
533
|
open path do |f|
|
|
@@ -312,6 +535,26 @@ class OedipusLex
|
|
|
312
535
|
end
|
|
313
536
|
end
|
|
314
537
|
|
|
538
|
+
##
|
|
539
|
+
# The current location in the parse.
|
|
540
|
+
|
|
541
|
+
def location
|
|
542
|
+
[
|
|
543
|
+
(filename || "<input>"),
|
|
544
|
+
% if option[:lineno] then
|
|
545
|
+
lineno,
|
|
546
|
+
% elsif option[:column] then
|
|
547
|
+
"?",
|
|
548
|
+
% end
|
|
549
|
+
% if option[:column] then
|
|
550
|
+
column,
|
|
551
|
+
% end
|
|
552
|
+
].compact.join(":")
|
|
553
|
+
end
|
|
554
|
+
|
|
555
|
+
##
|
|
556
|
+
# Lex the next token.
|
|
557
|
+
|
|
315
558
|
def next_token
|
|
316
559
|
% starts.each do |s|
|
|
317
560
|
<%= s %>
|
|
@@ -321,7 +564,16 @@ class OedipusLex
|
|
|
321
564
|
|
|
322
565
|
until ss.eos? or token do
|
|
323
566
|
% if option[:lineno] then
|
|
324
|
-
|
|
567
|
+
if ss.peek(1) == "\n"
|
|
568
|
+
self.lineno += 1
|
|
569
|
+
% if option[:column] then
|
|
570
|
+
# line starts 1 position after the newline
|
|
571
|
+
self.start_of_current_line_pos = ss.pos + 1
|
|
572
|
+
% end
|
|
573
|
+
end
|
|
574
|
+
% end
|
|
575
|
+
% if option[:column] then
|
|
576
|
+
self.old_pos = ss.pos
|
|
325
577
|
% end
|
|
326
578
|
token =
|
|
327
579
|
case state
|
|
@@ -336,17 +588,17 @@ class OedipusLex
|
|
|
336
588
|
% end # the_states.each
|
|
337
589
|
else
|
|
338
590
|
text = ss.string[ss.pos .. -1]
|
|
339
|
-
raise ScanError, "can not match (#{state.inspect}): '#{text}'"
|
|
591
|
+
raise ScanError, "can not match (#{state.inspect}) at #{location}: '#{text}'"
|
|
340
592
|
end
|
|
341
593
|
% end # all_states
|
|
342
594
|
else
|
|
343
|
-
raise ScanError, "undefined state: '#{state}'"
|
|
595
|
+
raise ScanError, "undefined state at #{location}: '#{state}'"
|
|
344
596
|
end # token = case state
|
|
345
597
|
|
|
346
598
|
next unless token # allow functions to trigger redo w/ nil
|
|
347
599
|
end # while
|
|
348
600
|
|
|
349
|
-
raise "bad lexical result: #{token.inspect}" unless
|
|
601
|
+
raise LexerError, "bad lexical result at #{location}: #{token.inspect}" unless
|
|
350
602
|
token.nil? || (Array === token && token.size >= 2)
|
|
351
603
|
|
|
352
604
|
# auto-switch state
|
|
@@ -382,13 +634,16 @@ class OedipusLex
|
|
|
382
634
|
begin
|
|
383
635
|
rex.parse_file path
|
|
384
636
|
rescue
|
|
385
|
-
|
|
637
|
+
lineno = rex.respond_to?(:lineno) ? rex.lineno : -1
|
|
638
|
+
$stderr.printf "%s:%d:%s\n", rex.filename, lineno, $!.message
|
|
386
639
|
exit 1
|
|
387
640
|
end
|
|
388
641
|
end
|
|
389
642
|
end
|
|
390
643
|
% end
|
|
391
644
|
REX
|
|
645
|
+
|
|
646
|
+
# :startdoc:
|
|
392
647
|
end
|
|
393
648
|
|
|
394
649
|
if $0 == __FILE__ then
|
data/lib/oedipus_lex.rex
CHANGED
|
@@ -4,6 +4,7 @@ option
|
|
|
4
4
|
|
|
5
5
|
do_parse
|
|
6
6
|
lineno
|
|
7
|
+
column
|
|
7
8
|
|
|
8
9
|
macro
|
|
9
10
|
ST /(?:(:\S+|\w+\??))/
|
|
@@ -31,6 +32,7 @@ rule
|
|
|
31
32
|
:option /debug/i { [:option, text] }
|
|
32
33
|
:option /do_parse/i { [:option, text] }
|
|
33
34
|
:option /lineno/i { [:option, text] }
|
|
35
|
+
:option /column/i { [:option, text] }
|
|
34
36
|
|
|
35
37
|
:inner /.*/ { [:inner, text] }
|
|
36
38
|
|
data/lib/oedipus_lex.rex.rb
CHANGED
|
@@ -1,36 +1,85 @@
|
|
|
1
1
|
# encoding: UTF-8
|
|
2
2
|
#--
|
|
3
3
|
# This file is automatically generated. Do not modify it.
|
|
4
|
-
# Generated by: oedipus_lex version 2.
|
|
4
|
+
# Generated by: oedipus_lex version 2.5.2.
|
|
5
5
|
# Source: lib/oedipus_lex.rex
|
|
6
6
|
#++
|
|
7
7
|
|
|
8
|
+
|
|
9
|
+
##
|
|
10
|
+
# The generated lexer OedipusLex
|
|
11
|
+
|
|
8
12
|
class OedipusLex
|
|
9
13
|
require 'strscan'
|
|
10
14
|
|
|
15
|
+
# :stopdoc:
|
|
11
16
|
ST = /(?:(:\S+|\w+\??))/
|
|
12
17
|
RE = /(\/(?:\\.|[^\/])+\/[ion]?)/
|
|
13
18
|
ACT = /(\{.*|:?\w+)/
|
|
19
|
+
# :startdoc:
|
|
20
|
+
# :stopdoc:
|
|
21
|
+
class LexerError < StandardError ; end
|
|
22
|
+
class ScanError < LexerError ; end
|
|
23
|
+
# :startdoc:
|
|
14
24
|
|
|
15
|
-
|
|
25
|
+
##
|
|
26
|
+
# The current line number.
|
|
16
27
|
|
|
17
28
|
attr_accessor :lineno
|
|
29
|
+
##
|
|
30
|
+
# The file name / path
|
|
31
|
+
|
|
18
32
|
attr_accessor :filename
|
|
33
|
+
|
|
34
|
+
##
|
|
35
|
+
# The StringScanner for this lexer.
|
|
36
|
+
|
|
19
37
|
attr_accessor :ss
|
|
38
|
+
|
|
39
|
+
##
|
|
40
|
+
# The current lexical state.
|
|
41
|
+
|
|
20
42
|
attr_accessor :state
|
|
21
43
|
|
|
22
44
|
alias :match :ss
|
|
23
45
|
|
|
46
|
+
##
|
|
47
|
+
# The match groups for the current scan.
|
|
48
|
+
|
|
24
49
|
def matches
|
|
25
50
|
m = (1..9).map { |i| ss[i] }
|
|
26
51
|
m.pop until m[-1] or m.empty?
|
|
27
52
|
m
|
|
28
53
|
end
|
|
29
54
|
|
|
55
|
+
##
|
|
56
|
+
# Yields on the current action.
|
|
57
|
+
|
|
30
58
|
def action
|
|
31
59
|
yield
|
|
32
60
|
end
|
|
33
61
|
|
|
62
|
+
##
|
|
63
|
+
# The previous position. Only available if the :column option is on.
|
|
64
|
+
|
|
65
|
+
attr_accessor :old_pos
|
|
66
|
+
|
|
67
|
+
##
|
|
68
|
+
# The position of the start of the current line. Only available if the
|
|
69
|
+
# :column option is on.
|
|
70
|
+
|
|
71
|
+
attr_accessor :start_of_current_line_pos
|
|
72
|
+
|
|
73
|
+
##
|
|
74
|
+
# The current column, starting at 0. Only available if the
|
|
75
|
+
# :column option is on.
|
|
76
|
+
def column
|
|
77
|
+
old_pos - start_of_current_line_pos
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
##
|
|
81
|
+
# Parse the file by getting all tokens and calling lex_+type+ on them.
|
|
82
|
+
|
|
34
83
|
def do_parse
|
|
35
84
|
while token = next_token do
|
|
36
85
|
type, *vals = token
|
|
@@ -39,18 +88,29 @@ class OedipusLex
|
|
|
39
88
|
end
|
|
40
89
|
end
|
|
41
90
|
|
|
91
|
+
|
|
92
|
+
##
|
|
93
|
+
# The current scanner class. Must be overridden in subclasses.
|
|
94
|
+
|
|
42
95
|
def scanner_class
|
|
43
96
|
StringScanner
|
|
44
97
|
end unless instance_methods(false).map(&:to_s).include?("scanner_class")
|
|
45
98
|
|
|
99
|
+
##
|
|
100
|
+
# Parse the given string.
|
|
101
|
+
|
|
46
102
|
def parse str
|
|
47
103
|
self.ss = scanner_class.new str
|
|
48
104
|
self.lineno = 1
|
|
105
|
+
self.start_of_current_line_pos = 0
|
|
49
106
|
self.state ||= nil
|
|
50
107
|
|
|
51
108
|
do_parse
|
|
52
109
|
end
|
|
53
110
|
|
|
111
|
+
##
|
|
112
|
+
# Read in and parse the file at +path+.
|
|
113
|
+
|
|
54
114
|
def parse_file path
|
|
55
115
|
self.filename = path
|
|
56
116
|
open path do |f|
|
|
@@ -58,35 +118,54 @@ class OedipusLex
|
|
|
58
118
|
end
|
|
59
119
|
end
|
|
60
120
|
|
|
121
|
+
##
|
|
122
|
+
# The current location in the parse.
|
|
123
|
+
|
|
124
|
+
def location
|
|
125
|
+
[
|
|
126
|
+
(filename || "<input>"),
|
|
127
|
+
lineno,
|
|
128
|
+
column,
|
|
129
|
+
].compact.join(":")
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
##
|
|
133
|
+
# Lex the next token.
|
|
134
|
+
|
|
61
135
|
def next_token
|
|
62
|
-
self.lineno += 1 if ss.peek(1) == "\n"
|
|
63
136
|
|
|
64
137
|
token = nil
|
|
65
138
|
|
|
66
139
|
until ss.eos? or token do
|
|
140
|
+
if ss.peek(1) == "\n"
|
|
141
|
+
self.lineno += 1
|
|
142
|
+
# line starts 1 position after the newline
|
|
143
|
+
self.start_of_current_line_pos = ss.pos + 1
|
|
144
|
+
end
|
|
145
|
+
self.old_pos = ss.pos
|
|
67
146
|
token =
|
|
68
147
|
case state
|
|
69
148
|
when nil, :option, :inner, :start, :macro, :rule, :group then
|
|
70
149
|
case
|
|
71
|
-
when
|
|
150
|
+
when ss.skip(/options?.*/) then
|
|
72
151
|
[:state, :option]
|
|
73
|
-
when
|
|
152
|
+
when ss.skip(/inner.*/) then
|
|
74
153
|
[:state, :inner]
|
|
75
|
-
when
|
|
154
|
+
when ss.skip(/macros?.*/) then
|
|
76
155
|
[:state, :macro]
|
|
77
|
-
when
|
|
156
|
+
when ss.skip(/rules?.*/) then
|
|
78
157
|
[:state, :rule]
|
|
79
|
-
when
|
|
158
|
+
when ss.skip(/start.*/) then
|
|
80
159
|
[:state, :start]
|
|
81
|
-
when
|
|
160
|
+
when ss.skip(/end/) then
|
|
82
161
|
[:state, :END]
|
|
83
|
-
when
|
|
162
|
+
when ss.skip(/\A((?:.|\n)*)class ([\w:]+.*)/) then
|
|
84
163
|
action { [:class, *matches] }
|
|
85
|
-
when
|
|
164
|
+
when ss.skip(/\n+/) then
|
|
86
165
|
# do nothing
|
|
87
166
|
when text = ss.scan(/\s*(\#.*)/) then
|
|
88
167
|
action { [:comment, text] }
|
|
89
|
-
when (state == :option) && (
|
|
168
|
+
when (state == :option) && (ss.skip(/\s+/)) then
|
|
90
169
|
# do nothing
|
|
91
170
|
when (state == :option) && (text = ss.scan(/stub/i)) then
|
|
92
171
|
action { [:option, text] }
|
|
@@ -96,44 +175,46 @@ class OedipusLex
|
|
|
96
175
|
action { [:option, text] }
|
|
97
176
|
when (state == :option) && (text = ss.scan(/lineno/i)) then
|
|
98
177
|
action { [:option, text] }
|
|
178
|
+
when (state == :option) && (text = ss.scan(/column/i)) then
|
|
179
|
+
action { [:option, text] }
|
|
99
180
|
when (state == :inner) && (text = ss.scan(/.*/)) then
|
|
100
181
|
action { [:inner, text] }
|
|
101
182
|
when (state == :start) && (text = ss.scan(/.*/)) then
|
|
102
183
|
action { [:start, text] }
|
|
103
|
-
when (state == :macro) && (
|
|
184
|
+
when (state == :macro) && (ss.skip(/\s+(\w+)\s+#{RE}/o)) then
|
|
104
185
|
action { [:macro, *matches] }
|
|
105
|
-
when (state == :rule) && (
|
|
186
|
+
when (state == :rule) && (ss.skip(/\s*#{ST}?[\ \t]*#{RE}[\ \t]*#{ACT}?/o)) then
|
|
106
187
|
action { [:rule, *matches] }
|
|
107
|
-
when (state == :rule) && (
|
|
188
|
+
when (state == :rule) && (ss.skip(/\s*:[\ \t]*#{RE}/o)) then
|
|
108
189
|
action { [:grouphead, *matches] }
|
|
109
|
-
when (state == :group) && (
|
|
190
|
+
when (state == :group) && (ss.skip(/\s*:[\ \t]*#{RE}/o)) then
|
|
110
191
|
action { [:grouphead, *matches] }
|
|
111
|
-
when (state == :group) && (
|
|
192
|
+
when (state == :group) && (ss.skip(/\s*\|\s*#{ST}?[\ \t]*#{RE}[\ \t]*#{ACT}?/o)) then
|
|
112
193
|
action { [:group, *matches] }
|
|
113
|
-
when (state == :group) && (
|
|
194
|
+
when (state == :group) && (ss.skip(/\s*#{ST}?[\ \t]*#{RE}[\ \t]*#{ACT}?/o)) then
|
|
114
195
|
action { [:groupend, *matches] }
|
|
115
196
|
else
|
|
116
197
|
text = ss.string[ss.pos .. -1]
|
|
117
|
-
raise ScanError, "can not match (#{state.inspect}): '#{text}'"
|
|
198
|
+
raise ScanError, "can not match (#{state.inspect}) at #{location}: '#{text}'"
|
|
118
199
|
end
|
|
119
200
|
when :END then
|
|
120
201
|
case
|
|
121
|
-
when
|
|
202
|
+
when ss.skip(/\n+/) then
|
|
122
203
|
# do nothing
|
|
123
204
|
when text = ss.scan(/.*/) then
|
|
124
205
|
action { [:end, text] }
|
|
125
206
|
else
|
|
126
207
|
text = ss.string[ss.pos .. -1]
|
|
127
|
-
raise ScanError, "can not match (#{state.inspect}): '#{text}'"
|
|
208
|
+
raise ScanError, "can not match (#{state.inspect}) at #{location}: '#{text}'"
|
|
128
209
|
end
|
|
129
210
|
else
|
|
130
|
-
raise ScanError, "undefined state: '#{state}'"
|
|
211
|
+
raise ScanError, "undefined state at #{location}: '#{state}'"
|
|
131
212
|
end # token = case state
|
|
132
213
|
|
|
133
214
|
next unless token # allow functions to trigger redo w/ nil
|
|
134
215
|
end # while
|
|
135
216
|
|
|
136
|
-
raise "bad lexical result: #{token.inspect}" unless
|
|
217
|
+
raise LexerError, "bad lexical result at #{location}: #{token.inspect}" unless
|
|
137
218
|
token.nil? || (Array === token && token.size >= 2)
|
|
138
219
|
|
|
139
220
|
# auto-switch state
|
data/test/test_oedipus_lex.rb
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
gem "minitest"
|
|
2
1
|
require "minitest/autorun"
|
|
3
2
|
require "oedipus_lex"
|
|
4
3
|
require "stringio"
|
|
@@ -83,6 +82,26 @@ class TestOedipusLex < Minitest::Test
|
|
|
83
82
|
assert_equal expected_msg, e.message
|
|
84
83
|
end
|
|
85
84
|
|
|
85
|
+
def assert_token_error grammar, input, expected_msg
|
|
86
|
+
_, mod = eval_lexer grammar
|
|
87
|
+
|
|
88
|
+
calc = mod::Calculator.new
|
|
89
|
+
|
|
90
|
+
def calc.do_parse
|
|
91
|
+
tokens = []
|
|
92
|
+
while token = next_token
|
|
93
|
+
tokens << token
|
|
94
|
+
end
|
|
95
|
+
tokens
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
e = assert_raises mod::Calculator::LexerError do
|
|
99
|
+
calc.parse input
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
assert_equal expected_msg, e.message
|
|
103
|
+
end
|
|
104
|
+
|
|
86
105
|
def test_simple_scanner
|
|
87
106
|
src = <<-'REX'
|
|
88
107
|
class Calculator
|
|
@@ -137,7 +156,7 @@ class TestOedipusLex < Minitest::Test
|
|
|
137
156
|
end
|
|
138
157
|
REX
|
|
139
158
|
|
|
140
|
-
assert_generate_error src, "can not match (:rule): '"
|
|
159
|
+
assert_generate_error src, "can not match (:rule) at <input>:4:0: '"
|
|
141
160
|
end
|
|
142
161
|
|
|
143
162
|
def test_simple_scanner_macro
|
|
@@ -260,6 +279,30 @@ class TestOedipusLex < Minitest::Test
|
|
|
260
279
|
assert_match "[:op, \"+\"]", out
|
|
261
280
|
end
|
|
262
281
|
|
|
282
|
+
def test_column
|
|
283
|
+
src = <<-'REX'
|
|
284
|
+
class Calculator
|
|
285
|
+
rule
|
|
286
|
+
/\d+/ { [:number, text.to_i, lineno, column] }
|
|
287
|
+
/\s+/
|
|
288
|
+
/[+-]/ { [:op, text, lineno, column] }
|
|
289
|
+
end
|
|
290
|
+
REX
|
|
291
|
+
|
|
292
|
+
txt = "1 + 2\n+ 30"
|
|
293
|
+
|
|
294
|
+
exp = [[:number, 1, 1, 0],
|
|
295
|
+
[:op, "+", 1, 2],
|
|
296
|
+
[:number, 2, 1, 4],
|
|
297
|
+
[:op, "+", 2, 0],
|
|
298
|
+
[:number, 30, 2, 2]]
|
|
299
|
+
|
|
300
|
+
option[:column] = true
|
|
301
|
+
option[:lineno] = true
|
|
302
|
+
|
|
303
|
+
assert_lexer src, txt, exp
|
|
304
|
+
end
|
|
305
|
+
|
|
263
306
|
def test_simple_scanner_debug_src
|
|
264
307
|
src = <<-'REX'
|
|
265
308
|
class Calculator
|
|
@@ -434,7 +477,7 @@ class TestOedipusLex < Minitest::Test
|
|
|
434
477
|
|
|
435
478
|
ruby = generate_lexer src
|
|
436
479
|
|
|
437
|
-
assert_match "when ss.
|
|
480
|
+
assert_match "when ss.match?(/\\d/) then", ruby
|
|
438
481
|
assert_match "when text = ss.scan(/\\d+\\.\\d+/) then", ruby
|
|
439
482
|
assert_match "when text = ss.scan(/\\d+/) then", ruby
|
|
440
483
|
assert_match "end # group /\\d/", ruby
|
|
@@ -457,12 +500,12 @@ class TestOedipusLex < Minitest::Test
|
|
|
457
500
|
|
|
458
501
|
ruby = generate_lexer src
|
|
459
502
|
|
|
460
|
-
assert_match "when ss.
|
|
503
|
+
assert_match "when ss.match?(/\\d/) then", ruby
|
|
461
504
|
assert_match "when text = ss.scan(/\\d+\\.\\d+/) then", ruby
|
|
462
505
|
assert_match "when text = ss.scan(/\\d+/) then", ruby
|
|
463
506
|
assert_match "end # group /\\d/", ruby
|
|
464
507
|
|
|
465
|
-
assert_match "when ss.
|
|
508
|
+
assert_match "when ss.match?(/\\+/) then", ruby
|
|
466
509
|
assert_match "when xx? && (text = ss.scan(/\\+whatever/)) then", ruby
|
|
467
510
|
assert_match "when (state == :x) && (text = ss.scan(/\\+\\d+/)) then", ruby
|
|
468
511
|
assert_match "end # group /\\d/", ruby
|
|
@@ -737,9 +780,50 @@ class TestOedipusLex < Minitest::Test
|
|
|
737
780
|
assert_lexer src, txt, exp
|
|
738
781
|
|
|
739
782
|
txt = "aa"
|
|
740
|
-
exp = [[:A, 'a'], [:B, 'b'], [:A, 'a'], [:B, 'b'], [:A, 'a']]
|
|
741
783
|
|
|
742
|
-
assert_lexer_error src, txt, "can not match (:B)
|
|
784
|
+
assert_lexer_error src, txt, "can not match (:B) at <input>: 'a'"
|
|
785
|
+
end
|
|
786
|
+
|
|
787
|
+
def test_error_undefined_state
|
|
788
|
+
src = <<-'REX'
|
|
789
|
+
class Calculator
|
|
790
|
+
rule
|
|
791
|
+
/a/ { self.state = :C ; [:A, text] }
|
|
792
|
+
:B /b/ { self.state = nil ; [:B, text] }
|
|
793
|
+
end
|
|
794
|
+
REX
|
|
795
|
+
|
|
796
|
+
txt = "aa"
|
|
797
|
+
|
|
798
|
+
assert_lexer_error src, txt, "undefined state at <input>: 'C'"
|
|
799
|
+
end
|
|
800
|
+
|
|
801
|
+
def test_error_bad_token
|
|
802
|
+
src = <<-'REX'
|
|
803
|
+
class Calculator
|
|
804
|
+
rule
|
|
805
|
+
/a/ { self.state = :B ; :A }
|
|
806
|
+
:B /b/ { self.state = nil ; [:B, text] }
|
|
807
|
+
end
|
|
808
|
+
REX
|
|
809
|
+
|
|
810
|
+
txt = "aa"
|
|
811
|
+
|
|
812
|
+
assert_token_error src, txt, "bad lexical result at <input>: :A"
|
|
813
|
+
end
|
|
814
|
+
|
|
815
|
+
def test_error_bad_token_size
|
|
816
|
+
src = <<-'REX'
|
|
817
|
+
class Calculator
|
|
818
|
+
rule
|
|
819
|
+
/a/ { self.state = :B ; [:A] }
|
|
820
|
+
:B /b/ { self.state = nil ; [:B, text] }
|
|
821
|
+
end
|
|
822
|
+
REX
|
|
823
|
+
|
|
824
|
+
txt = "aa"
|
|
825
|
+
|
|
826
|
+
assert_token_error src, txt, "bad lexical result at <input>: [:A]"
|
|
743
827
|
end
|
|
744
828
|
|
|
745
829
|
def test_incrementing_lineno_on_nil_token
|
|
@@ -758,4 +842,36 @@ class TestOedipusLex < Minitest::Test
|
|
|
758
842
|
|
|
759
843
|
assert_lexer src, txt, exp
|
|
760
844
|
end
|
|
845
|
+
|
|
846
|
+
def assert_location exp, option = {}
|
|
847
|
+
self.option = option
|
|
848
|
+
|
|
849
|
+
src = "class Calculator\nrule\n /\\d+/ { [:number, text.to_i] }\nend\n"
|
|
850
|
+
|
|
851
|
+
_, mod = eval_lexer src
|
|
852
|
+
|
|
853
|
+
calc = mod::Calculator.new
|
|
854
|
+
def calc.do_parse
|
|
855
|
+
[next_token]
|
|
856
|
+
end
|
|
857
|
+
|
|
858
|
+
calc.filename = option[:filename] if option[:filename]
|
|
859
|
+
calc.parse "42"
|
|
860
|
+
|
|
861
|
+
assert_equal exp, calc.location
|
|
862
|
+
end
|
|
863
|
+
|
|
864
|
+
def test_location
|
|
865
|
+
t = true
|
|
866
|
+
|
|
867
|
+
assert_location "<input>"
|
|
868
|
+
assert_location "<input>:1", :lineno => t
|
|
869
|
+
assert_location "<input>:?:0", :column => t
|
|
870
|
+
assert_location "<input>:1:0", :lineno => t, :column => t
|
|
871
|
+
|
|
872
|
+
assert_location "blah", :filename => "blah"
|
|
873
|
+
assert_location "blah:1", :filename => "blah", :lineno => t
|
|
874
|
+
assert_location "blah:?:0", :filename => "blah", :column => t
|
|
875
|
+
assert_location "blah:1:0", :filename => "blah", :lineno => t, :column => t
|
|
876
|
+
end
|
|
761
877
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: oedipus_lex
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.
|
|
4
|
+
version: 2.5.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ryan Davis
|
|
@@ -10,9 +10,9 @@ bindir: bin
|
|
|
10
10
|
cert_chain:
|
|
11
11
|
- |
|
|
12
12
|
-----BEGIN CERTIFICATE-----
|
|
13
|
-
|
|
13
|
+
MIIDPjCCAiagAwIBAgIBBDANBgkqhkiG9w0BAQsFADBFMRMwEQYDVQQDDApyeWFu
|
|
14
14
|
ZC1ydWJ5MRkwFwYKCZImiZPyLGQBGRYJemVuc3BpZGVyMRMwEQYKCZImiZPyLGQB
|
|
15
|
-
|
|
15
|
+
GRYDY29tMB4XDTE5MTIxMzAwMDIwNFoXDTIwMTIxMjAwMDIwNFowRTETMBEGA1UE
|
|
16
16
|
AwwKcnlhbmQtcnVieTEZMBcGCgmSJomT8ixkARkWCXplbnNwaWRlcjETMBEGCgmS
|
|
17
17
|
JomT8ixkARkWA2NvbTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBALda
|
|
18
18
|
b9DCgK+627gPJkB6XfjZ1itoOQvpqH1EXScSaba9/S2VF22VYQbXU1xQXL/WzCkx
|
|
@@ -21,58 +21,50 @@ cert_chain:
|
|
|
21
21
|
GiadM9GHRaDiaxuX0cIUBj19T01mVE2iymf9I6bEsiayK/n6QujtyCbTWsAS9Rqt
|
|
22
22
|
qhtV7HJxNKuPj/JFH0D2cswvzznE/a5FOYO68g+YCuFi5L8wZuuM8zzdwjrWHqSV
|
|
23
23
|
gBEfoTEGr7Zii72cx+sCAwEAAaM5MDcwCQYDVR0TBAIwADALBgNVHQ8EBAMCBLAw
|
|
24
|
-
HQYDVR0OBBYEFEfFe9md/r/tj/Wmwpy+MI8d9k/
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
24
|
+
HQYDVR0OBBYEFEfFe9md/r/tj/Wmwpy+MI8d9k/hMA0GCSqGSIb3DQEBCwUAA4IB
|
|
25
|
+
AQCkkcHqAa6IKLYGl93rn78J3L+LnqyxaA059n4IGMHWN5bv9KBQnIjOrpLadtYZ
|
|
26
|
+
vhWkunWDKdfVapBEq5+T4HzqnsEXC3aCv6JEKJY6Zw7iSzl0M8hozuzRr+w46wvT
|
|
27
|
+
fV2yTN6QTVxqbMsJJyjosks4ZdQYov2zdvQpt1HsLi+Qmckmg8SPZsd+T8uiiBCf
|
|
28
|
+
b+1ORSM5eEfBQenPXy83LZcoQz8i6zVB4aAfTGGdhxjoMGUEmSZ6xpkOzmnGa9QK
|
|
29
|
+
m5x9IDiApM+vCELNwDXXGNFEnQBBK+wAe4Pek8o1V1TTOxL1kGPewVOitX1p3xoN
|
|
30
|
+
h7iEjga8iM1LbZUfiISZ+WrB
|
|
31
31
|
-----END CERTIFICATE-----
|
|
32
|
-
date:
|
|
32
|
+
date: 2020-06-14 00:00:00.000000000 Z
|
|
33
33
|
dependencies:
|
|
34
|
-
- !ruby/object:Gem::Dependency
|
|
35
|
-
name: minitest
|
|
36
|
-
requirement: !ruby/object:Gem::Requirement
|
|
37
|
-
requirements:
|
|
38
|
-
- - ~>
|
|
39
|
-
- !ruby/object:Gem::Version
|
|
40
|
-
version: '5.4'
|
|
41
|
-
type: :development
|
|
42
|
-
prerelease: false
|
|
43
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
44
|
-
requirements:
|
|
45
|
-
- - ~>
|
|
46
|
-
- !ruby/object:Gem::Version
|
|
47
|
-
version: '5.4'
|
|
48
34
|
- !ruby/object:Gem::Dependency
|
|
49
35
|
name: rdoc
|
|
50
36
|
requirement: !ruby/object:Gem::Requirement
|
|
51
37
|
requirements:
|
|
52
|
-
- -
|
|
38
|
+
- - ">="
|
|
53
39
|
- !ruby/object:Gem::Version
|
|
54
40
|
version: '4.0'
|
|
41
|
+
- - "<"
|
|
42
|
+
- !ruby/object:Gem::Version
|
|
43
|
+
version: '7'
|
|
55
44
|
type: :development
|
|
56
45
|
prerelease: false
|
|
57
46
|
version_requirements: !ruby/object:Gem::Requirement
|
|
58
47
|
requirements:
|
|
59
|
-
- -
|
|
48
|
+
- - ">="
|
|
60
49
|
- !ruby/object:Gem::Version
|
|
61
50
|
version: '4.0'
|
|
51
|
+
- - "<"
|
|
52
|
+
- !ruby/object:Gem::Version
|
|
53
|
+
version: '7'
|
|
62
54
|
- !ruby/object:Gem::Dependency
|
|
63
55
|
name: hoe
|
|
64
56
|
requirement: !ruby/object:Gem::Requirement
|
|
65
57
|
requirements:
|
|
66
|
-
- - ~>
|
|
58
|
+
- - "~>"
|
|
67
59
|
- !ruby/object:Gem::Version
|
|
68
|
-
version: '3.
|
|
60
|
+
version: '3.22'
|
|
69
61
|
type: :development
|
|
70
62
|
prerelease: false
|
|
71
63
|
version_requirements: !ruby/object:Gem::Requirement
|
|
72
64
|
requirements:
|
|
73
|
-
- - ~>
|
|
65
|
+
- - "~>"
|
|
74
66
|
- !ruby/object:Gem::Version
|
|
75
|
-
version: '3.
|
|
67
|
+
version: '3.22'
|
|
76
68
|
description: |-
|
|
77
69
|
Oedipus Lex is a lexer generator in the same family as Rexical and
|
|
78
70
|
Rex. Oedipus Lex is my independent lexer fork of Rexical. Rexical was
|
|
@@ -88,7 +80,7 @@ description: |-
|
|
|
88
80
|
|
|
89
81
|
Oedipus, like rexical, is based primarily on generating code much like
|
|
90
82
|
you would a hand-written lexer. It is _not_ a table or hash driven
|
|
91
|
-
lexer. It
|
|
83
|
+
lexer. It uses StrScanner within a multi-level case statement. As such,
|
|
92
84
|
Oedipus matches on the _first_ match, not the longest (like lex and
|
|
93
85
|
its ilk).
|
|
94
86
|
|
|
@@ -106,8 +98,7 @@ extra_rdoc_files:
|
|
|
106
98
|
- README.rdoc
|
|
107
99
|
- sample/error1.txt
|
|
108
100
|
files:
|
|
109
|
-
- .autotest
|
|
110
|
-
- .gemtest
|
|
101
|
+
- ".autotest"
|
|
111
102
|
- History.rdoc
|
|
112
103
|
- Manifest.txt
|
|
113
104
|
- README.rdoc
|
|
@@ -137,28 +128,27 @@ files:
|
|
|
137
128
|
homepage: http://github.com/seattlerb/oedipus_lex
|
|
138
129
|
licenses:
|
|
139
130
|
- MIT
|
|
140
|
-
metadata:
|
|
131
|
+
metadata:
|
|
132
|
+
homepage_uri: http://github.com/seattlerb/oedipus_lex
|
|
141
133
|
post_install_message:
|
|
142
134
|
rdoc_options:
|
|
143
|
-
- --main
|
|
135
|
+
- "--main"
|
|
144
136
|
- README.rdoc
|
|
145
137
|
require_paths:
|
|
146
138
|
- lib
|
|
147
139
|
required_ruby_version: !ruby/object:Gem::Requirement
|
|
148
140
|
requirements:
|
|
149
|
-
- -
|
|
141
|
+
- - ">="
|
|
150
142
|
- !ruby/object:Gem::Version
|
|
151
143
|
version: '0'
|
|
152
144
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
153
145
|
requirements:
|
|
154
|
-
- -
|
|
146
|
+
- - ">="
|
|
155
147
|
- !ruby/object:Gem::Version
|
|
156
148
|
version: '0'
|
|
157
149
|
requirements: []
|
|
158
|
-
|
|
159
|
-
rubygems_version: 2.2.1
|
|
150
|
+
rubygems_version: 3.0.3
|
|
160
151
|
signing_key:
|
|
161
152
|
specification_version: 4
|
|
162
153
|
summary: Oedipus Lex is a lexer generator in the same family as Rexical and Rex
|
|
163
|
-
test_files:
|
|
164
|
-
- test/test_oedipus_lex.rb
|
|
154
|
+
test_files: []
|
metadata.gz.sig
CHANGED
|
@@ -1,3 +1 @@
|
|
|
1
|
-
|
|
2
|
-
�4�s���-�[ ����X���/{��sI���\��CU�4��7�Z0�st㋾-���CD�m푹�&��<_�+Vʈs��T>�m4��� �/�G�TT�-Akch]�U
|
|
3
|
-
�`zU�QSu��|��"����%\֙3���Zp^�-]�������qs�Z���T���`s�x�vG�[�m�6�f%�����j+��A��4��װ�\��[����
|
|
1
|
+
jl�f��J*��_X%��d��s�� �g�Cao���G7�!�ߡm�������:J6���M�͡���I� Ϟ�p��@�5)�}G�0F^�cń�o�_����zx,������L/���s+sQ7�6��g�-���J��+I�0�3�#`o,�gw��ԁ�#���Yr��"p�a�w ���u�k|��tc$x�=�M��Ѥ�[e$����`�R����ke�i��&�'���ŗ�����)
|
data/.gemtest
DELETED
|
File without changes
|