oedipus_lex 2.5.0 → 2.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- checksums.yaml.gz.sig +0 -0
- data/History.rdoc +33 -0
- data/Rakefile +2 -0
- data/lib/oedipus_lex.rb +237 -19
- data/lib/oedipus_lex.rex +1 -1
- data/lib/oedipus_lex.rex.rb +69 -6
- data/test/test_oedipus_lex.rb +19 -4
- data.tar.gz.sig +1 -2
- metadata +38 -30
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 9a4715d1253bc4e949f28fe08756befae60945975dea73942e4821293e910a21
|
4
|
+
data.tar.gz: c54b53dd59c1ebd5f81b0d4e3cfcd81b06520813934f91827a03d1185d4a7971
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1d611cdec7771203561d7f6c4edaf5afa7a4839eff5efa0b82c22a88a7f68b87a8f6b7bd13891a2dbcdccfed1017ea8d32dc4424a5427be7cf6a9b4444626d1d
|
7
|
+
data.tar.gz: 259a6b53e966df95e6138f7a6fd3c80948d151ffca0d50a9c03badb737b30b949800e9bf761cacbbaaf55c20c72469733dbd1e0aff7dca8b819005efcb1aac6f
|
checksums.yaml.gz.sig
CHANGED
Binary file
|
data/History.rdoc
CHANGED
@@ -1,3 +1,36 @@
|
|
1
|
+
=== 2.6.0 / 2021-10-27
|
2
|
+
|
3
|
+
* 2 minor enhancements:
|
4
|
+
|
5
|
+
* Add frozen_string_literal comment to generated lexers.
|
6
|
+
* Allow empty regex. (marcandre)
|
7
|
+
|
8
|
+
* 1 bug fix:
|
9
|
+
|
10
|
+
* Switched from peek(1) == "\n" to check(/\n/) to save a ton of strings.
|
11
|
+
|
12
|
+
=== 2.5.3 / 2021-05-29
|
13
|
+
|
14
|
+
* 1 bug fix:
|
15
|
+
|
16
|
+
* Added required_ruby_version >= 2.4 to gemspec.
|
17
|
+
|
18
|
+
=== 2.5.2 / 2020-06-14
|
19
|
+
|
20
|
+
* 1 minor enhancement:
|
21
|
+
|
22
|
+
* Speedup of column position computation. It went from roughly 10s to 2s for a big file! (vdbijl)
|
23
|
+
|
24
|
+
=== 2.5.1 / 2019-06-03
|
25
|
+
|
26
|
+
* 1 minor enhancement:
|
27
|
+
|
28
|
+
* Added full rdoc and re-bootstrapped.
|
29
|
+
|
30
|
+
* 1 bug fix:
|
31
|
+
|
32
|
+
* Fixed a deprecation warning in ruby 2.6+.
|
33
|
+
|
1
34
|
=== 2.5.0 / 2016-11-30
|
2
35
|
|
3
36
|
* 5 minor enhancements:
|
data/Rakefile
CHANGED
data/lib/oedipus_lex.rb
CHANGED
@@ -3,20 +3,85 @@ require 'strscan'
|
|
3
3
|
require "erb"
|
4
4
|
require "oedipus_lex.rex"
|
5
5
|
|
6
|
+
##
|
7
|
+
# Oedipus Lex is a lexer generator in the same family as Rexical and
|
8
|
+
# Rex. Oedipus Lex is my independent lexer fork of Rexical. Rexical
|
9
|
+
# was in turn a fork of Rex. We've been unable to contact the author
|
10
|
+
# of rex in order to take it over, fix it up, extend it, and relicense
|
11
|
+
# it to MIT. So, Oedipus was written clean-room in order to bypass
|
12
|
+
# licensing constraints (and because bootstrapping is fun).
|
13
|
+
#
|
14
|
+
# Oedipus brings a lot of extras to the table and at this point is
|
15
|
+
# only historically related to rexical. The syntax has changed enough
|
16
|
+
# that any rexical lexer will have to be tweaked to work inside of
|
17
|
+
# oedipus. At the very least, you need to add slashes to all your
|
18
|
+
# regexps.
|
19
|
+
#
|
20
|
+
# Oedipus, like rexical, is based primarily on generating code much
|
21
|
+
# like you would a hand-written lexer. It is _not_ a table or hash
|
22
|
+
# driven lexer. It uses StrScanner within a multi-level case
|
23
|
+
# statement. As such, Oedipus matches on the _first_ match, not the
|
24
|
+
# longest (like lex and its ilk).
|
25
|
+
#
|
26
|
+
# This documentation is not meant to bypass any prerequisite knowledge
|
27
|
+
# on lexing or parsing. If you'd like to study the subject in further
|
28
|
+
# detail, please try [TIN321] or the [LLVM Tutorial] or some other
|
29
|
+
# good resource for CS learning. Books... books are good. I like
|
30
|
+
# books.
|
31
|
+
|
6
32
|
class OedipusLex
|
7
|
-
VERSION = "2.
|
33
|
+
VERSION = "2.6.0" # :nodoc:
|
34
|
+
|
35
|
+
##
|
36
|
+
# The class name to generate.
|
8
37
|
|
9
38
|
attr_accessor :class_name
|
39
|
+
|
40
|
+
##
|
41
|
+
# An array of header lines to have before the lexer class.
|
42
|
+
|
10
43
|
attr_accessor :header
|
44
|
+
|
45
|
+
##
|
46
|
+
# An array of lines to have after the lexer class.
|
47
|
+
|
11
48
|
attr_accessor :ends
|
49
|
+
|
50
|
+
##
|
51
|
+
# An array of lines to have inside (but at the bottom of) the lexer
|
52
|
+
# class.
|
53
|
+
|
12
54
|
attr_accessor :inners
|
55
|
+
|
56
|
+
##
|
57
|
+
# An array of name/regexp pairs to generate constants inside the
|
58
|
+
# lexer class.
|
59
|
+
|
13
60
|
attr_accessor :macros
|
61
|
+
|
62
|
+
##
|
63
|
+
# A hash of options for the code generator. See README.rdoc for
|
64
|
+
# supported options.
|
65
|
+
|
14
66
|
attr_accessor :option
|
67
|
+
|
68
|
+
##
|
69
|
+
# The rules for the lexer.
|
70
|
+
|
15
71
|
attr_accessor :rules
|
72
|
+
|
73
|
+
##
|
74
|
+
# An array of lines of code to generate into the top of the lexer
|
75
|
+
# (next_token) loop.
|
76
|
+
|
16
77
|
attr_accessor :starts
|
78
|
+
|
79
|
+
##
|
80
|
+
# An array of all the groups within the lexer rules.
|
81
|
+
|
17
82
|
attr_accessor :group
|
18
83
|
|
19
|
-
DEFAULTS = {
|
84
|
+
DEFAULTS = { # :nodoc:
|
20
85
|
:debug => false,
|
21
86
|
:do_parse => false,
|
22
87
|
:lineno => false,
|
@@ -24,21 +89,39 @@ class OedipusLex
|
|
24
89
|
:stub => false,
|
25
90
|
}
|
26
91
|
|
92
|
+
##
|
93
|
+
# A Rule represents the main component of Oedipus Lex. These are the
|
94
|
+
# things that "get stuff done" at the lexical level. They consist of:
|
95
|
+
#
|
96
|
+
# + an optional required start state symbol or predicate method name
|
97
|
+
# + a regexp to match on
|
98
|
+
# + an optional action method or block
|
99
|
+
|
27
100
|
class Rule < Struct.new :start_state, :regexp, :action
|
101
|
+
##
|
102
|
+
# What group this rule is in, if any.
|
103
|
+
|
28
104
|
attr_accessor :group
|
29
|
-
|
105
|
+
|
106
|
+
alias :group? :group # :nodoc:
|
107
|
+
|
108
|
+
##
|
109
|
+
# A simple constructor
|
30
110
|
|
31
111
|
def self.[] start, regexp, action
|
32
112
|
new start, regexp.inspect, action
|
33
113
|
end
|
34
114
|
|
35
|
-
def initialize start_state, regexp, action
|
115
|
+
def initialize start_state, regexp, action # :nodoc:
|
36
116
|
super
|
37
117
|
self.group = nil
|
38
118
|
end
|
39
119
|
|
40
120
|
undef_method :to_a
|
41
121
|
|
122
|
+
##
|
123
|
+
# Generate equivalent ruby code for the rule.
|
124
|
+
|
42
125
|
def to_ruby state, predicates, exclusive
|
43
126
|
return unless group? or
|
44
127
|
start_state == state or
|
@@ -64,7 +147,7 @@ class OedipusLex
|
|
64
147
|
|
65
148
|
cond = if exclusive or not start_state then
|
66
149
|
check
|
67
|
-
elsif start_state
|
150
|
+
elsif /^:/.match?(start_state) then
|
68
151
|
"(state == #{start_state}) && (#{check})"
|
69
152
|
else # predicate method
|
70
153
|
"#{start_state} && (#{check})"
|
@@ -73,7 +156,7 @@ class OedipusLex
|
|
73
156
|
["when #{cond} then", body]
|
74
157
|
end
|
75
158
|
|
76
|
-
def pretty_print pp
|
159
|
+
def pretty_print pp # :nodoc:
|
77
160
|
pp.text "Rule"
|
78
161
|
pp.group 2, "[", "]" do
|
79
162
|
pp.pp start_state
|
@@ -85,25 +168,37 @@ class OedipusLex
|
|
85
168
|
end
|
86
169
|
end
|
87
170
|
|
171
|
+
##
|
172
|
+
# A group allows you to group up multiple rules under a single
|
173
|
+
# regular prefix expression, allowing optimized code to be generated
|
174
|
+
# that skips over all actions if the prefix isn't matched.
|
175
|
+
|
88
176
|
class Group < Struct.new :regex, :rules
|
89
177
|
alias :start_state :regex
|
90
178
|
|
179
|
+
##
|
180
|
+
# A convenience method to create a new group with a +start+ and
|
181
|
+
# given +subrules+.
|
182
|
+
|
91
183
|
def self.[] start, *subrules
|
92
184
|
r = new start.inspect
|
93
185
|
r.rules.concat subrules
|
94
186
|
r
|
95
187
|
end
|
96
188
|
|
97
|
-
def initialize start
|
189
|
+
def initialize start # :nodoc:
|
98
190
|
super(start, [])
|
99
191
|
end
|
100
192
|
|
193
|
+
##
|
194
|
+
# Add a rule to this group.
|
195
|
+
|
101
196
|
def << rule
|
102
197
|
rules << rule
|
103
198
|
nil
|
104
199
|
end
|
105
200
|
|
106
|
-
def to_ruby state, predicates, exclusive
|
201
|
+
def to_ruby state, predicates, exclusive # :nodoc:
|
107
202
|
[
|
108
203
|
"when ss.match?(#{regex}) then",
|
109
204
|
" case",
|
@@ -115,7 +210,7 @@ class OedipusLex
|
|
115
210
|
]
|
116
211
|
end
|
117
212
|
|
118
|
-
def pretty_print pp
|
213
|
+
def pretty_print pp # :nodoc:
|
119
214
|
pp.text "Group"
|
120
215
|
pp.group 2, "[", "]" do
|
121
216
|
pp.seplist([regex] + rules, lambda { pp.comma_breakable }, :each) { |v|
|
@@ -125,6 +220,10 @@ class OedipusLex
|
|
125
220
|
end
|
126
221
|
end
|
127
222
|
|
223
|
+
##
|
224
|
+
# A convenience method to create a new lexer with a +name+ and given
|
225
|
+
# +rules+.
|
226
|
+
|
128
227
|
def self.[](name, *rules)
|
129
228
|
r = new
|
130
229
|
r.class_name = name
|
@@ -132,7 +231,7 @@ class OedipusLex
|
|
132
231
|
r
|
133
232
|
end
|
134
233
|
|
135
|
-
def initialize opts = {}
|
234
|
+
def initialize opts = {} # :nodoc:
|
136
235
|
self.option = DEFAULTS.merge opts
|
137
236
|
self.class_name = nil
|
138
237
|
|
@@ -145,7 +244,7 @@ class OedipusLex
|
|
145
244
|
self.group = nil
|
146
245
|
end
|
147
246
|
|
148
|
-
def == o
|
247
|
+
def == o # :nodoc:
|
149
248
|
(o.class == self.class and
|
150
249
|
o.class_name == self.class_name and
|
151
250
|
o.header == self.header and
|
@@ -156,7 +255,7 @@ class OedipusLex
|
|
156
255
|
o.starts == self.starts)
|
157
256
|
end
|
158
257
|
|
159
|
-
def pretty_print pp
|
258
|
+
def pretty_print pp # :nodoc:
|
160
259
|
commas = lambda { pp.comma_breakable }
|
161
260
|
|
162
261
|
pp.text "Lexer"
|
@@ -165,67 +264,109 @@ class OedipusLex
|
|
165
264
|
end
|
166
265
|
end
|
167
266
|
|
267
|
+
##
|
268
|
+
# Process a +class+ lexeme.
|
269
|
+
|
168
270
|
def lex_class prefix, name
|
169
271
|
header.concat prefix.split(/\n/)
|
170
272
|
self.class_name = name
|
171
273
|
end
|
172
274
|
|
275
|
+
##
|
276
|
+
# Process a +comment+ lexeme.
|
277
|
+
|
173
278
|
def lex_comment line
|
174
279
|
# do nothing
|
175
280
|
end
|
176
281
|
|
282
|
+
##
|
283
|
+
# Process an +end+ lexeme.
|
284
|
+
|
177
285
|
def lex_end line
|
178
286
|
ends << line
|
179
287
|
end
|
180
288
|
|
289
|
+
##
|
290
|
+
# Process an +inner+ lexeme.
|
291
|
+
|
181
292
|
def lex_inner line
|
182
293
|
inners << line
|
183
294
|
end
|
184
295
|
|
296
|
+
##
|
297
|
+
# Process a +start+ lexeme.
|
298
|
+
|
185
299
|
def lex_start line
|
186
300
|
starts << line.strip
|
187
301
|
end
|
188
302
|
|
303
|
+
##
|
304
|
+
# Process a +macro+ lexeme.
|
305
|
+
|
189
306
|
def lex_macro name, value
|
190
307
|
macros << [name, value]
|
191
308
|
end
|
192
309
|
|
310
|
+
##
|
311
|
+
# Process an +option+ lexeme.
|
312
|
+
|
193
313
|
def lex_option option
|
194
314
|
self.option[option.to_sym] = true
|
195
315
|
end
|
196
316
|
|
317
|
+
##
|
318
|
+
# Process a +rule+ lexeme.
|
319
|
+
|
197
320
|
def lex_rule start_state, regexp, action = nil
|
198
321
|
rules << Rule.new(start_state, regexp, action)
|
199
322
|
end
|
200
323
|
|
324
|
+
##
|
325
|
+
# Process a +group head+ lexeme.
|
326
|
+
|
201
327
|
def lex_grouphead re
|
202
328
|
end_group if group
|
203
329
|
self.state = :group
|
204
330
|
self.group = Group.new re
|
205
331
|
end
|
206
332
|
|
333
|
+
##
|
334
|
+
# Process a +group+ lexeme.
|
335
|
+
|
207
336
|
def lex_group start_state, regexp, action = nil
|
208
337
|
rule = Rule.new(start_state, regexp, action)
|
209
338
|
rule.group = group
|
210
339
|
self.group << rule
|
211
340
|
end
|
212
341
|
|
342
|
+
##
|
343
|
+
# End a group.
|
344
|
+
|
213
345
|
def end_group
|
214
346
|
rules << group
|
215
347
|
self.group = nil
|
216
348
|
self.state = :rule
|
217
349
|
end
|
218
350
|
|
351
|
+
##
|
352
|
+
# Process the end of a +group+ lexeme.
|
353
|
+
|
219
354
|
def lex_groupend start_state, regexp, action = nil
|
220
355
|
end_group
|
221
356
|
lex_rule start_state, regexp, action
|
222
357
|
end
|
223
358
|
|
224
|
-
|
359
|
+
##
|
360
|
+
# Process a +state+ lexeme.
|
361
|
+
|
362
|
+
def lex_state _new_state
|
225
363
|
end_group if group
|
226
364
|
# do nothing -- lexer switches state for us
|
227
365
|
end
|
228
366
|
|
367
|
+
##
|
368
|
+
# Generate the lexer.
|
369
|
+
|
229
370
|
def generate
|
230
371
|
filter = lambda { |r| Rule === r && r.start_state || nil }
|
231
372
|
_mystates = rules.map(&filter).flatten.compact.uniq
|
@@ -238,13 +379,22 @@ class OedipusLex
|
|
238
379
|
all_states = [[nil, *inclusives], # nil+incls # eg [[nil, :a],
|
239
380
|
*exclusives.map { |s| [s] }] # [excls] # [:A], [:B]]
|
240
381
|
|
241
|
-
encoding = header.shift if header.first
|
382
|
+
encoding = header.shift if /encoding:/.match?(header.first)
|
242
383
|
encoding ||= "# encoding: UTF-8"
|
243
384
|
|
244
|
-
|
385
|
+
erb = if RUBY_VERSION >= "2.6.0" then
|
386
|
+
ERB.new(TEMPLATE, trim_mode:"%")
|
387
|
+
else
|
388
|
+
ERB.new(TEMPLATE, nil, "%")
|
389
|
+
end
|
390
|
+
|
391
|
+
erb.result binding
|
245
392
|
end
|
246
393
|
|
394
|
+
# :stopdoc:
|
395
|
+
|
247
396
|
TEMPLATE = <<-'REX'.gsub(/^ {6}/, '')
|
397
|
+
# frozen_string_literal: true
|
248
398
|
<%= encoding %>
|
249
399
|
#--
|
250
400
|
# This file is automatically generated. Do not modify it.
|
@@ -260,48 +410,89 @@ class OedipusLex
|
|
260
410
|
% end
|
261
411
|
|
262
412
|
% end
|
413
|
+
|
414
|
+
##
|
415
|
+
# The generated lexer <%= class_name %>
|
416
|
+
|
263
417
|
class <%= class_name %>
|
264
418
|
require 'strscan'
|
265
419
|
|
266
420
|
% unless macros.empty? then
|
421
|
+
# :stopdoc:
|
267
422
|
% max = macros.map { |(k,_)| k.size }.max
|
268
423
|
% macros.each do |(k,v)|
|
269
424
|
<%= "%-#{max}s = %s" % [k, v] %>
|
270
425
|
% end
|
271
|
-
|
426
|
+
# :startdoc:
|
272
427
|
% end
|
428
|
+
# :stopdoc:
|
273
429
|
class LexerError < StandardError ; end
|
274
430
|
class ScanError < LexerError ; end
|
431
|
+
# :startdoc:
|
275
432
|
|
276
433
|
% if option[:lineno] then
|
434
|
+
##
|
435
|
+
# The current line number.
|
436
|
+
|
277
437
|
attr_accessor :lineno
|
278
438
|
% end
|
439
|
+
##
|
440
|
+
# The file name / path
|
441
|
+
|
279
442
|
attr_accessor :filename
|
443
|
+
|
444
|
+
##
|
445
|
+
# The StringScanner for this lexer.
|
446
|
+
|
280
447
|
attr_accessor :ss
|
448
|
+
|
449
|
+
##
|
450
|
+
# The current lexical state.
|
451
|
+
|
281
452
|
attr_accessor :state
|
282
453
|
|
283
454
|
alias :match :ss
|
284
455
|
|
456
|
+
##
|
457
|
+
# The match groups for the current scan.
|
458
|
+
|
285
459
|
def matches
|
286
460
|
m = (1..9).map { |i| ss[i] }
|
287
461
|
m.pop until m[-1] or m.empty?
|
288
462
|
m
|
289
463
|
end
|
290
464
|
|
465
|
+
##
|
466
|
+
# Yields on the current action.
|
467
|
+
|
291
468
|
def action
|
292
469
|
yield
|
293
470
|
end
|
294
471
|
|
295
472
|
% if option[:column] then
|
473
|
+
##
|
474
|
+
# The previous position. Only available if the :column option is on.
|
475
|
+
|
296
476
|
attr_accessor :old_pos
|
297
477
|
|
478
|
+
##
|
479
|
+
# The position of the start of the current line. Only available if the
|
480
|
+
# :column option is on.
|
481
|
+
|
482
|
+
attr_accessor :start_of_current_line_pos
|
483
|
+
|
484
|
+
##
|
485
|
+
# The current column, starting at 0. Only available if the
|
486
|
+
# :column option is on.
|
298
487
|
def column
|
299
|
-
|
300
|
-
old_pos - idx - 1
|
488
|
+
old_pos - start_of_current_line_pos
|
301
489
|
end
|
302
490
|
|
303
491
|
% end
|
304
492
|
% if option[:do_parse] then
|
493
|
+
##
|
494
|
+
# Parse the file by getting all tokens and calling lex_+type+ on them.
|
495
|
+
|
305
496
|
def do_parse
|
306
497
|
while token = next_token do
|
307
498
|
type, *vals = token
|
@@ -311,20 +502,33 @@ class OedipusLex
|
|
311
502
|
end
|
312
503
|
|
313
504
|
% end
|
505
|
+
|
506
|
+
##
|
507
|
+
# The current scanner class. Must be overridden in subclasses.
|
508
|
+
|
314
509
|
def scanner_class
|
315
510
|
StringScanner
|
316
511
|
end unless instance_methods(false).map(&:to_s).include?("scanner_class")
|
317
512
|
|
513
|
+
##
|
514
|
+
# Parse the given string.
|
515
|
+
|
318
516
|
def parse str
|
319
517
|
self.ss = scanner_class.new str
|
320
518
|
% if option[:lineno] then
|
321
519
|
self.lineno = 1
|
520
|
+
% end
|
521
|
+
% if option[:column] then
|
522
|
+
self.start_of_current_line_pos = 0
|
322
523
|
% end
|
323
524
|
self.state ||= nil
|
324
525
|
|
325
526
|
do_parse
|
326
527
|
end
|
327
528
|
|
529
|
+
##
|
530
|
+
# Read in and parse the file at +path+.
|
531
|
+
|
328
532
|
def parse_file path
|
329
533
|
self.filename = path
|
330
534
|
open path do |f|
|
@@ -332,6 +536,9 @@ class OedipusLex
|
|
332
536
|
end
|
333
537
|
end
|
334
538
|
|
539
|
+
##
|
540
|
+
# The current location in the parse.
|
541
|
+
|
335
542
|
def location
|
336
543
|
[
|
337
544
|
(filename || "<input>"),
|
@@ -346,6 +553,9 @@ class OedipusLex
|
|
346
553
|
].compact.join(":")
|
347
554
|
end
|
348
555
|
|
556
|
+
##
|
557
|
+
# Lex the next token.
|
558
|
+
|
349
559
|
def next_token
|
350
560
|
% starts.each do |s|
|
351
561
|
<%= s %>
|
@@ -355,7 +565,13 @@ class OedipusLex
|
|
355
565
|
|
356
566
|
until ss.eos? or token do
|
357
567
|
% if option[:lineno] then
|
358
|
-
|
568
|
+
if ss.check(/\n/) then
|
569
|
+
self.lineno += 1
|
570
|
+
% if option[:column] then
|
571
|
+
# line starts 1 position after the newline
|
572
|
+
self.start_of_current_line_pos = ss.pos + 1
|
573
|
+
% end
|
574
|
+
end
|
359
575
|
% end
|
360
576
|
% if option[:column] then
|
361
577
|
self.old_pos = ss.pos
|
@@ -427,6 +643,8 @@ class OedipusLex
|
|
427
643
|
end
|
428
644
|
% end
|
429
645
|
REX
|
646
|
+
|
647
|
+
# :startdoc:
|
430
648
|
end
|
431
649
|
|
432
650
|
if $0 == __FILE__ then
|
data/lib/oedipus_lex.rex
CHANGED
data/lib/oedipus_lex.rex.rb
CHANGED
@@ -1,44 +1,86 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
# encoding: UTF-8
|
2
3
|
#--
|
3
4
|
# This file is automatically generated. Do not modify it.
|
4
|
-
# Generated by: oedipus_lex version 2.
|
5
|
+
# Generated by: oedipus_lex version 2.6.0.
|
5
6
|
# Source: lib/oedipus_lex.rex
|
6
7
|
#++
|
7
8
|
|
9
|
+
|
10
|
+
##
|
11
|
+
# The generated lexer OedipusLex
|
12
|
+
|
8
13
|
class OedipusLex
|
9
14
|
require 'strscan'
|
10
15
|
|
16
|
+
# :stopdoc:
|
11
17
|
ST = /(?:(:\S+|\w+\??))/
|
12
|
-
RE = /(\/(?:\\.|[^\/])
|
18
|
+
RE = /(\/(?:\\.|[^\/])*\/[ion]?)/
|
13
19
|
ACT = /(\{.*|:?\w+)/
|
14
|
-
|
20
|
+
# :startdoc:
|
21
|
+
# :stopdoc:
|
15
22
|
class LexerError < StandardError ; end
|
16
23
|
class ScanError < LexerError ; end
|
24
|
+
# :startdoc:
|
25
|
+
|
26
|
+
##
|
27
|
+
# The current line number.
|
17
28
|
|
18
29
|
attr_accessor :lineno
|
30
|
+
##
|
31
|
+
# The file name / path
|
32
|
+
|
19
33
|
attr_accessor :filename
|
34
|
+
|
35
|
+
##
|
36
|
+
# The StringScanner for this lexer.
|
37
|
+
|
20
38
|
attr_accessor :ss
|
39
|
+
|
40
|
+
##
|
41
|
+
# The current lexical state.
|
42
|
+
|
21
43
|
attr_accessor :state
|
22
44
|
|
23
45
|
alias :match :ss
|
24
46
|
|
47
|
+
##
|
48
|
+
# The match groups for the current scan.
|
49
|
+
|
25
50
|
def matches
|
26
51
|
m = (1..9).map { |i| ss[i] }
|
27
52
|
m.pop until m[-1] or m.empty?
|
28
53
|
m
|
29
54
|
end
|
30
55
|
|
56
|
+
##
|
57
|
+
# Yields on the current action.
|
58
|
+
|
31
59
|
def action
|
32
60
|
yield
|
33
61
|
end
|
34
62
|
|
63
|
+
##
|
64
|
+
# The previous position. Only available if the :column option is on.
|
65
|
+
|
35
66
|
attr_accessor :old_pos
|
36
67
|
|
68
|
+
##
|
69
|
+
# The position of the start of the current line. Only available if the
|
70
|
+
# :column option is on.
|
71
|
+
|
72
|
+
attr_accessor :start_of_current_line_pos
|
73
|
+
|
74
|
+
##
|
75
|
+
# The current column, starting at 0. Only available if the
|
76
|
+
# :column option is on.
|
37
77
|
def column
|
38
|
-
|
39
|
-
old_pos - idx - 1
|
78
|
+
old_pos - start_of_current_line_pos
|
40
79
|
end
|
41
80
|
|
81
|
+
##
|
82
|
+
# Parse the file by getting all tokens and calling lex_+type+ on them.
|
83
|
+
|
42
84
|
def do_parse
|
43
85
|
while token = next_token do
|
44
86
|
type, *vals = token
|
@@ -47,18 +89,29 @@ class OedipusLex
|
|
47
89
|
end
|
48
90
|
end
|
49
91
|
|
92
|
+
|
93
|
+
##
|
94
|
+
# The current scanner class. Must be overridden in subclasses.
|
95
|
+
|
50
96
|
def scanner_class
|
51
97
|
StringScanner
|
52
98
|
end unless instance_methods(false).map(&:to_s).include?("scanner_class")
|
53
99
|
|
100
|
+
##
|
101
|
+
# Parse the given string.
|
102
|
+
|
54
103
|
def parse str
|
55
104
|
self.ss = scanner_class.new str
|
56
105
|
self.lineno = 1
|
106
|
+
self.start_of_current_line_pos = 0
|
57
107
|
self.state ||= nil
|
58
108
|
|
59
109
|
do_parse
|
60
110
|
end
|
61
111
|
|
112
|
+
##
|
113
|
+
# Read in and parse the file at +path+.
|
114
|
+
|
62
115
|
def parse_file path
|
63
116
|
self.filename = path
|
64
117
|
open path do |f|
|
@@ -66,6 +119,9 @@ class OedipusLex
|
|
66
119
|
end
|
67
120
|
end
|
68
121
|
|
122
|
+
##
|
123
|
+
# The current location in the parse.
|
124
|
+
|
69
125
|
def location
|
70
126
|
[
|
71
127
|
(filename || "<input>"),
|
@@ -74,12 +130,19 @@ class OedipusLex
|
|
74
130
|
].compact.join(":")
|
75
131
|
end
|
76
132
|
|
133
|
+
##
|
134
|
+
# Lex the next token.
|
135
|
+
|
77
136
|
def next_token
|
78
137
|
|
79
138
|
token = nil
|
80
139
|
|
81
140
|
until ss.eos? or token do
|
82
|
-
|
141
|
+
if ss.check(/\n/) then
|
142
|
+
self.lineno += 1
|
143
|
+
# line starts 1 position after the newline
|
144
|
+
self.start_of_current_line_pos = ss.pos + 1
|
145
|
+
end
|
83
146
|
self.old_pos = ss.pos
|
84
147
|
token =
|
85
148
|
case state
|
data/test/test_oedipus_lex.rb
CHANGED
@@ -635,7 +635,8 @@ class TestOedipusLex < Minitest::Test
|
|
635
635
|
|
636
636
|
ruby = generate_lexer src
|
637
637
|
|
638
|
-
exp = ["#
|
638
|
+
exp = ["# frozen_string_literal: true",
|
639
|
+
"# encoding: UTF-8",
|
639
640
|
"#--",
|
640
641
|
"# This file is automatically generated. Do not modify it.",
|
641
642
|
"# Generated by: oedipus_lex version #{OedipusLex::VERSION}.",
|
@@ -644,7 +645,7 @@ class TestOedipusLex < Minitest::Test
|
|
644
645
|
"module X",
|
645
646
|
"module Y"]
|
646
647
|
|
647
|
-
assert_equal exp, ruby.lines.map(&:chomp).first(
|
648
|
+
assert_equal exp, ruby.lines.map(&:chomp).first(9)
|
648
649
|
end
|
649
650
|
|
650
651
|
def test_header_encoding_is_on_top
|
@@ -665,7 +666,8 @@ class TestOedipusLex < Minitest::Test
|
|
665
666
|
|
666
667
|
ruby = generate_lexer src
|
667
668
|
|
668
|
-
exp = ["#
|
669
|
+
exp = ["# frozen_string_literal: true",
|
670
|
+
"# encoding: UTF-8",
|
669
671
|
"#--",
|
670
672
|
"# This file is automatically generated. Do not modify it.",
|
671
673
|
"# Generated by: oedipus_lex version #{OedipusLex::VERSION}.",
|
@@ -674,7 +676,7 @@ class TestOedipusLex < Minitest::Test
|
|
674
676
|
"",
|
675
677
|
"module X"]
|
676
678
|
|
677
|
-
assert_equal exp, ruby.lines.map(&:chomp).first(
|
679
|
+
assert_equal exp, ruby.lines.map(&:chomp).first(9)
|
678
680
|
end
|
679
681
|
|
680
682
|
def test_read_non_existent_file
|
@@ -765,6 +767,19 @@ class TestOedipusLex < Minitest::Test
|
|
765
767
|
assert_match 'ss.scan(/#{X}/)', source
|
766
768
|
end
|
767
769
|
|
770
|
+
def test_parses_empty_regexp
|
771
|
+
source = generate_lexer %q{
|
772
|
+
class Foo
|
773
|
+
rule
|
774
|
+
/\w+/ { @state = :ARG; emit :tFUNCTION_CALL }
|
775
|
+
:ARG /\(/ { @state = nil; emit :tARG_LIST_BEGIN }
|
776
|
+
:ARG // { @state = nil }
|
777
|
+
end
|
778
|
+
}
|
779
|
+
|
780
|
+
assert_match 'ss.skip(//)', source
|
781
|
+
end
|
782
|
+
|
768
783
|
def test_changing_state_during_lexing
|
769
784
|
src = <<-'REX'
|
770
785
|
class Calculator
|
data.tar.gz.sig
CHANGED
@@ -1,2 +1 @@
|
|
1
|
-
|
2
|
-
v/����NQ�ʑhQ<�<��q�z�ڣV���q"�t��ǰ�f:��#wK��x�����xl��;#IN-�koy�
|
1
|
+
l��hm2���+��Z2�k� ��:�� V��Ph)�j܄b��D��XB��d����9����*2yn2�7-#�{��W���A���)�v`=J�w��>�/�y��J?0�T�WKJ�\)�w�lGy�p\�Dsy���
|
metadata
CHANGED
@@ -1,18 +1,18 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: oedipus_lex
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryan Davis
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain:
|
11
11
|
- |
|
12
12
|
-----BEGIN CERTIFICATE-----
|
13
|
-
|
13
|
+
MIIDPjCCAiagAwIBAgIBBTANBgkqhkiG9w0BAQsFADBFMRMwEQYDVQQDDApyeWFu
|
14
14
|
ZC1ydWJ5MRkwFwYKCZImiZPyLGQBGRYJemVuc3BpZGVyMRMwEQYKCZImiZPyLGQB
|
15
|
-
|
15
|
+
GRYDY29tMB4XDTIwMTIyMjIwMzgzMFoXDTIxMTIyMjIwMzgzMFowRTETMBEGA1UE
|
16
16
|
AwwKcnlhbmQtcnVieTEZMBcGCgmSJomT8ixkARkWCXplbnNwaWRlcjETMBEGCgmS
|
17
17
|
JomT8ixkARkWA2NvbTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBALda
|
18
18
|
b9DCgK+627gPJkB6XfjZ1itoOQvpqH1EXScSaba9/S2VF22VYQbXU1xQXL/WzCkx
|
@@ -20,46 +20,51 @@ cert_chain:
|
|
20
20
|
oOvjtt5P8+GSK9zLzxQP0gVLS/D0FmoE44XuDr3iQkVS2ujU5zZL84mMNqNB1znh
|
21
21
|
GiadM9GHRaDiaxuX0cIUBj19T01mVE2iymf9I6bEsiayK/n6QujtyCbTWsAS9Rqt
|
22
22
|
qhtV7HJxNKuPj/JFH0D2cswvzznE/a5FOYO68g+YCuFi5L8wZuuM8zzdwjrWHqSV
|
23
|
-
gBEfoTEGr7Zii72cx+
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
fO6tdKQc/5RfA8oQEkg8hrxA5PQSz4TOFJGLpFvIapEk6tMruQ0bHgkhr9auXg==
|
23
|
+
gBEfoTEGr7Zii72cx+sCAwEAAaM5MDcwCQYDVR0TBAIwADALBgNVHQ8EBAMCBLAw
|
24
|
+
HQYDVR0OBBYEFEfFe9md/r/tj/Wmwpy+MI8d9k/hMA0GCSqGSIb3DQEBCwUAA4IB
|
25
|
+
AQAE3XRm1YZcCVjAJy5yMZvTOFrS7B2SYErc+0QwmKYbHztTTDY2m5Bii+jhpuxh
|
26
|
+
H+ETcU1z8TUKLpsBUP4kUpIRowkVN1p/jKapV8T3Rbwq+VuYFe+GMKsf8wGZSecG
|
27
|
+
oMQ8DzzauZfbvhe2kDg7G9BBPU0wLQlY25rDcCy9bLnD7R0UK3ONqpwvsI5I7x5X
|
28
|
+
ZIMXR0a9/DG+55mawwdGzCQobDKiSNLK89KK7OcNTALKU0DfgdTkktdgKchzKHqZ
|
29
|
+
d/AHw/kcnU6iuMUoJEcGiJd4gVCTn1l3cDcIvxakGslCA88Jubw0Sqatan0TnC9g
|
30
|
+
KToW560QIey7SPfHWduzFJnV
|
32
31
|
-----END CERTIFICATE-----
|
33
|
-
date:
|
32
|
+
date: 2021-10-27 00:00:00.000000000 Z
|
34
33
|
dependencies:
|
35
34
|
- !ruby/object:Gem::Dependency
|
36
35
|
name: rdoc
|
37
36
|
requirement: !ruby/object:Gem::Requirement
|
38
37
|
requirements:
|
39
|
-
- -
|
38
|
+
- - ">="
|
40
39
|
- !ruby/object:Gem::Version
|
41
40
|
version: '4.0'
|
41
|
+
- - "<"
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '7'
|
42
44
|
type: :development
|
43
45
|
prerelease: false
|
44
46
|
version_requirements: !ruby/object:Gem::Requirement
|
45
47
|
requirements:
|
46
|
-
- -
|
48
|
+
- - ">="
|
47
49
|
- !ruby/object:Gem::Version
|
48
50
|
version: '4.0'
|
51
|
+
- - "<"
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '7'
|
49
54
|
- !ruby/object:Gem::Dependency
|
50
55
|
name: hoe
|
51
56
|
requirement: !ruby/object:Gem::Requirement
|
52
57
|
requirements:
|
53
|
-
- - ~>
|
58
|
+
- - "~>"
|
54
59
|
- !ruby/object:Gem::Version
|
55
|
-
version: '3.
|
60
|
+
version: '3.22'
|
56
61
|
type: :development
|
57
62
|
prerelease: false
|
58
63
|
version_requirements: !ruby/object:Gem::Requirement
|
59
64
|
requirements:
|
60
|
-
- - ~>
|
65
|
+
- - "~>"
|
61
66
|
- !ruby/object:Gem::Version
|
62
|
-
version: '3.
|
67
|
+
version: '3.22'
|
63
68
|
description: |-
|
64
69
|
Oedipus Lex is a lexer generator in the same family as Rexical and
|
65
70
|
Rex. Oedipus Lex is my independent lexer fork of Rexical. Rexical was
|
@@ -93,7 +98,7 @@ extra_rdoc_files:
|
|
93
98
|
- README.rdoc
|
94
99
|
- sample/error1.txt
|
95
100
|
files:
|
96
|
-
- .autotest
|
101
|
+
- ".autotest"
|
97
102
|
- History.rdoc
|
98
103
|
- Manifest.txt
|
99
104
|
- README.rdoc
|
@@ -123,27 +128,30 @@ files:
|
|
123
128
|
homepage: http://github.com/seattlerb/oedipus_lex
|
124
129
|
licenses:
|
125
130
|
- MIT
|
126
|
-
metadata:
|
127
|
-
|
131
|
+
metadata:
|
132
|
+
homepage_uri: http://github.com/seattlerb/oedipus_lex
|
133
|
+
post_install_message:
|
128
134
|
rdoc_options:
|
129
|
-
- --main
|
135
|
+
- "--main"
|
130
136
|
- README.rdoc
|
131
137
|
require_paths:
|
132
138
|
- lib
|
133
139
|
required_ruby_version: !ruby/object:Gem::Requirement
|
134
140
|
requirements:
|
135
|
-
- -
|
141
|
+
- - ">="
|
136
142
|
- !ruby/object:Gem::Version
|
137
|
-
version: '
|
143
|
+
version: '2.4'
|
144
|
+
- - "<"
|
145
|
+
- !ruby/object:Gem::Version
|
146
|
+
version: '4.0'
|
138
147
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
139
148
|
requirements:
|
140
|
-
- -
|
149
|
+
- - ">="
|
141
150
|
- !ruby/object:Gem::Version
|
142
151
|
version: '0'
|
143
152
|
requirements: []
|
144
|
-
|
145
|
-
|
146
|
-
signing_key:
|
153
|
+
rubygems_version: 3.2.16
|
154
|
+
signing_key:
|
147
155
|
specification_version: 4
|
148
156
|
summary: Oedipus Lex is a lexer generator in the same family as Rexical and Rex
|
149
157
|
test_files: []
|
metadata.gz.sig
CHANGED
Binary file
|