ebnf 1.2.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +160 -185
- data/UNLICENSE +1 -1
- data/VERSION +1 -1
- data/bin/ebnf +6 -3
- data/etc/doap.ttl +13 -12
- data/etc/ebnf.ebnf +13 -19
- data/etc/ebnf.html +205 -239
- data/etc/{ebnf.rb → ebnf.ll1.rb} +3 -4
- data/etc/ebnf.ll1.sxp +179 -183
- data/etc/ebnf.peg.rb +98 -0
- data/etc/ebnf.peg.sxp +93 -0
- data/etc/ebnf.sxp +37 -41
- data/etc/sparql.html +1603 -1751
- data/etc/sparql.ll1.sxp +7372 -7372
- data/etc/sparql.peg.rb +532 -0
- data/etc/sparql.peg.sxp +597 -0
- data/etc/sparql.sxp +362 -362
- data/etc/turtle.html +465 -517
- data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
- data/etc/turtle.ll1.sxp +425 -425
- data/etc/turtle.peg.rb +182 -0
- data/etc/turtle.peg.sxp +199 -0
- data/etc/turtle.sxp +101 -101
- data/lib/ebnf.rb +3 -1
- data/lib/ebnf/base.rb +30 -29
- data/lib/ebnf/bnf.rb +1 -26
- data/lib/ebnf/ll1.rb +132 -1
- data/lib/ebnf/ll1/lexer.rb +20 -22
- data/lib/ebnf/ll1/parser.rb +86 -61
- data/lib/ebnf/ll1/scanner.rb +83 -50
- data/lib/ebnf/peg.rb +39 -0
- data/lib/ebnf/peg/parser.rb +535 -0
- data/lib/ebnf/peg/rule.rb +222 -0
- data/lib/ebnf/rule.rb +118 -55
- data/lib/ebnf/terminals.rb +18 -0
- data/lib/ebnf/writer.rb +3 -2
- metadata +29 -6
- data/etc/sparql.rb +0 -45773
data/lib/ebnf.rb
CHANGED
@@ -3,7 +3,9 @@ module EBNF
|
|
3
3
|
autoload :BNF, "ebnf/bnf"
|
4
4
|
autoload :LL1, "ebnf/ll1"
|
5
5
|
autoload :Parser, "ebnf/parser"
|
6
|
+
autoload :PEG, "ebnf/peg"
|
6
7
|
autoload :Rule, "ebnf/rule"
|
8
|
+
autoload :Terminals,"ebnf/terminals"
|
7
9
|
autoload :Writer, "ebnf/writer"
|
8
10
|
autoload :VERSION, "ebnf/version"
|
9
11
|
|
@@ -18,6 +20,6 @@ module EBNF
|
|
18
20
|
# @return [EBNF::Base]
|
19
21
|
# @raise [Exception] on invalid input
|
20
22
|
def self.parse(input, **options)
|
21
|
-
|
23
|
+
::EBNF::Base.new(input, **options)
|
22
24
|
end
|
23
25
|
end
|
data/lib/ebnf/base.rb
CHANGED
@@ -2,7 +2,7 @@ require 'strscan'
|
|
2
2
|
|
3
3
|
# Extended Backus-Naur Form (EBNF), being the W3C variation is
|
4
4
|
# originally defined in the
|
5
|
-
# [W3C XML 1.0 Spec](
|
5
|
+
# [W3C XML 1.0 Spec](https://www.w3.org/TR/REC-xml/#sec-notation).
|
6
6
|
#
|
7
7
|
# This version attempts to be less strict than the strict definition
|
8
8
|
# to allow for colloquial variations (such as in the Turtle syntax).
|
@@ -12,8 +12,8 @@ require 'strscan'
|
|
12
12
|
#
|
13
13
|
# Comments include the content between '/*' and '*/'
|
14
14
|
#
|
15
|
-
# @see
|
16
|
-
# @see
|
15
|
+
# @see https://www.w3.org/2000/10/swap/grammar/ebnf2turtle.py
|
16
|
+
# @see https://www.w3.org/2000/10/swap/grammar/ebnf2bnf.n3
|
17
17
|
#
|
18
18
|
# Based on bnf2turtle by Dan Connolly.
|
19
19
|
#
|
@@ -36,7 +36,7 @@ require 'strscan'
|
|
36
36
|
# derived mechanically from the specification.
|
37
37
|
#
|
38
38
|
#
|
39
|
-
# [N3 design note]:
|
39
|
+
# [N3 design note]: https://www.w3.org/DesignIssues/Notation3
|
40
40
|
#
|
41
41
|
# Related Work
|
42
42
|
# ------------
|
@@ -59,12 +59,12 @@ require 'strscan'
|
|
59
59
|
# expression of the grammar in terms of the higher level EBNF
|
60
60
|
# constructs.
|
61
61
|
#
|
62
|
-
# [goal]:
|
63
|
-
# [n3p announcement]:
|
64
|
-
# [Yacker]:
|
65
|
-
# [SPARQL specification]:
|
66
|
-
# [Cwm Release 1.1.0rc1]:
|
67
|
-
# [bnf-rules.n3]:
|
62
|
+
# [goal]: https://www.w3.org/2002/02/mid/1086902566.21030.1479.camel@dirk;list=public-cwm-bugs
|
63
|
+
# [n3p announcement]: https://lists.w3.org/Archives/Public/public-cwm-talk/2004OctDec/0029.html
|
64
|
+
# [Yacker]: https://www.w3.org/1999/02/26-modules/User/Yacker
|
65
|
+
# [SPARQL specification]: https://www.w3.org/TR/rdf-sparql-query/
|
66
|
+
# [Cwm Release 1.1.0rc1]: https://lists.w3.org/Archives/Public/public-cwm-announce/2005JulSep/0000.html
|
67
|
+
# [bnf-rules.n3]: https://www.w3.org/2000/10/swap/grammar/bnf-rules.n3
|
68
68
|
#
|
69
69
|
# Open Issues and Future Work
|
70
70
|
# ---------------------------
|
@@ -82,8 +82,8 @@ require 'strscan'
|
|
82
82
|
# It would be interesting to corroborate the claim in the SPARQL spec
|
83
83
|
# that the grammar is LL(1) with a mechanical proof based on N3 rules.
|
84
84
|
#
|
85
|
-
# [swap/grammar/bnf]:
|
86
|
-
# [bnf2html.n3]:
|
85
|
+
# [swap/grammar/bnf]: https://www.w3.org/2000/10/swap/grammar/bnf
|
86
|
+
# [bnf2html.n3]: https://www.w3.org/2000/10/swap/grammar/bnf2html.n3
|
87
87
|
#
|
88
88
|
# Background
|
89
89
|
# ----------
|
@@ -93,7 +93,7 @@ require 'strscan'
|
|
93
93
|
# of N3 that maps directly to (and from) the standard XML syntax for
|
94
94
|
# RDF.
|
95
95
|
#
|
96
|
-
# [N3 Primer]:
|
96
|
+
# [N3 Primer]: https://www.w3.org/2000/10/swap/Primer.html
|
97
97
|
#
|
98
98
|
# @author Gregg Kellogg
|
99
99
|
module EBNF
|
@@ -101,6 +101,7 @@ module EBNF
|
|
101
101
|
include BNF
|
102
102
|
include LL1
|
103
103
|
include Parser
|
104
|
+
include PEG
|
104
105
|
|
105
106
|
# Abstract syntax tree from parse
|
106
107
|
#
|
@@ -116,9 +117,9 @@ module EBNF
|
|
116
117
|
# in S-Expressions (similar to SPARQL SSE)
|
117
118
|
#
|
118
119
|
# @param [#read, #to_s] input
|
119
|
-
# @param [
|
120
|
-
# @param [Symbol] :format (:ebnf)
|
120
|
+
# @param [Symbol] format (:ebnf)
|
121
121
|
# Format of input, one of :ebnf, or :sxp
|
122
|
+
# @param [Hash{Symbol => Object}] options
|
122
123
|
# @option options [Boolean, Array] :debug
|
123
124
|
# Output debug information to an array or $stdout.
|
124
125
|
def initialize(input, format: :ebnf, **options)
|
@@ -194,26 +195,26 @@ module EBNF
|
|
194
195
|
# Output Ruby parser files
|
195
196
|
#
|
196
197
|
# @param [IO, StringIO] output
|
197
|
-
# @param [String]
|
198
|
-
# @param [String]
|
199
|
-
def to_ruby(output = $stdout, grammarFile: nil, mod_name: '
|
198
|
+
# @param [String] grammarFile
|
199
|
+
# @param [String] mod_name ('Meta')
|
200
|
+
def to_ruby(output = $stdout, grammarFile: nil, mod_name: 'Meta', **options)
|
200
201
|
unless output == $stdout
|
201
|
-
output.puts "# This file is automatically generated by #{
|
202
|
-
output.puts "#
|
202
|
+
output.puts "# This file is automatically generated by ebnf version #{EBNF::VERSION}"
|
203
|
+
output.puts "# Derived from #{grammarFile}" if grammarFile
|
203
204
|
unless self.errors.empty?
|
204
|
-
output.puts "# Note,
|
205
|
+
output.puts "# Note, grammar has errors, may need to be resolved manually:"
|
205
206
|
#output.puts "# #{pp.conflicts.map{|c| c.join("\n# ")}.join("\n# ")}"
|
206
207
|
end
|
207
208
|
output.puts "module #{mod_name}"
|
208
|
-
output.puts " START = #{self.start.inspect}"
|
209
|
-
|
209
|
+
output.puts " START = #{self.start.inspect}\n" if self.start
|
210
|
+
end
|
211
|
+
|
212
|
+
# Either output LL(1) BRANCH tables or rules for PEG parsing
|
213
|
+
if ast.first.is_a?(EBNF::PEG::Rule)
|
214
|
+
to_ruby_peg(output)
|
215
|
+
else
|
216
|
+
to_ruby_ll1(output)
|
210
217
|
end
|
211
|
-
self.outputTable(output, 'BRANCH', self.branch, 1)
|
212
|
-
self.outputTable(output, 'TERMINALS', self.terminals, 1)
|
213
|
-
self.outputTable(output, 'FIRST', self.first, 1)
|
214
|
-
self.outputTable(output, 'FOLLOW', self.follow, 1)
|
215
|
-
self.outputTable(output, 'CLEANUP', self.cleanup, 1)
|
216
|
-
self.outputTable(output, 'PASS', [self.pass], 1) if self.pass
|
217
218
|
unless output == $stdout
|
218
219
|
output.puts "end"
|
219
220
|
end
|
data/lib/ebnf/bnf.rb
CHANGED
@@ -17,32 +17,7 @@ module EBNF
|
|
17
17
|
new_ast += new_rules
|
18
18
|
end
|
19
19
|
|
20
|
-
|
21
|
-
to_rewrite = {}
|
22
|
-
new_ast.select {|r| r.terminal?}.each do |src_rule|
|
23
|
-
new_ast.select {|r| r.terminal?}.each do |dst_rule|
|
24
|
-
if src_rule.equivalent?(dst_rule) && src_rule != dst_rule
|
25
|
-
debug("make_bnf") {"equivalent rules: #{src_rule.inspect} and #{dst_rule.inspect}"}
|
26
|
-
(to_rewrite[src_rule] ||= []) << dst_rule
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
# Replace references to equivalent rules with canonical rule
|
32
|
-
to_rewrite.each do |src_rule, dst_rules|
|
33
|
-
dst_rules.each do |dst_rule|
|
34
|
-
new_ast.each do |mod_rule|
|
35
|
-
debug("make_bnf") {"rewrite #{mod_rule.inspect} from #{dst_rule.sym} to #{src_rule.sym}"}
|
36
|
-
mod_rule.rewrite(dst_rule, src_rule)
|
37
|
-
end
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
# AST now has just rewritten rules
|
42
|
-
compacted_ast = new_ast - to_rewrite.values.flatten.compact
|
43
|
-
|
44
|
-
# Sort AST by number
|
45
|
-
@ast = compacted_ast
|
20
|
+
@ast = new_ast
|
46
21
|
progress("make_bnf") {"End: #{@ast.length} rules"}
|
47
22
|
self
|
48
23
|
end
|
data/lib/ebnf/ll1.rb
CHANGED
@@ -1,4 +1,90 @@
|
|
1
1
|
module EBNF
|
2
|
+
##
|
3
|
+
# This module extends {EBNF::Base} to create metadata including _branch_, [First/Follow][], and other tables which are used by {EBNF::LL1::Parser} to recognize examples of the associated grammar.
|
4
|
+
#
|
5
|
+
### Branch Table
|
6
|
+
#
|
7
|
+
# The Branch table is a hash mapping production rules to a hash relating terminals appearing in input to sequence of productions to follow when the corresponding input terminal is found. This allows either the `seq` primitive, where all terminals map to the same sequence of productions, or the `alt` primitive, where each terminal may map to a different production.
|
8
|
+
#
|
9
|
+
# BRANCH = {
|
10
|
+
# :alt => {
|
11
|
+
# "(" => [:seq, :_alt_1],
|
12
|
+
# :ENUM => [:seq, :_alt_1],
|
13
|
+
# :HEX => [:seq, :_alt_1],
|
14
|
+
# :O_ENUM => [:seq, :_alt_1],
|
15
|
+
# :O_RANGE => [:seq, :_alt_1],
|
16
|
+
# :RANGE => [:seq, :_alt_1],
|
17
|
+
# :STRING1 => [:seq, :_alt_1],
|
18
|
+
# :STRING2 => [:seq, :_alt_1],
|
19
|
+
# :SYMBOL => [:seq, :_alt_1],
|
20
|
+
# },
|
21
|
+
# ...
|
22
|
+
# :declaration => {
|
23
|
+
# "@pass" => [:pass],
|
24
|
+
# "@terminals" => ["@terminals"],
|
25
|
+
# },
|
26
|
+
# ...
|
27
|
+
# }
|
28
|
+
#
|
29
|
+
# In this case the `alt` rule, `seq ('|' seq)*`, can be matched when any of the specified tokens appears on the input stream. They all cause the same token to be passed to the `seq` rule, followed by `_alt_1`, which handles the `('|' seq)*` portion of the rule after the first sequence is matched.
|
30
|
+
#
|
31
|
+
# The `declaration` rule is `'@terminals' | pass` using the `alt` primitive determining the production to run based on the terminal appearing on the input stream. Eventually, a terminal production is found and the token is consumed.
|
32
|
+
#
|
33
|
+
### First/Follow Table
|
34
|
+
#
|
35
|
+
# The [First/Follow][] table is a hash mapping production rules to the terminals that may begin or follow the rule. For example:
|
36
|
+
#
|
37
|
+
# FIRST = {
|
38
|
+
# :alt => [
|
39
|
+
# :HEX,
|
40
|
+
# :SYMBOL,
|
41
|
+
# :ENUM,
|
42
|
+
# :O_ENUM,
|
43
|
+
# :RANGE,
|
44
|
+
# :O_RANGE,
|
45
|
+
# :STRING1,
|
46
|
+
# :STRING2,
|
47
|
+
# "("],
|
48
|
+
# ...
|
49
|
+
# }
|
50
|
+
#
|
51
|
+
### Terminals Table
|
52
|
+
#
|
53
|
+
# This table is a simple list of the terminal productions found in the grammar. For example:
|
54
|
+
#
|
55
|
+
# TERMINALS = ["(", ")", "-",
|
56
|
+
# "@pass", "@terminals",
|
57
|
+
# :ENUM, :HEX, :LHS, :O_ENUM, :O_RANGE,:POSTFIX,
|
58
|
+
# :RANGE, :STRING1, :STRING2, :SYMBOL,"|"
|
59
|
+
# ].freeze
|
60
|
+
#
|
61
|
+
### Cleanup Table
|
62
|
+
#
|
63
|
+
# This table identifies productions which used EBNF rules, which are transformed to BNF for actual parsing. This allows the parser, in some cases, to reproduce *star*, *plus*, and *opt* rule matches. For example:
|
64
|
+
#
|
65
|
+
# CLEANUP = {
|
66
|
+
# :_alt_1 => :star,
|
67
|
+
# :_alt_3 => :merge,
|
68
|
+
# :_diff_1 => :opt,
|
69
|
+
# :ebnf => :star,
|
70
|
+
# :_ebnf_2 => :merge,
|
71
|
+
# :_postfix_1 => :opt,
|
72
|
+
# :seq => :plus,
|
73
|
+
# :_seq_1 => :star,
|
74
|
+
# :_seq_2 => :merge,
|
75
|
+
# }.freeze
|
76
|
+
#
|
77
|
+
# In this case the `ebnf` rule was `(declaration | rule)*`. As BNF does not support a star operator, this is decomposed into a set of rules using `alt` and `seq` primitives:
|
78
|
+
#
|
79
|
+
# ebnf ::= _empty _ebnf_2
|
80
|
+
# _ebnf_1 ::= declaration | rule
|
81
|
+
# _ebnf_2 ::= _ebnf_1 ebnf
|
82
|
+
# _ebnf_3 ::= ebnf
|
83
|
+
#
|
84
|
+
# The `_empty` production matches an empty string, so allows for no value. `_ebnf_2` matches `declaration | rule` (using the `alt` primitive) followed by `ebnf`, creating a sequence of zero or more `declaration` or `rule` members.
|
85
|
+
#
|
86
|
+
# [First/Follow]: https://en.wikipedia.org/wiki/LL_parser#Constructing_an_LL.281.29_parsing_table
|
87
|
+
|
2
88
|
module LL1
|
3
89
|
autoload :Lexer, "ebnf/ll1/lexer"
|
4
90
|
autoload :Parser, "ebnf/ll1/parser"
|
@@ -51,8 +137,40 @@ module EBNF
|
|
51
137
|
##
|
52
138
|
# Create first/follow for each rule using techniques defined for LL(1) parsers.
|
53
139
|
#
|
140
|
+
# This takes rules which have been transformed into BNF and adds first/follow and other information to the rules to allow the generation of metadata tables used for driving a parser.
|
141
|
+
#
|
142
|
+
# Given an initial rule in EBNF:
|
143
|
+
#
|
144
|
+
# (rule ebnf "1" (star declaration rule))
|
145
|
+
#
|
146
|
+
# The BNF transformation becomes:
|
147
|
+
#
|
148
|
+
# (rule ebnf "1" (alt _empty _ebnf_2))
|
149
|
+
# (rule _ebnf_1 "1.1" (alt declaration rule))
|
150
|
+
# (rule _ebnf_2 "1.2" (seq _ebnf_1 ebnf))
|
151
|
+
# (rule _ebnf_3 "1.3" (seq ebnf))
|
152
|
+
#
|
153
|
+
# After running this method, the rules are annotated with first/follow and cleanup rules:
|
154
|
+
#
|
155
|
+
# (rule ebnf "1"
|
156
|
+
# (start #t)
|
157
|
+
# (first "@pass" "@terminals" LHS _eps)
|
158
|
+
# (follow _eof)
|
159
|
+
# (cleanup star)
|
160
|
+
# (alt _empty _ebnf_2))
|
161
|
+
# (rule _ebnf_1 "1.1"
|
162
|
+
# (first "@pass" "@terminals" LHS)
|
163
|
+
# (follow "@pass" "@terminals" LHS _eof)
|
164
|
+
# (alt declaration rule))
|
165
|
+
# (rule _ebnf_2 "1.2"
|
166
|
+
# (first "@pass" "@terminals" LHS)
|
167
|
+
# (follow _eof)
|
168
|
+
# (cleanup merge)
|
169
|
+
# (seq _ebnf_1 ebnf))
|
170
|
+
# (rule _ebnf_3 "1.3" (first "@pass" "@terminals" LHS _eps) (follow _eof) (seq ebnf))
|
171
|
+
#
|
54
172
|
# @return [EBNF] self
|
55
|
-
# @see
|
173
|
+
# @see https://en.wikipedia.org/wiki/LL_parser#Constructing_an_LL.281.29_parsing_table
|
56
174
|
# @param [Array<Symbol>] starts
|
57
175
|
# Set of symbols which are start rules
|
58
176
|
def first_follow(*starts)
|
@@ -276,6 +394,19 @@ module EBNF
|
|
276
394
|
end
|
277
395
|
end
|
278
396
|
|
397
|
+
##
|
398
|
+
# Output Ruby parser files for LL(1) parsing
|
399
|
+
#
|
400
|
+
# @param [IO, StringIO] output
|
401
|
+
def to_ruby_ll1(output, **options)
|
402
|
+
self.outputTable(output, 'BRANCH', self.branch, 1)
|
403
|
+
self.outputTable(output, 'TERMINALS', self.terminals, 1)
|
404
|
+
self.outputTable(output, 'FIRST', self.first, 1)
|
405
|
+
self.outputTable(output, 'FOLLOW', self.follow, 1)
|
406
|
+
self.outputTable(output, 'CLEANUP', self.cleanup, 1)
|
407
|
+
self.outputTable(output, 'PASS', [self.pass], 1) if self.pass
|
408
|
+
end
|
409
|
+
|
279
410
|
private
|
280
411
|
def do_production(lhs)
|
281
412
|
rule = find_rule(lhs)
|
data/lib/ebnf/ll1/lexer.rb
CHANGED
@@ -29,7 +29,7 @@ module EBNF::LL1
|
|
29
29
|
# warn error.inspect
|
30
30
|
# end
|
31
31
|
#
|
32
|
-
# @see
|
32
|
+
# @see https://en.wikipedia.org/wiki/Lexical_analysis
|
33
33
|
class Lexer
|
34
34
|
include Enumerable
|
35
35
|
|
@@ -43,10 +43,10 @@ module EBNF::LL1
|
|
43
43
|
"\\'" => '\'', # \u0027 (apostrophe-quote, single quote mark)
|
44
44
|
'\\\\' => '\\' # \u005C (backslash)
|
45
45
|
}.freeze
|
46
|
-
ESCAPE_CHAR4 = /\\u(?:[0-9A-Fa-f]{4,4})
|
47
|
-
ESCAPE_CHAR8 = /\\U(?:[0-9A-Fa-f]{8,8})
|
48
|
-
ECHAR = /\\./
|
49
|
-
UCHAR = /#{ESCAPE_CHAR4}|#{ESCAPE_CHAR8}
|
46
|
+
ESCAPE_CHAR4 = /\\u(?:[0-9A-Fa-f]{4,4})/u.freeze # \uXXXX
|
47
|
+
ESCAPE_CHAR8 = /\\U(?:[0-9A-Fa-f]{8,8})/u.freeze # \UXXXXXXXX
|
48
|
+
ECHAR = /\\./u.freeze # More liberal unescaping
|
49
|
+
UCHAR = /#{ESCAPE_CHAR4}|#{ESCAPE_CHAR8}/n.freeze
|
50
50
|
|
51
51
|
##
|
52
52
|
# @return [Regexp] defines whitespace, including comments, otherwise whitespace must be explicit in terminals
|
@@ -59,7 +59,7 @@ module EBNF::LL1
|
|
59
59
|
#
|
60
60
|
# @param [String] string
|
61
61
|
# @return [String]
|
62
|
-
# @see
|
62
|
+
# @see https://www.w3.org/TR/rdf-sparql-query/#codepointEscape
|
63
63
|
def self.unescape_codepoints(string)
|
64
64
|
string = string.dup
|
65
65
|
string.force_encoding(Encoding::ASCII_8BIT) if string.respond_to?(:force_encoding)
|
@@ -81,7 +81,7 @@ module EBNF::LL1
|
|
81
81
|
#
|
82
82
|
# @param [String] input
|
83
83
|
# @return [String]
|
84
|
-
# @see
|
84
|
+
# @see https://www.w3.org/TR/rdf-sparql-query/#grammarEscapes
|
85
85
|
def self.unescape_string(input)
|
86
86
|
input.gsub(ECHAR) { |escaped| ESCAPE_CHARS[escaped] || escaped[1..-1]}
|
87
87
|
end
|
@@ -131,7 +131,6 @@ module EBNF::LL1
|
|
131
131
|
|
132
132
|
raise Error, "Terminal patterns not defined" unless @terminals && @terminals.length > 0
|
133
133
|
|
134
|
-
@lineno = 1
|
135
134
|
@scanner = Scanner.new(input, **options)
|
136
135
|
end
|
137
136
|
|
@@ -147,12 +146,6 @@ module EBNF::LL1
|
|
147
146
|
# @return [String]
|
148
147
|
attr_accessor :input
|
149
148
|
|
150
|
-
##
|
151
|
-
# The current line number (zero-based).
|
152
|
-
#
|
153
|
-
# @return [Integer]
|
154
|
-
attr_reader :lineno
|
155
|
-
|
156
149
|
##
|
157
150
|
# Returns `true` if the input string is lexically valid.
|
158
151
|
#
|
@@ -194,7 +187,7 @@ module EBNF::LL1
|
|
194
187
|
|
195
188
|
@first ||= begin
|
196
189
|
{} while !scanner.eos? && skip_whitespace
|
197
|
-
return
|
190
|
+
return nil if scanner.eos?
|
198
191
|
|
199
192
|
token = match_token(*types)
|
200
193
|
|
@@ -233,7 +226,7 @@ module EBNF::LL1
|
|
233
226
|
# @return [Token]
|
234
227
|
def recover(*types)
|
235
228
|
until scanner.eos? || tok = match_token(*types)
|
236
|
-
if scanner.skip_until(@whitespace || /\s
|
229
|
+
if scanner.skip_until(@whitespace || /\s+/m).nil? # Skip past current "token"
|
237
230
|
# No whitespace at the end, must be at end of string
|
238
231
|
scanner.terminate
|
239
232
|
else
|
@@ -243,6 +236,14 @@ module EBNF::LL1
|
|
243
236
|
scanner.unscan if tok
|
244
237
|
first
|
245
238
|
end
|
239
|
+
|
240
|
+
##
|
241
|
+
# The current line number (one-based).
|
242
|
+
#
|
243
|
+
# @return [Integer]
|
244
|
+
def lineno
|
245
|
+
scanner.lineno
|
246
|
+
end
|
246
247
|
protected
|
247
248
|
|
248
249
|
# @return [StringScanner]
|
@@ -253,9 +254,7 @@ module EBNF::LL1
|
|
253
254
|
def skip_whitespace
|
254
255
|
# skip all white space, but keep track of the current line number
|
255
256
|
while @whitespace && !scanner.eos?
|
256
|
-
|
257
|
-
@lineno += matched.count("\n")
|
258
|
-
else
|
257
|
+
unless scanner.scan(@whitespace)
|
259
258
|
return
|
260
259
|
end
|
261
260
|
end
|
@@ -281,7 +280,6 @@ module EBNF::LL1
|
|
281
280
|
if matched = scanner.scan(term.regexp)
|
282
281
|
#STDERR.puts " matched #{term.type.inspect}: #{matched.inspect}"
|
283
282
|
tok = token(term.type, term.canonicalize(matched))
|
284
|
-
@lineno += matched.count("\n")
|
285
283
|
return tok
|
286
284
|
end
|
287
285
|
end
|
@@ -372,7 +370,7 @@ module EBNF::LL1
|
|
372
370
|
# token.type #=> :LANGTAG
|
373
371
|
# token.value #=> "en"
|
374
372
|
#
|
375
|
-
# @see
|
373
|
+
# @see https://en.wikipedia.org/wiki/Lexical_analysis#Token
|
376
374
|
class Token
|
377
375
|
##
|
378
376
|
# The token's symbol type.
|
@@ -493,7 +491,7 @@ module EBNF::LL1
|
|
493
491
|
# "invalid token '%' on line 10",
|
494
492
|
# input: query, token: '%', lineno: 9)
|
495
493
|
#
|
496
|
-
# @see
|
494
|
+
# @see https://ruby-doc.org/core/classes/StandardError.html
|
497
495
|
class Error < StandardError
|
498
496
|
##
|
499
497
|
# The input string associated with the error.
|