ebnf 1.2.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +160 -185
- data/UNLICENSE +1 -1
- data/VERSION +1 -1
- data/bin/ebnf +6 -3
- data/etc/doap.ttl +13 -12
- data/etc/ebnf.ebnf +13 -19
- data/etc/ebnf.html +205 -239
- data/etc/{ebnf.rb → ebnf.ll1.rb} +3 -4
- data/etc/ebnf.ll1.sxp +179 -183
- data/etc/ebnf.peg.rb +98 -0
- data/etc/ebnf.peg.sxp +93 -0
- data/etc/ebnf.sxp +37 -41
- data/etc/sparql.html +1603 -1751
- data/etc/sparql.ll1.sxp +7372 -7372
- data/etc/sparql.peg.rb +532 -0
- data/etc/sparql.peg.sxp +597 -0
- data/etc/sparql.sxp +362 -362
- data/etc/turtle.html +465 -517
- data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
- data/etc/turtle.ll1.sxp +425 -425
- data/etc/turtle.peg.rb +182 -0
- data/etc/turtle.peg.sxp +199 -0
- data/etc/turtle.sxp +101 -101
- data/lib/ebnf.rb +3 -1
- data/lib/ebnf/base.rb +30 -29
- data/lib/ebnf/bnf.rb +1 -26
- data/lib/ebnf/ll1.rb +132 -1
- data/lib/ebnf/ll1/lexer.rb +20 -22
- data/lib/ebnf/ll1/parser.rb +86 -61
- data/lib/ebnf/ll1/scanner.rb +83 -50
- data/lib/ebnf/peg.rb +39 -0
- data/lib/ebnf/peg/parser.rb +535 -0
- data/lib/ebnf/peg/rule.rb +222 -0
- data/lib/ebnf/rule.rb +118 -55
- data/lib/ebnf/terminals.rb +18 -0
- data/lib/ebnf/writer.rb +3 -2
- metadata +29 -6
- data/etc/sparql.rb +0 -45773
data/lib/ebnf.rb
CHANGED
@@ -3,7 +3,9 @@ module EBNF
|
|
3
3
|
autoload :BNF, "ebnf/bnf"
|
4
4
|
autoload :LL1, "ebnf/ll1"
|
5
5
|
autoload :Parser, "ebnf/parser"
|
6
|
+
autoload :PEG, "ebnf/peg"
|
6
7
|
autoload :Rule, "ebnf/rule"
|
8
|
+
autoload :Terminals,"ebnf/terminals"
|
7
9
|
autoload :Writer, "ebnf/writer"
|
8
10
|
autoload :VERSION, "ebnf/version"
|
9
11
|
|
@@ -18,6 +20,6 @@ module EBNF
|
|
18
20
|
# @return [EBNF::Base]
|
19
21
|
# @raise [Exception] on invalid input
|
20
22
|
def self.parse(input, **options)
|
21
|
-
|
23
|
+
::EBNF::Base.new(input, **options)
|
22
24
|
end
|
23
25
|
end
|
data/lib/ebnf/base.rb
CHANGED
@@ -2,7 +2,7 @@ require 'strscan'
|
|
2
2
|
|
3
3
|
# Extended Backus-Naur Form (EBNF), being the W3C variation is
|
4
4
|
# originally defined in the
|
5
|
-
# [W3C XML 1.0 Spec](
|
5
|
+
# [W3C XML 1.0 Spec](https://www.w3.org/TR/REC-xml/#sec-notation).
|
6
6
|
#
|
7
7
|
# This version attempts to be less strict than the strict definition
|
8
8
|
# to allow for colloquial variations (such as in the Turtle syntax).
|
@@ -12,8 +12,8 @@ require 'strscan'
|
|
12
12
|
#
|
13
13
|
# Comments include the content between '/*' and '*/'
|
14
14
|
#
|
15
|
-
# @see
|
16
|
-
# @see
|
15
|
+
# @see https://www.w3.org/2000/10/swap/grammar/ebnf2turtle.py
|
16
|
+
# @see https://www.w3.org/2000/10/swap/grammar/ebnf2bnf.n3
|
17
17
|
#
|
18
18
|
# Based on bnf2turtle by Dan Connolly.
|
19
19
|
#
|
@@ -36,7 +36,7 @@ require 'strscan'
|
|
36
36
|
# derived mechanically from the specification.
|
37
37
|
#
|
38
38
|
#
|
39
|
-
# [N3 design note]:
|
39
|
+
# [N3 design note]: https://www.w3.org/DesignIssues/Notation3
|
40
40
|
#
|
41
41
|
# Related Work
|
42
42
|
# ------------
|
@@ -59,12 +59,12 @@ require 'strscan'
|
|
59
59
|
# expression of the grammar in terms of the higher level EBNF
|
60
60
|
# constructs.
|
61
61
|
#
|
62
|
-
# [goal]:
|
63
|
-
# [n3p announcement]:
|
64
|
-
# [Yacker]:
|
65
|
-
# [SPARQL specification]:
|
66
|
-
# [Cwm Release 1.1.0rc1]:
|
67
|
-
# [bnf-rules.n3]:
|
62
|
+
# [goal]: https://www.w3.org/2002/02/mid/1086902566.21030.1479.camel@dirk;list=public-cwm-bugs
|
63
|
+
# [n3p announcement]: https://lists.w3.org/Archives/Public/public-cwm-talk/2004OctDec/0029.html
|
64
|
+
# [Yacker]: https://www.w3.org/1999/02/26-modules/User/Yacker
|
65
|
+
# [SPARQL specification]: https://www.w3.org/TR/rdf-sparql-query/
|
66
|
+
# [Cwm Release 1.1.0rc1]: https://lists.w3.org/Archives/Public/public-cwm-announce/2005JulSep/0000.html
|
67
|
+
# [bnf-rules.n3]: https://www.w3.org/2000/10/swap/grammar/bnf-rules.n3
|
68
68
|
#
|
69
69
|
# Open Issues and Future Work
|
70
70
|
# ---------------------------
|
@@ -82,8 +82,8 @@ require 'strscan'
|
|
82
82
|
# It would be interesting to corroborate the claim in the SPARQL spec
|
83
83
|
# that the grammar is LL(1) with a mechanical proof based on N3 rules.
|
84
84
|
#
|
85
|
-
# [swap/grammar/bnf]:
|
86
|
-
# [bnf2html.n3]:
|
85
|
+
# [swap/grammar/bnf]: https://www.w3.org/2000/10/swap/grammar/bnf
|
86
|
+
# [bnf2html.n3]: https://www.w3.org/2000/10/swap/grammar/bnf2html.n3
|
87
87
|
#
|
88
88
|
# Background
|
89
89
|
# ----------
|
@@ -93,7 +93,7 @@ require 'strscan'
|
|
93
93
|
# of N3 that maps directly to (and from) the standard XML syntax for
|
94
94
|
# RDF.
|
95
95
|
#
|
96
|
-
# [N3 Primer]:
|
96
|
+
# [N3 Primer]: https://www.w3.org/2000/10/swap/Primer.html
|
97
97
|
#
|
98
98
|
# @author Gregg Kellogg
|
99
99
|
module EBNF
|
@@ -101,6 +101,7 @@ module EBNF
|
|
101
101
|
include BNF
|
102
102
|
include LL1
|
103
103
|
include Parser
|
104
|
+
include PEG
|
104
105
|
|
105
106
|
# Abstract syntax tree from parse
|
106
107
|
#
|
@@ -116,9 +117,9 @@ module EBNF
|
|
116
117
|
# in S-Expressions (similar to SPARQL SSE)
|
117
118
|
#
|
118
119
|
# @param [#read, #to_s] input
|
119
|
-
# @param [
|
120
|
-
# @param [Symbol] :format (:ebnf)
|
120
|
+
# @param [Symbol] format (:ebnf)
|
121
121
|
# Format of input, one of :ebnf, or :sxp
|
122
|
+
# @param [Hash{Symbol => Object}] options
|
122
123
|
# @option options [Boolean, Array] :debug
|
123
124
|
# Output debug information to an array or $stdout.
|
124
125
|
def initialize(input, format: :ebnf, **options)
|
@@ -194,26 +195,26 @@ module EBNF
|
|
194
195
|
# Output Ruby parser files
|
195
196
|
#
|
196
197
|
# @param [IO, StringIO] output
|
197
|
-
# @param [String]
|
198
|
-
# @param [String]
|
199
|
-
def to_ruby(output = $stdout, grammarFile: nil, mod_name: '
|
198
|
+
# @param [String] grammarFile
|
199
|
+
# @param [String] mod_name ('Meta')
|
200
|
+
def to_ruby(output = $stdout, grammarFile: nil, mod_name: 'Meta', **options)
|
200
201
|
unless output == $stdout
|
201
|
-
output.puts "# This file is automatically generated by #{
|
202
|
-
output.puts "#
|
202
|
+
output.puts "# This file is automatically generated by ebnf version #{EBNF::VERSION}"
|
203
|
+
output.puts "# Derived from #{grammarFile}" if grammarFile
|
203
204
|
unless self.errors.empty?
|
204
|
-
output.puts "# Note,
|
205
|
+
output.puts "# Note, grammar has errors, may need to be resolved manually:"
|
205
206
|
#output.puts "# #{pp.conflicts.map{|c| c.join("\n# ")}.join("\n# ")}"
|
206
207
|
end
|
207
208
|
output.puts "module #{mod_name}"
|
208
|
-
output.puts " START = #{self.start.inspect}"
|
209
|
-
|
209
|
+
output.puts " START = #{self.start.inspect}\n" if self.start
|
210
|
+
end
|
211
|
+
|
212
|
+
# Either output LL(1) BRANCH tables or rules for PEG parsing
|
213
|
+
if ast.first.is_a?(EBNF::PEG::Rule)
|
214
|
+
to_ruby_peg(output)
|
215
|
+
else
|
216
|
+
to_ruby_ll1(output)
|
210
217
|
end
|
211
|
-
self.outputTable(output, 'BRANCH', self.branch, 1)
|
212
|
-
self.outputTable(output, 'TERMINALS', self.terminals, 1)
|
213
|
-
self.outputTable(output, 'FIRST', self.first, 1)
|
214
|
-
self.outputTable(output, 'FOLLOW', self.follow, 1)
|
215
|
-
self.outputTable(output, 'CLEANUP', self.cleanup, 1)
|
216
|
-
self.outputTable(output, 'PASS', [self.pass], 1) if self.pass
|
217
218
|
unless output == $stdout
|
218
219
|
output.puts "end"
|
219
220
|
end
|
data/lib/ebnf/bnf.rb
CHANGED
@@ -17,32 +17,7 @@ module EBNF
|
|
17
17
|
new_ast += new_rules
|
18
18
|
end
|
19
19
|
|
20
|
-
|
21
|
-
to_rewrite = {}
|
22
|
-
new_ast.select {|r| r.terminal?}.each do |src_rule|
|
23
|
-
new_ast.select {|r| r.terminal?}.each do |dst_rule|
|
24
|
-
if src_rule.equivalent?(dst_rule) && src_rule != dst_rule
|
25
|
-
debug("make_bnf") {"equivalent rules: #{src_rule.inspect} and #{dst_rule.inspect}"}
|
26
|
-
(to_rewrite[src_rule] ||= []) << dst_rule
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
# Replace references to equivalent rules with canonical rule
|
32
|
-
to_rewrite.each do |src_rule, dst_rules|
|
33
|
-
dst_rules.each do |dst_rule|
|
34
|
-
new_ast.each do |mod_rule|
|
35
|
-
debug("make_bnf") {"rewrite #{mod_rule.inspect} from #{dst_rule.sym} to #{src_rule.sym}"}
|
36
|
-
mod_rule.rewrite(dst_rule, src_rule)
|
37
|
-
end
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
# AST now has just rewritten rules
|
42
|
-
compacted_ast = new_ast - to_rewrite.values.flatten.compact
|
43
|
-
|
44
|
-
# Sort AST by number
|
45
|
-
@ast = compacted_ast
|
20
|
+
@ast = new_ast
|
46
21
|
progress("make_bnf") {"End: #{@ast.length} rules"}
|
47
22
|
self
|
48
23
|
end
|
data/lib/ebnf/ll1.rb
CHANGED
@@ -1,4 +1,90 @@
|
|
1
1
|
module EBNF
|
2
|
+
##
|
3
|
+
# This module extends {EBNF::Base} to create metadata including _branch_, [First/Follow][], and other tables which are used by {EBNF::LL1::Parser} to recognize examples of the associated grammar.
|
4
|
+
#
|
5
|
+
### Branch Table
|
6
|
+
#
|
7
|
+
# The Branch table is a hash mapping production rules to a hash relating terminals appearing in input to sequence of productions to follow when the corresponding input terminal is found. This allows either the `seq` primitive, where all terminals map to the same sequence of productions, or the `alt` primitive, where each terminal may map to a different production.
|
8
|
+
#
|
9
|
+
# BRANCH = {
|
10
|
+
# :alt => {
|
11
|
+
# "(" => [:seq, :_alt_1],
|
12
|
+
# :ENUM => [:seq, :_alt_1],
|
13
|
+
# :HEX => [:seq, :_alt_1],
|
14
|
+
# :O_ENUM => [:seq, :_alt_1],
|
15
|
+
# :O_RANGE => [:seq, :_alt_1],
|
16
|
+
# :RANGE => [:seq, :_alt_1],
|
17
|
+
# :STRING1 => [:seq, :_alt_1],
|
18
|
+
# :STRING2 => [:seq, :_alt_1],
|
19
|
+
# :SYMBOL => [:seq, :_alt_1],
|
20
|
+
# },
|
21
|
+
# ...
|
22
|
+
# :declaration => {
|
23
|
+
# "@pass" => [:pass],
|
24
|
+
# "@terminals" => ["@terminals"],
|
25
|
+
# },
|
26
|
+
# ...
|
27
|
+
# }
|
28
|
+
#
|
29
|
+
# In this case the `alt` rule is `seq ('|' seq)*`, which can happen when any of the specified tokens appears on the input stream. They all cause the same token to be passed to the `seq` rule and follow with `_alt_1`, which handles the `('|' seq)*` portion of the rule, after the first sequence is matched.
|
30
|
+
#
|
31
|
+
# The `declaration` rule is `'@terminals' | pass` using the `alt` primitive determining the production to run based on the terminal appearing on the input stream. Eventually, a terminal production is found and the token is consumed.
|
32
|
+
#
|
33
|
+
### First/Follow Table
|
34
|
+
#
|
35
|
+
# The [First/Follow][] table is a hash mapping production rules to the terminals that may precede or follow the rule. For example:
|
36
|
+
#
|
37
|
+
# FIRST = {
|
38
|
+
# :alt => [
|
39
|
+
# :HEX,
|
40
|
+
# :SYMBOL,
|
41
|
+
# :ENUM,
|
42
|
+
# :O_ENUM,
|
43
|
+
# :RANGE,
|
44
|
+
# :O_RANGE,
|
45
|
+
# :STRING1,
|
46
|
+
# :STRING2,
|
47
|
+
# "("],
|
48
|
+
# ...
|
49
|
+
# }
|
50
|
+
#
|
51
|
+
### Terminals Table
|
52
|
+
#
|
53
|
+
# This table is a simple list of the terminal productions found in the grammar. For example:
|
54
|
+
#
|
55
|
+
# TERMINALS = ["(", ")", "-",
|
56
|
+
# "@pass", "@terminals",
|
57
|
+
# :ENUM, :HEX, :LHS, :O_ENUM, :O_RANGE,:POSTFIX,
|
58
|
+
# :RANGE, :STRING1, :STRING2, :SYMBOL,"|"
|
59
|
+
# ].freeze
|
60
|
+
#
|
61
|
+
### Cleanup Table
|
62
|
+
#
|
63
|
+
# This table identifies productions which used EBNF rules, which are transformed to BNF for actual parsing. This allows the parser, in some cases, to reproduce *star*, *plus*, and *opt* rule matches. For example:
|
64
|
+
#
|
65
|
+
# CLEANUP = {
|
66
|
+
# :_alt_1 => :star,
|
67
|
+
# :_alt_3 => :merge,
|
68
|
+
# :_diff_1 => :opt,
|
69
|
+
# :ebnf => :star,
|
70
|
+
# :_ebnf_2 => :merge,
|
71
|
+
# :_postfix_1 => :opt,
|
72
|
+
# :seq => :plus,
|
73
|
+
# :_seq_1 => :star,
|
74
|
+
# :_seq_2 => :merge,
|
75
|
+
# }.freeze
|
76
|
+
#
|
77
|
+
# In this case the `ebnf` rule was `(declaration | rule)*`. As BNF does not support a star operator, this is decomposed into a set of rules using `alt` and `seq` primitives:
|
78
|
+
#
|
79
|
+
# ebnf ::= _empty _ebnf_2
|
80
|
+
# _ebnf_1 ::= declaration | rule
|
81
|
+
# _ebnf_2 ::= _ebnf_1 ebnf
|
82
|
+
# _ebnf_3 ::= ebnf
|
83
|
+
#
|
84
|
+
# The `_empty` production matches an empty string, so allows for no value. `_ebnf_2` matches `declaration | rule` (using the `alt` primitive) followed by `ebnf`, creating a sequence of zero or more `declaration` or `rule` members.
|
85
|
+
#
|
86
|
+
# [First/Follow]: https://en.wikipedia.org/wiki/LL_parser#Constructing_an_LL.281.29_parsing_table
|
87
|
+
|
2
88
|
module LL1
|
3
89
|
autoload :Lexer, "ebnf/ll1/lexer"
|
4
90
|
autoload :Parser, "ebnf/ll1/parser"
|
@@ -51,8 +137,40 @@ module EBNF
|
|
51
137
|
##
|
52
138
|
# Create first/follow for each rule using techniques defined for LL(1) parsers.
|
53
139
|
#
|
140
|
+
# This takes rules which have been transformed into BNF and adds first/follow and other information to the rules to allow the generation of metadata tables used for driving a parser.
|
141
|
+
#
|
142
|
+
# Given an initial rule in EBNF:
|
143
|
+
#
|
144
|
+
# (rule ebnf "1" (star declaration rule))
|
145
|
+
#
|
146
|
+
# The BNF transformation becomes:
|
147
|
+
#
|
148
|
+
# (rule ebnf "1" (alt _empty _ebnf_2))
|
149
|
+
# (rule _ebnf_1 "1.1" (alt declaration rule))
|
150
|
+
# (rule _ebnf_2 "1.2" (seq _ebnf_1 ebnf))
|
151
|
+
# (rule _ebnf_3 "1.3" (seq ebnf))
|
152
|
+
#
|
153
|
+
# After running this method, the rules are annotated with first/follow and cleanup rules:
|
154
|
+
#
|
155
|
+
# (rule ebnf "1"
|
156
|
+
# (start #t)
|
157
|
+
# (first "@pass" "@terminals" LHS _eps)
|
158
|
+
# (follow _eof)
|
159
|
+
# (cleanup star)
|
160
|
+
# (alt _empty _ebnf_2))
|
161
|
+
# (rule _ebnf_1 "1.1"
|
162
|
+
# (first "@pass" "@terminals" LHS)
|
163
|
+
# (follow "@pass" "@terminals" LHS _eof)
|
164
|
+
# (alt declaration rule))
|
165
|
+
# (rule _ebnf_2 "1.2"
|
166
|
+
# (first "@pass" "@terminals" LHS)
|
167
|
+
# (follow _eof)
|
168
|
+
# (cleanup merge)
|
169
|
+
# (seq _ebnf_1 ebnf))
|
170
|
+
# (rule _ebnf_3 "1.3" (first "@pass" "@terminals" LHS _eps) (follow _eof) (seq ebnf))
|
171
|
+
#
|
54
172
|
# @return [EBNF] self
|
55
|
-
# @see
|
173
|
+
# @see https://en.wikipedia.org/wiki/LL_parser#Constructing_an_LL.281.29_parsing_table
|
56
174
|
# @param [Array<Symbol>] starts
|
57
175
|
# Set of symbols which are start rules
|
58
176
|
def first_follow(*starts)
|
@@ -276,6 +394,19 @@ module EBNF
|
|
276
394
|
end
|
277
395
|
end
|
278
396
|
|
397
|
+
##
|
398
|
+
# Output Ruby parser files for LL(1) parsing
|
399
|
+
#
|
400
|
+
# @param [IO, StringIO] output
|
401
|
+
def to_ruby_ll1(output, **options)
|
402
|
+
self.outputTable(output, 'BRANCH', self.branch, 1)
|
403
|
+
self.outputTable(output, 'TERMINALS', self.terminals, 1)
|
404
|
+
self.outputTable(output, 'FIRST', self.first, 1)
|
405
|
+
self.outputTable(output, 'FOLLOW', self.follow, 1)
|
406
|
+
self.outputTable(output, 'CLEANUP', self.cleanup, 1)
|
407
|
+
self.outputTable(output, 'PASS', [self.pass], 1) if self.pass
|
408
|
+
end
|
409
|
+
|
279
410
|
private
|
280
411
|
def do_production(lhs)
|
281
412
|
rule = find_rule(lhs)
|
data/lib/ebnf/ll1/lexer.rb
CHANGED
@@ -29,7 +29,7 @@ module EBNF::LL1
|
|
29
29
|
# warn error.inspect
|
30
30
|
# end
|
31
31
|
#
|
32
|
-
# @see
|
32
|
+
# @see https://en.wikipedia.org/wiki/Lexical_analysis
|
33
33
|
class Lexer
|
34
34
|
include Enumerable
|
35
35
|
|
@@ -43,10 +43,10 @@ module EBNF::LL1
|
|
43
43
|
"\\'" => '\'', # \u0027 (apostrophe-quote, single quote mark)
|
44
44
|
'\\\\' => '\\' # \u005C (backslash)
|
45
45
|
}.freeze
|
46
|
-
ESCAPE_CHAR4 = /\\u(?:[0-9A-Fa-f]{4,4})
|
47
|
-
ESCAPE_CHAR8 = /\\U(?:[0-9A-Fa-f]{8,8})
|
48
|
-
ECHAR = /\\./
|
49
|
-
UCHAR = /#{ESCAPE_CHAR4}|#{ESCAPE_CHAR8}
|
46
|
+
ESCAPE_CHAR4 = /\\u(?:[0-9A-Fa-f]{4,4})/u.freeze # \uXXXX
|
47
|
+
ESCAPE_CHAR8 = /\\U(?:[0-9A-Fa-f]{8,8})/u.freeze # \UXXXXXXXX
|
48
|
+
ECHAR = /\\./u.freeze # More liberal unescaping
|
49
|
+
UCHAR = /#{ESCAPE_CHAR4}|#{ESCAPE_CHAR8}/n.freeze
|
50
50
|
|
51
51
|
##
|
52
52
|
# @return [Regexp] defines whitespace, including comments, otherwise whitespace must be explicit in terminals
|
@@ -59,7 +59,7 @@ module EBNF::LL1
|
|
59
59
|
#
|
60
60
|
# @param [String] string
|
61
61
|
# @return [String]
|
62
|
-
# @see
|
62
|
+
# @see https://www.w3.org/TR/rdf-sparql-query/#codepointEscape
|
63
63
|
def self.unescape_codepoints(string)
|
64
64
|
string = string.dup
|
65
65
|
string.force_encoding(Encoding::ASCII_8BIT) if string.respond_to?(:force_encoding)
|
@@ -81,7 +81,7 @@ module EBNF::LL1
|
|
81
81
|
#
|
82
82
|
# @param [String] input
|
83
83
|
# @return [String]
|
84
|
-
# @see
|
84
|
+
# @see https://www.w3.org/TR/rdf-sparql-query/#grammarEscapes
|
85
85
|
def self.unescape_string(input)
|
86
86
|
input.gsub(ECHAR) { |escaped| ESCAPE_CHARS[escaped] || escaped[1..-1]}
|
87
87
|
end
|
@@ -131,7 +131,6 @@ module EBNF::LL1
|
|
131
131
|
|
132
132
|
raise Error, "Terminal patterns not defined" unless @terminals && @terminals.length > 0
|
133
133
|
|
134
|
-
@lineno = 1
|
135
134
|
@scanner = Scanner.new(input, **options)
|
136
135
|
end
|
137
136
|
|
@@ -147,12 +146,6 @@ module EBNF::LL1
|
|
147
146
|
# @return [String]
|
148
147
|
attr_accessor :input
|
149
148
|
|
150
|
-
##
|
151
|
-
# The current line number (zero-based).
|
152
|
-
#
|
153
|
-
# @return [Integer]
|
154
|
-
attr_reader :lineno
|
155
|
-
|
156
149
|
##
|
157
150
|
# Returns `true` if the input string is lexically valid.
|
158
151
|
#
|
@@ -194,7 +187,7 @@ module EBNF::LL1
|
|
194
187
|
|
195
188
|
@first ||= begin
|
196
189
|
{} while !scanner.eos? && skip_whitespace
|
197
|
-
return
|
190
|
+
return nil if scanner.eos?
|
198
191
|
|
199
192
|
token = match_token(*types)
|
200
193
|
|
@@ -233,7 +226,7 @@ module EBNF::LL1
|
|
233
226
|
# @return [Token]
|
234
227
|
def recover(*types)
|
235
228
|
until scanner.eos? || tok = match_token(*types)
|
236
|
-
if scanner.skip_until(@whitespace || /\s
|
229
|
+
if scanner.skip_until(@whitespace || /\s+/m).nil? # Skip past current "token"
|
237
230
|
# No whitespace at the end, must be at the end of string
|
238
231
|
scanner.terminate
|
239
232
|
else
|
@@ -243,6 +236,14 @@ module EBNF::LL1
|
|
243
236
|
scanner.unscan if tok
|
244
237
|
first
|
245
238
|
end
|
239
|
+
|
240
|
+
##
|
241
|
+
# The current line number (one-based).
|
242
|
+
#
|
243
|
+
# @return [Integer]
|
244
|
+
def lineno
|
245
|
+
scanner.lineno
|
246
|
+
end
|
246
247
|
protected
|
247
248
|
|
248
249
|
# @return [StringScanner]
|
@@ -253,9 +254,7 @@ module EBNF::LL1
|
|
253
254
|
def skip_whitespace
|
254
255
|
# skip all white space, but keep track of the current line number
|
255
256
|
while @whitespace && !scanner.eos?
|
256
|
-
|
257
|
-
@lineno += matched.count("\n")
|
258
|
-
else
|
257
|
+
unless scanner.scan(@whitespace)
|
259
258
|
return
|
260
259
|
end
|
261
260
|
end
|
@@ -281,7 +280,6 @@ module EBNF::LL1
|
|
281
280
|
if matched = scanner.scan(term.regexp)
|
282
281
|
#STDERR.puts " matched #{term.type.inspect}: #{matched.inspect}"
|
283
282
|
tok = token(term.type, term.canonicalize(matched))
|
284
|
-
@lineno += matched.count("\n")
|
285
283
|
return tok
|
286
284
|
end
|
287
285
|
end
|
@@ -372,7 +370,7 @@ module EBNF::LL1
|
|
372
370
|
# token.type #=> :LANGTAG
|
373
371
|
# token.value #=> "en"
|
374
372
|
#
|
375
|
-
# @see
|
373
|
+
# @see https://en.wikipedia.org/wiki/Lexical_analysis#Token
|
376
374
|
class Token
|
377
375
|
##
|
378
376
|
# The token's symbol type.
|
@@ -493,7 +491,7 @@ module EBNF::LL1
|
|
493
491
|
# "invalid token '%' on line 10",
|
494
492
|
# input: query, token: '%', lineno: 9)
|
495
493
|
#
|
496
|
-
# @see
|
494
|
+
# @see https://ruby-doc.org/core/classes/StandardError.html
|
497
495
|
class Error < StandardError
|
498
496
|
##
|
499
497
|
# The input string associated with the error.
|