ebnf 1.1.3 → 2.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +221 -198
- data/UNLICENSE +1 -1
- data/VERSION +1 -1
- data/bin/ebnf +40 -21
- data/etc/abnf-core.ebnf +52 -0
- data/etc/abnf.abnf +121 -0
- data/etc/abnf.ebnf +124 -0
- data/etc/abnf.sxp +45 -0
- data/etc/doap.ttl +23 -15
- data/etc/ebnf.ebnf +21 -33
- data/etc/ebnf.html +171 -160
- data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
- data/etc/ebnf.ll1.sxp +182 -183
- data/etc/ebnf.peg.rb +90 -0
- data/etc/ebnf.peg.sxp +84 -0
- data/etc/ebnf.sxp +40 -41
- data/etc/iso-ebnf.ebnf +140 -0
- data/etc/iso-ebnf.isoebnf +138 -0
- data/etc/iso-ebnf.sxp +65 -0
- data/etc/sparql.ebnf +4 -4
- data/etc/sparql.html +1603 -1751
- data/etc/sparql.ll1.sxp +7372 -7372
- data/etc/sparql.peg.rb +532 -0
- data/etc/sparql.peg.sxp +597 -0
- data/etc/sparql.sxp +363 -362
- data/etc/turtle.ebnf +3 -3
- data/etc/turtle.html +465 -517
- data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
- data/etc/turtle.ll1.sxp +425 -425
- data/etc/turtle.peg.rb +182 -0
- data/etc/turtle.peg.sxp +199 -0
- data/etc/turtle.sxp +103 -101
- data/lib/ebnf.rb +7 -2
- data/lib/ebnf/abnf.rb +301 -0
- data/lib/ebnf/abnf/core.rb +23 -0
- data/lib/ebnf/abnf/meta.rb +111 -0
- data/lib/ebnf/base.rb +113 -69
- data/lib/ebnf/bnf.rb +1 -26
- data/lib/ebnf/ebnf/meta.rb +90 -0
- data/lib/ebnf/isoebnf.rb +229 -0
- data/lib/ebnf/isoebnf/meta.rb +75 -0
- data/lib/ebnf/ll1.rb +138 -6
- data/lib/ebnf/ll1/lexer.rb +37 -32
- data/lib/ebnf/ll1/parser.rb +113 -73
- data/lib/ebnf/ll1/scanner.rb +83 -51
- data/lib/ebnf/native.rb +320 -0
- data/lib/ebnf/parser.rb +285 -302
- data/lib/ebnf/peg.rb +39 -0
- data/lib/ebnf/peg/parser.rb +561 -0
- data/lib/ebnf/peg/rule.rb +250 -0
- data/lib/ebnf/rule.rb +443 -148
- data/lib/ebnf/terminals.rb +21 -0
- data/lib/ebnf/writer.rb +565 -83
- metadata +107 -29
- data/etc/sparql.rb +0 -45773
@@ -0,0 +1,75 @@
|
|
1
|
+
# This file is automatically generated by ebnf version 2.0.0
|
2
|
+
# Derived from etc/iso-ebnf.ebnf
|
3
|
+
module ISOEBNFMeta
|
4
|
+
RULES = [
|
5
|
+
EBNF::Rule.new(:syntax, nil, [:star, :syntax_rule]).extend(EBNF::PEG::Rule),
|
6
|
+
EBNF::Rule.new(:syntax_rule, nil, [:seq, :meta_identifier, :defining_symbol, :definitions_list, :terminator_symbol]).extend(EBNF::PEG::Rule),
|
7
|
+
EBNF::Rule.new(:definitions_list, nil, [:seq, :single_definition, :_definitions_list_1]).extend(EBNF::PEG::Rule),
|
8
|
+
EBNF::Rule.new(:_definitions_list_1, nil, [:star, :_definitions_list_2]).extend(EBNF::PEG::Rule),
|
9
|
+
EBNF::Rule.new(:_definitions_list_2, nil, [:seq, :definition_separator_symbol, :definitions_list]).extend(EBNF::PEG::Rule),
|
10
|
+
EBNF::Rule.new(:single_definition, nil, [:seq, :term, :_single_definition_1]).extend(EBNF::PEG::Rule),
|
11
|
+
EBNF::Rule.new(:_single_definition_1, nil, [:star, :_single_definition_2]).extend(EBNF::PEG::Rule),
|
12
|
+
EBNF::Rule.new(:_single_definition_2, nil, [:seq, ",", :term]).extend(EBNF::PEG::Rule),
|
13
|
+
EBNF::Rule.new(:term, nil, [:seq, :factor, :_term_1]).extend(EBNF::PEG::Rule),
|
14
|
+
EBNF::Rule.new(:_term_1, nil, [:opt, :_term_2]).extend(EBNF::PEG::Rule),
|
15
|
+
EBNF::Rule.new(:_term_2, nil, [:seq, "-", :exception]).extend(EBNF::PEG::Rule),
|
16
|
+
EBNF::Rule.new(:exception, nil, [:seq, :factor]).extend(EBNF::PEG::Rule),
|
17
|
+
EBNF::Rule.new(:factor, nil, [:seq, :_factor_1, :primary]).extend(EBNF::PEG::Rule),
|
18
|
+
EBNF::Rule.new(:_factor_1, nil, [:opt, :_factor_2]).extend(EBNF::PEG::Rule),
|
19
|
+
EBNF::Rule.new(:_factor_2, nil, [:seq, :integer, "*"]).extend(EBNF::PEG::Rule),
|
20
|
+
EBNF::Rule.new(:primary, nil, [:alt, :optional_sequence, :repeated_sequence, :special_sequence, :grouped_sequence, :meta_identifier, :terminal_string, :empty]).extend(EBNF::PEG::Rule),
|
21
|
+
EBNF::Rule.new(:optional_sequence, nil, [:seq, :start_option_symbol, :definitions_list, :end_option_symbol]).extend(EBNF::PEG::Rule),
|
22
|
+
EBNF::Rule.new(:repeated_sequence, nil, [:seq, :start_repeat_symbol, :definitions_list, :end_repeat_symbol]).extend(EBNF::PEG::Rule),
|
23
|
+
EBNF::Rule.new(:grouped_sequence, nil, [:seq, "(", :definitions_list, ")"]).extend(EBNF::PEG::Rule),
|
24
|
+
EBNF::Rule.new(:_terminals, nil, [:seq], kind: :terminals).extend(EBNF::PEG::Rule),
|
25
|
+
EBNF::Rule.new(:terminal_string, nil, [:alt, :_terminal_string_1, :_terminal_string_2], kind: :terminal).extend(EBNF::PEG::Rule),
|
26
|
+
EBNF::Rule.new(:_terminal_string_1, nil, [:seq, "'", :_terminal_string_3, "'"]).extend(EBNF::PEG::Rule),
|
27
|
+
EBNF::Rule.new(:_terminal_string_3, nil, [:plus, :first_terminal_character]).extend(EBNF::PEG::Rule),
|
28
|
+
EBNF::Rule.new(:_terminal_string_2, nil, [:seq, "\"", :_terminal_string_4, "\""]).extend(EBNF::PEG::Rule),
|
29
|
+
EBNF::Rule.new(:_terminal_string_4, nil, [:plus, :second_terminal_character]).extend(EBNF::PEG::Rule),
|
30
|
+
EBNF::Rule.new(:meta_identifier, nil, [:seq, :letter, :_meta_identifier_1], kind: :terminal).extend(EBNF::PEG::Rule),
|
31
|
+
EBNF::Rule.new(:_meta_identifier_1, nil, [:star, :meta_identifier_character]).extend(EBNF::PEG::Rule),
|
32
|
+
EBNF::Rule.new(:integer, nil, [:plus, :decimal_digit], kind: :terminal).extend(EBNF::PEG::Rule),
|
33
|
+
EBNF::Rule.new(:special_sequence, nil, [:seq, "?", :_special_sequence_1, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
|
34
|
+
EBNF::Rule.new(:_special_sequence_1, nil, [:star, :special_sequence_character]).extend(EBNF::PEG::Rule),
|
35
|
+
EBNF::Rule.new(:comment, nil, [:seq, :start_comment_symbol, :_comment_1, :end_comment_symbol], kind: :terminal).extend(EBNF::PEG::Rule),
|
36
|
+
EBNF::Rule.new(:_comment_1, nil, [:star, :comment_symbol]).extend(EBNF::PEG::Rule),
|
37
|
+
EBNF::Rule.new(:comment_symbol, nil, [:alt, :comment, :commentless_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule),
|
38
|
+
EBNF::Rule.new(:commentless_symbol, nil, [:alt, :terminal_character, :meta_identifier, :integer, :terminal_string, :special_sequence], kind: :terminal).extend(EBNF::PEG::Rule),
|
39
|
+
EBNF::Rule.new(:letter, nil, [:range, "a-zA-Z"], kind: :terminal).extend(EBNF::PEG::Rule),
|
40
|
+
EBNF::Rule.new(:decimal_digit, nil, [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule),
|
41
|
+
EBNF::Rule.new(:meta_identifier_character, nil, [:alt, :letter, :decimal_digit, "_"], kind: :terminal).extend(EBNF::PEG::Rule),
|
42
|
+
EBNF::Rule.new(:first_terminal_character, nil, [:diff, :terminal_character, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
|
43
|
+
EBNF::Rule.new(:second_terminal_character, nil, [:diff, :terminal_character, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
|
44
|
+
EBNF::Rule.new(:special_sequence_character, nil, [:diff, :terminal_character, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
|
45
|
+
EBNF::Rule.new(:terminal_character, nil, [:alt, :letter, :decimal_digit, :concatenate_symbol, :defining_symbol, :definition_separator_symbol, :end_comment_symbol, :end_group_symbol, :end_option_symbol, :end_repeat_symbol, :except_symbol, :first_quote_symbol, :repetition_symbol, :second_quote_symbol, :special_sequence_symbol, :start_comment_symbol, :start_group_symbol, :start_option_symbol, :start_repeat_symbol, :terminator_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule),
|
46
|
+
EBNF::Rule.new(:other_character, nil, [:alt, :_other_character_1, "\\"], kind: :terminal).extend(EBNF::PEG::Rule),
|
47
|
+
EBNF::Rule.new(:_other_character_1, nil, [:range, ":+_%@&$<>^` ̃#x20#x23"], kind: :terminal).extend(EBNF::PEG::Rule),
|
48
|
+
EBNF::Rule.new(:gap_separator, nil, [:range, "#x9#xa#xb#xc#xd#x20"], kind: :terminal).extend(EBNF::PEG::Rule),
|
49
|
+
EBNF::Rule.new(:_pass, nil, [:alt, :__pass_1, :comment], kind: :pass).extend(EBNF::PEG::Rule),
|
50
|
+
EBNF::Rule.new(:__pass_1, nil, [:plus, :gap_separator]).extend(EBNF::PEG::Rule),
|
51
|
+
EBNF::Rule.new(:empty, nil, [:seq, ""], kind: :terminal).extend(EBNF::PEG::Rule),
|
52
|
+
EBNF::Rule.new(:concatenate_symbol, nil, [:seq, ","], kind: :terminal).extend(EBNF::PEG::Rule),
|
53
|
+
EBNF::Rule.new(:repetition_symbol, nil, [:seq, "*"], kind: :terminal).extend(EBNF::PEG::Rule),
|
54
|
+
EBNF::Rule.new(:except_symbol, nil, [:seq, "-"], kind: :terminal).extend(EBNF::PEG::Rule),
|
55
|
+
EBNF::Rule.new(:first_quote_symbol, nil, [:seq, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
|
56
|
+
EBNF::Rule.new(:second_quote_symbol, nil, [:seq, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
|
57
|
+
EBNF::Rule.new(:start_comment_symbol, nil, [:seq, "(*"], kind: :terminal).extend(EBNF::PEG::Rule),
|
58
|
+
EBNF::Rule.new(:end_comment_symbol, nil, [:seq, "*)"], kind: :terminal).extend(EBNF::PEG::Rule),
|
59
|
+
EBNF::Rule.new(:start_group_symbol, nil, [:seq, "("], kind: :terminal).extend(EBNF::PEG::Rule),
|
60
|
+
EBNF::Rule.new(:end_group_symbol, nil, [:seq, ")"], kind: :terminal).extend(EBNF::PEG::Rule),
|
61
|
+
EBNF::Rule.new(:special_sequence_symbol, nil, [:seq, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
|
62
|
+
EBNF::Rule.new(:defining_symbol, nil, [:alt, "=", ":"], kind: :terminal).extend(EBNF::PEG::Rule),
|
63
|
+
EBNF::Rule.new(:definition_separator_symbol, nil, [:alt, "|", "/", "!"], kind: :terminal).extend(EBNF::PEG::Rule),
|
64
|
+
EBNF::Rule.new(:terminator_symbol, nil, [:alt, ";", "."], kind: :terminal).extend(EBNF::PEG::Rule),
|
65
|
+
EBNF::Rule.new(:start_option_symbol, nil, [:seq, "["], kind: :terminal).extend(EBNF::PEG::Rule),
|
66
|
+
EBNF::Rule.new(:end_option_symbol, nil, [:seq, "]"], kind: :terminal).extend(EBNF::PEG::Rule),
|
67
|
+
EBNF::Rule.new(:start_repeat_symbol, nil, [:alt, "{", "(:"], kind: :terminal).extend(EBNF::PEG::Rule),
|
68
|
+
EBNF::Rule.new(:end_repeat_symbol, nil, [:alt, "}", ":)"], kind: :terminal).extend(EBNF::PEG::Rule),
|
69
|
+
EBNF::Rule.new(:gap_free_symbol, nil, [:alt, :_gap_free_symbol_1, :terminal_string], kind: :terminal).extend(EBNF::PEG::Rule),
|
70
|
+
EBNF::Rule.new(:_gap_free_symbol_1, nil, [:seq, :_gap_free_symbol_3, :terminal_character]).extend(EBNF::PEG::Rule),
|
71
|
+
EBNF::Rule.new(:_gap_free_symbol_3, nil, [:not, :_gap_free_symbol_2]).extend(EBNF::PEG::Rule),
|
72
|
+
EBNF::Rule.new(:_gap_free_symbol_2, nil, [:range, "'\""], kind: :terminal).extend(EBNF::PEG::Rule),
|
73
|
+
]
|
74
|
+
end
|
75
|
+
|
data/lib/ebnf/ll1.rb
CHANGED
@@ -1,4 +1,86 @@
|
|
1
1
|
module EBNF
|
2
|
+
##
|
3
|
+
# This module extends {EBNF::Base} to create metadata including _branch_, [First/Follow][], and other tables which are used by {EBNF::LL1::Parser} to recognize examples of the associated grammar.
|
4
|
+
#
|
5
|
+
### Branch Table
|
6
|
+
#
|
7
|
+
# The Branch table is a hash mapping production rules to a hash relating terminals appearing in input to sequence of productions to follow when the corresponding input terminal is found. This allows either the `seq` primitive, where all terminals map to the same sequence of productions, or the `alt` primitive, where each terminal may map to a different production.
|
8
|
+
#
|
9
|
+
# BRANCH = {
|
10
|
+
# :alt => {
|
11
|
+
# "(" => [:seq, :_alt_1],
|
12
|
+
# :HEX => [:seq, :_alt_1],
|
13
|
+
# :O_RANGE => [:seq, :_alt_1],
|
14
|
+
# :RANGE => [:seq, :_alt_1],
|
15
|
+
# :STRING1 => [:seq, :_alt_1],
|
16
|
+
# :STRING2 => [:seq, :_alt_1],
|
17
|
+
# :SYMBOL => [:seq, :_alt_1],
|
18
|
+
# },
|
19
|
+
# ...
|
20
|
+
# :declaration => {
|
21
|
+
# "@pass" => [:pass],
|
22
|
+
# "@terminals" => ["@terminals"],
|
23
|
+
# },
|
24
|
+
# ...
|
25
|
+
# }
|
26
|
+
#
|
27
|
+
# In this case the `alt` rule, which is `seq ('|' seq)*`, can happen when any of the specified tokens appears on the input stream. They all cause the same token to be passed to the `seq` rule and follow with `_alt_1`, which handles the `('|' seq)*` portion of the rule, after the first sequence is matched.
|
28
|
+
#
|
29
|
+
# The `declaration` rule is `'@terminals' | pass`, using the `alt` primitive to determine the production to run based on the terminal appearing on the input stream. Eventually, a terminal production is found and the token is consumed.
|
30
|
+
#
|
31
|
+
### First/Follow Table
|
32
|
+
#
|
33
|
+
# The [First/Follow][] table is a hash mapping production rules to the terminals that may begin or follow the rule. For example:
|
34
|
+
#
|
35
|
+
# FIRST = {
|
36
|
+
# :alt => [
|
37
|
+
# :HEX,
|
38
|
+
# :SYMBOL,
|
39
|
+
# :RANGE,
|
40
|
+
# :O_RANGE,
|
41
|
+
# :STRING1,
|
42
|
+
# :STRING2,
|
43
|
+
# "("],
|
44
|
+
# ...
|
45
|
+
# }
|
46
|
+
#
|
47
|
+
### Terminals Table
|
48
|
+
#
|
49
|
+
# This table is a simple list of the terminal productions found in the grammar. For example:
|
50
|
+
#
|
51
|
+
# TERMINALS = ["(", ")", "-",
|
52
|
+
# "@pass", "@terminals",
|
53
|
+
# :HEX, :LHS, :O_RANGE,:POSTFIX,
|
54
|
+
# :RANGE, :STRING1, :STRING2, :SYMBOL,"|"
|
55
|
+
# ].freeze
|
56
|
+
#
|
57
|
+
### Cleanup Table
|
58
|
+
#
|
59
|
+
# This table identifies productions which used EBNF rules, which are transformed to BNF for actual parsing. This allows the parser, in some cases, to reproduce *star*, *plus*, and *opt* rule matches. For example:
|
60
|
+
#
|
61
|
+
# CLEANUP = {
|
62
|
+
# :_alt_1 => :star,
|
63
|
+
# :_alt_3 => :merge,
|
64
|
+
# :_diff_1 => :opt,
|
65
|
+
# :ebnf => :star,
|
66
|
+
# :_ebnf_2 => :merge,
|
67
|
+
# :_postfix_1 => :opt,
|
68
|
+
# :seq => :plus,
|
69
|
+
# :_seq_1 => :star,
|
70
|
+
# :_seq_2 => :merge,
|
71
|
+
# }.freeze
|
72
|
+
#
|
73
|
+
# In this case the `ebnf` rule was `(declaration | rule)*`. As BNF does not support a star operator, this is decomposed into a set of rules using `alt` and `seq` primitives:
|
74
|
+
#
|
75
|
+
# ebnf ::= _empty _ebnf_2
|
76
|
+
# _ebnf_1 ::= declaration | rule
|
77
|
+
# _ebnf_2 ::= _ebnf_1 ebnf
|
78
|
+
# _ebnf_3 ::= ebnf
|
79
|
+
#
|
80
|
+
# The `_empty` production matches an empty string, so allows for no value. `_ebnf_2` matches `declaration | rule` (using the `alt` primitive) followed by `ebnf`, creating a sequence of zero or more `declaration` or `rule` members.
|
81
|
+
#
|
82
|
+
# [First/Follow]: https://en.wikipedia.org/wiki/LL_parser#Constructing_an_LL.281.29_parsing_table
|
83
|
+
|
2
84
|
module LL1
|
3
85
|
autoload :Lexer, "ebnf/ll1/lexer"
|
4
86
|
autoload :Parser, "ebnf/ll1/parser"
|
@@ -51,8 +133,40 @@ module EBNF
|
|
51
133
|
##
|
52
134
|
# Create first/follow for each rule using techniques defined for LL(1) parsers.
|
53
135
|
#
|
136
|
+
# This takes rules which have been transformed into BNF and adds first/follow and other information to the rules to allow the generation of metadata tables used for driving a parser.
|
137
|
+
#
|
138
|
+
# Given an initial rule in EBNF:
|
139
|
+
#
|
140
|
+
# (rule ebnf "1" (star declaration rule))
|
141
|
+
#
|
142
|
+
# The BNF transformation becomes:
|
143
|
+
#
|
144
|
+
# (rule ebnf "1" (alt _empty _ebnf_2))
|
145
|
+
# (rule _ebnf_1 "1.1" (alt declaration rule))
|
146
|
+
# (rule _ebnf_2 "1.2" (seq _ebnf_1 ebnf))
|
147
|
+
# (rule _ebnf_3 "1.3" (seq ebnf))
|
148
|
+
#
|
149
|
+
# After running this method, the rules are annotated with first/follow and cleanup rules:
|
150
|
+
#
|
151
|
+
# (rule ebnf "1"
|
152
|
+
# (start #t)
|
153
|
+
# (first "@pass" "@terminals" LHS _eps)
|
154
|
+
# (follow _eof)
|
155
|
+
# (cleanup star)
|
156
|
+
# (alt _empty _ebnf_2))
|
157
|
+
# (rule _ebnf_1 "1.1"
|
158
|
+
# (first "@pass" "@terminals" LHS)
|
159
|
+
# (follow "@pass" "@terminals" LHS _eof)
|
160
|
+
# (alt declaration rule))
|
161
|
+
# (rule _ebnf_2 "1.2"
|
162
|
+
# (first "@pass" "@terminals" LHS)
|
163
|
+
# (follow _eof)
|
164
|
+
# (cleanup merge)
|
165
|
+
# (seq _ebnf_1 ebnf))
|
166
|
+
# (rule _ebnf_3 "1.3" (first "@pass" "@terminals" LHS _eps) (follow _eof) (seq ebnf))
|
167
|
+
#
|
54
168
|
# @return [EBNF] self
|
55
|
-
# @see
|
169
|
+
# @see https://en.wikipedia.org/wiki/LL_parser#Constructing_an_LL.281.29_parsing_table
|
56
170
|
# @param [Array<Symbol>] starts
|
57
171
|
# Set of symbols which are start rules
|
58
172
|
def first_follow(*starts)
|
@@ -96,8 +210,9 @@ module EBNF
|
|
96
210
|
firsts, follows = 0, 0
|
97
211
|
# add Fi(wi) to Fi(Ai) for every rule Ai → wi
|
98
212
|
#
|
99
|
-
# For sequences, this is the first rule in the sequence.
|
100
|
-
# For alts, this is every rule in the sequence
|
213
|
+
# * For sequences, this is the first rule in the sequence.
|
214
|
+
# * For alts, this is every rule in the sequence
|
215
|
+
# * Other rules don't matter, as they don't appear in strict BNF
|
101
216
|
each(:rule) do |ai|
|
102
217
|
# Fi(a w' ) = { a } for every terminal a
|
103
218
|
ai.terminals(ast).each do |t|
|
@@ -168,6 +283,10 @@ module EBNF
|
|
168
283
|
progress("first_follow") {"(#{ittr}) firsts #{firsts}, follows #{follows}"}
|
169
284
|
ittr += 1
|
170
285
|
end while (firsts + follows) > 0
|
286
|
+
|
287
|
+
debug("Fi.2-post: non-terminals without first") do
|
288
|
+
ast.reject(&:terminal?).reject(&:first).map(&:sym)
|
289
|
+
end if ast.reject(&:terminal?).any? {|r| r.first.nil?}
|
171
290
|
end
|
172
291
|
end
|
173
292
|
|
@@ -272,6 +391,19 @@ module EBNF
|
|
272
391
|
end
|
273
392
|
end
|
274
393
|
|
394
|
+
##
|
395
|
+
# Output Ruby parser files for LL(1) parsing
|
396
|
+
#
|
397
|
+
# @param [IO, StringIO] output
|
398
|
+
def to_ruby_ll1(output, **options)
|
399
|
+
self.outputTable(output, 'BRANCH', self.branch, 1)
|
400
|
+
self.outputTable(output, 'TERMINALS', self.terminals, 1)
|
401
|
+
self.outputTable(output, 'FIRST', self.first, 1)
|
402
|
+
self.outputTable(output, 'FOLLOW', self.follow, 1)
|
403
|
+
self.outputTable(output, 'CLEANUP', self.cleanup, 1)
|
404
|
+
self.outputTable(output, 'PASS', [self.pass], 1) if self.pass
|
405
|
+
end
|
406
|
+
|
275
407
|
private
|
276
408
|
def do_production(lhs)
|
277
409
|
rule = find_rule(lhs)
|
@@ -287,16 +419,16 @@ module EBNF
|
|
287
419
|
|
288
420
|
if rule.expr.first == :matches
|
289
421
|
debug("prod") {"Rule is regexp: #{rule}"}
|
290
|
-
|
291
|
-
error("No record of what token #{lhs} can start with") unless rule.first
|
292
422
|
return
|
293
423
|
end
|
294
424
|
|
425
|
+
error("No record of what token #{lhs.inspect} can start with") unless rule.first
|
426
|
+
|
295
427
|
if rule.alt?
|
296
428
|
# A First/Follow conflict appears when _eps is in the first
|
297
429
|
# of one rule and there is a token in the first and
|
298
430
|
# follow of the same rule
|
299
|
-
if rule.first.include?(:_eps) && !(overlap = ((rule.first & (rule.follow || [])) - [:eps])).empty?
|
431
|
+
if Array(rule.first).include?(:_eps) && !(overlap = ((Array(rule.first) & (rule.follow || [])) - [:eps])).empty?
|
300
432
|
error("First/Follow Conflict: #{overlap.first.inspect} is both first and follow of #{rule.sym}")
|
301
433
|
end
|
302
434
|
|
data/lib/ebnf/ll1/lexer.rb
CHANGED
@@ -29,7 +29,7 @@ module EBNF::LL1
|
|
29
29
|
# warn error.inspect
|
30
30
|
# end
|
31
31
|
#
|
32
|
-
# @see
|
32
|
+
# @see https://en.wikipedia.org/wiki/Lexical_analysis
|
33
33
|
class Lexer
|
34
34
|
include Enumerable
|
35
35
|
|
@@ -43,10 +43,10 @@ module EBNF::LL1
|
|
43
43
|
"\\'" => '\'', # \u0027 (apostrophe-quote, single quote mark)
|
44
44
|
'\\\\' => '\\' # \u005C (backslash)
|
45
45
|
}.freeze
|
46
|
-
ESCAPE_CHAR4 = /\\u(?:[0-9A-Fa-f]{4,4})
|
47
|
-
ESCAPE_CHAR8 = /\\U(?:[0-9A-Fa-f]{8,8})
|
48
|
-
ECHAR = /\\./
|
49
|
-
UCHAR = /#{ESCAPE_CHAR4}|#{ESCAPE_CHAR8}
|
46
|
+
ESCAPE_CHAR4 = /\\u(?:[0-9A-Fa-f]{4,4})/u.freeze # \uXXXX
|
47
|
+
ESCAPE_CHAR8 = /\\U(?:[0-9A-Fa-f]{8,8})/u.freeze # \UXXXXXXXX
|
48
|
+
ECHAR = /\\./u.freeze # More liberal unescaping
|
49
|
+
UCHAR = /#{ESCAPE_CHAR4}|#{ESCAPE_CHAR8}/n.freeze
|
50
50
|
|
51
51
|
##
|
52
52
|
# @return [Regexp] defines whitespace, including comments, otherwise whitespace must be explicit in terminals
|
@@ -59,7 +59,7 @@ module EBNF::LL1
|
|
59
59
|
#
|
60
60
|
# @param [String] string
|
61
61
|
# @return [String]
|
62
|
-
# @see
|
62
|
+
# @see https://www.w3.org/TR/rdf-sparql-query/#codepointEscape
|
63
63
|
def self.unescape_codepoints(string)
|
64
64
|
string = string.dup
|
65
65
|
string.force_encoding(Encoding::ASCII_8BIT) if string.respond_to?(:force_encoding)
|
@@ -81,7 +81,7 @@ module EBNF::LL1
|
|
81
81
|
#
|
82
82
|
# @param [String] input
|
83
83
|
# @return [String]
|
84
|
-
# @see
|
84
|
+
# @see https://www.w3.org/TR/rdf-sparql-query/#grammarEscapes
|
85
85
|
def self.unescape_string(input)
|
86
86
|
input.gsub(ECHAR) { |escaped| ESCAPE_CHARS[escaped] || escaped[1..-1]}
|
87
87
|
end
|
@@ -98,8 +98,8 @@ module EBNF::LL1
|
|
98
98
|
# @yieldparam [Lexer] lexer
|
99
99
|
# @return [Lexer]
|
100
100
|
# @raise [Lexer::Error] on invalid input
|
101
|
-
def self.tokenize(input, terminals, options
|
102
|
-
lexer = self.new(input, terminals, options)
|
101
|
+
def self.tokenize(input, terminals, **options, &block)
|
102
|
+
lexer = self.new(input, terminals, **options)
|
103
103
|
block_given? ? block.call(lexer) : lexer
|
104
104
|
end
|
105
105
|
|
@@ -115,17 +115,23 @@ module EBNF::LL1
|
|
115
115
|
# Whitespace between tokens, including comments
|
116
116
|
# @option options[Integer] :high_water passed to scanner
|
117
117
|
# @option options[Integer] :low_water passed to scanner
|
118
|
-
def initialize(input = nil, terminals = nil, options
|
118
|
+
def initialize(input = nil, terminals = nil, **options)
|
119
119
|
@options = options.dup
|
120
120
|
@whitespace = @options[:whitespace]
|
121
121
|
@terminals = terminals.map do |term|
|
122
|
-
term.is_a?(Array)
|
122
|
+
if term.is_a?(Array) && term.length ==3
|
123
|
+
# Last element is options
|
124
|
+
Terminal.new(term[0], term[1], **term[2])
|
125
|
+
elsif term.is_a?(Array)
|
126
|
+
Terminal.new(*term)
|
127
|
+
else
|
128
|
+
term
|
129
|
+
end
|
123
130
|
end
|
124
131
|
|
125
132
|
raise Error, "Terminal patterns not defined" unless @terminals && @terminals.length > 0
|
126
133
|
|
127
|
-
@
|
128
|
-
@scanner = Scanner.new(input, options)
|
134
|
+
@scanner = Scanner.new(input, **options)
|
129
135
|
end
|
130
136
|
|
131
137
|
##
|
@@ -140,12 +146,6 @@ module EBNF::LL1
|
|
140
146
|
# @return [String]
|
141
147
|
attr_accessor :input
|
142
148
|
|
143
|
-
##
|
144
|
-
# The current line number (zero-based).
|
145
|
-
#
|
146
|
-
# @return [Integer]
|
147
|
-
attr_reader :lineno
|
148
|
-
|
149
149
|
##
|
150
150
|
# Returns `true` if the input string is lexically valid.
|
151
151
|
#
|
@@ -187,7 +187,7 @@ module EBNF::LL1
|
|
187
187
|
|
188
188
|
@first ||= begin
|
189
189
|
{} while !scanner.eos? && skip_whitespace
|
190
|
-
return
|
190
|
+
return nil if scanner.eos?
|
191
191
|
|
192
192
|
token = match_token(*types)
|
193
193
|
|
@@ -226,7 +226,7 @@ module EBNF::LL1
|
|
226
226
|
# @return [Token]
|
227
227
|
def recover(*types)
|
228
228
|
until scanner.eos? || tok = match_token(*types)
|
229
|
-
if scanner.skip_until(@whitespace || /\s
|
229
|
+
if scanner.skip_until(@whitespace || /\s+/m).nil? # Skip past current "token"
|
230
230
|
# No whitespace at the end, must be at the end of the string
|
231
231
|
scanner.terminate
|
232
232
|
else
|
@@ -236,6 +236,14 @@ module EBNF::LL1
|
|
236
236
|
scanner.unscan if tok
|
237
237
|
first
|
238
238
|
end
|
239
|
+
|
240
|
+
##
|
241
|
+
# The current line number (one-based).
|
242
|
+
#
|
243
|
+
# @return [Integer]
|
244
|
+
def lineno
|
245
|
+
scanner.lineno
|
246
|
+
end
|
239
247
|
protected
|
240
248
|
|
241
249
|
# @return [StringScanner]
|
@@ -246,9 +254,7 @@ module EBNF::LL1
|
|
246
254
|
def skip_whitespace
|
247
255
|
# skip all white space, but keep track of the current line number
|
248
256
|
while @whitespace && !scanner.eos?
|
249
|
-
|
250
|
-
@lineno += matched.count("\n")
|
251
|
-
else
|
257
|
+
unless scanner.scan(@whitespace)
|
252
258
|
return
|
253
259
|
end
|
254
260
|
end
|
@@ -274,7 +280,6 @@ module EBNF::LL1
|
|
274
280
|
if matched = scanner.scan(term.regexp)
|
275
281
|
#STDERR.puts " matched #{term.type.inspect}: #{matched.inspect}"
|
276
282
|
tok = token(term.type, term.canonicalize(matched))
|
277
|
-
@lineno += matched.count("\n")
|
278
283
|
return tok
|
279
284
|
end
|
280
285
|
end
|
@@ -300,7 +305,7 @@ module EBNF::LL1
|
|
300
305
|
# Cause strings and codepoints to be unescaped.
|
301
306
|
# @option options [Regexp] :partial_regexp
|
302
307
|
# A regular expression matching the beginning of this terminal; useful for terminals that match things longer than the scanner low water mark.
|
303
|
-
def initialize(type, regexp, options
|
308
|
+
def initialize(type, regexp, **options)
|
304
309
|
@type, @regexp, @options = type, regexp, options
|
305
310
|
@partial_regexp = options[:partial_regexp]
|
306
311
|
@map = options.fetch(:map, {})
|
@@ -353,8 +358,8 @@ module EBNF::LL1
|
|
353
358
|
# Scanner instance with access to matched groups
|
354
359
|
# @param [Hash{Symbol => Object}] options
|
355
360
|
# @return [Token]
|
356
|
-
def token(type, value, options
|
357
|
-
Token.new(type, value,
|
361
|
+
def token(type, value, **options)
|
362
|
+
Token.new(type, value, lineno: lineno, **options)
|
358
363
|
end
|
359
364
|
|
360
365
|
##
|
@@ -365,7 +370,7 @@ module EBNF::LL1
|
|
365
370
|
# token.type #=> :LANGTAG
|
366
371
|
# token.value #=> "en"
|
367
372
|
#
|
368
|
-
# @see
|
373
|
+
# @see https://en.wikipedia.org/wiki/Lexical_analysis#Token
|
369
374
|
class Token
|
370
375
|
##
|
371
376
|
# The token's symbol type.
|
@@ -398,7 +403,7 @@ module EBNF::LL1
|
|
398
403
|
# @param [String] value
|
399
404
|
# @param [Hash{Symbol => Object}] options
|
400
405
|
# @option options [Integer] :lineno (nil)
|
401
|
-
def initialize(type, value, options
|
406
|
+
def initialize(type, value, **options)
|
402
407
|
@type = type.to_s.to_sym if type
|
403
408
|
@value = value.to_s
|
404
409
|
@options = options.dup
|
@@ -486,7 +491,7 @@ module EBNF::LL1
|
|
486
491
|
# "invalid token '%' on line 10",
|
487
492
|
# input: query, token: '%', lineno: 9)
|
488
493
|
#
|
489
|
-
# @see
|
494
|
+
# @see https://ruby-doc.org/core/classes/StandardError.html
|
490
495
|
class Error < StandardError
|
491
496
|
##
|
492
497
|
# The input string associated with the error.
|
@@ -514,7 +519,7 @@ module EBNF::LL1
|
|
514
519
|
# @option options [String] :input (nil)
|
515
520
|
# @option options [String] :token (nil)
|
516
521
|
# @option options [Integer] :lineno (nil)
|
517
|
-
def initialize(message, options
|
522
|
+
def initialize(message, **options)
|
518
523
|
@input = options[:input]
|
519
524
|
@token = options[:token]
|
520
525
|
@lineno = options[:lineno]
|