ebnf 1.2.0 → 2.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +223 -199
- data/UNLICENSE +1 -1
- data/VERSION +1 -1
- data/bin/ebnf +38 -19
- data/etc/abnf-core.ebnf +52 -0
- data/etc/abnf.abnf +121 -0
- data/etc/abnf.ebnf +124 -0
- data/etc/abnf.sxp +45 -0
- data/etc/doap.ttl +23 -18
- data/etc/ebnf.ebnf +21 -33
- data/etc/ebnf.html +76 -160
- data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
- data/etc/ebnf.ll1.sxp +182 -183
- data/etc/ebnf.peg.rb +90 -0
- data/etc/ebnf.peg.sxp +84 -0
- data/etc/ebnf.sxp +40 -41
- data/etc/iso-ebnf.ebnf +140 -0
- data/etc/iso-ebnf.isoebnf +138 -0
- data/etc/iso-ebnf.sxp +65 -0
- data/etc/sparql.ebnf +4 -4
- data/etc/sparql.html +1603 -1751
- data/etc/sparql.ll1.sxp +7372 -7372
- data/etc/sparql.peg.rb +532 -0
- data/etc/sparql.peg.sxp +597 -0
- data/etc/sparql.sxp +363 -362
- data/etc/turtle.ebnf +3 -3
- data/etc/turtle.html +465 -517
- data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
- data/etc/turtle.ll1.sxp +425 -425
- data/etc/turtle.peg.rb +182 -0
- data/etc/turtle.peg.sxp +199 -0
- data/etc/turtle.sxp +103 -101
- data/lib/ebnf.rb +6 -1
- data/lib/ebnf/abnf.rb +301 -0
- data/lib/ebnf/abnf/core.rb +23 -0
- data/lib/ebnf/abnf/meta.rb +111 -0
- data/lib/ebnf/base.rb +114 -69
- data/lib/ebnf/bnf.rb +1 -26
- data/lib/ebnf/ebnf/meta.rb +90 -0
- data/lib/ebnf/isoebnf.rb +229 -0
- data/lib/ebnf/isoebnf/meta.rb +75 -0
- data/lib/ebnf/ll1.rb +131 -3
- data/lib/ebnf/ll1/lexer.rb +20 -22
- data/lib/ebnf/ll1/parser.rb +97 -64
- data/lib/ebnf/ll1/scanner.rb +82 -50
- data/lib/ebnf/native.rb +320 -0
- data/lib/ebnf/parser.rb +285 -302
- data/lib/ebnf/peg.rb +39 -0
- data/lib/ebnf/peg/parser.rb +561 -0
- data/lib/ebnf/peg/rule.rb +250 -0
- data/lib/ebnf/rule.rb +442 -148
- data/lib/ebnf/terminals.rb +21 -0
- data/lib/ebnf/writer.rb +587 -82
- metadata +125 -18
- data/etc/sparql.rb +0 -45773
@@ -0,0 +1,75 @@
|
|
1
|
+
# This file is automatically generated by ebnf version 2.0.0
|
2
|
+
# Derived from etc/iso-ebnf.ebnf
|
3
|
+
module ISOEBNFMeta
|
4
|
+
RULES = [
|
5
|
+
EBNF::Rule.new(:syntax, nil, [:star, :syntax_rule]).extend(EBNF::PEG::Rule),
|
6
|
+
EBNF::Rule.new(:syntax_rule, nil, [:seq, :meta_identifier, :defining_symbol, :definitions_list, :terminator_symbol]).extend(EBNF::PEG::Rule),
|
7
|
+
EBNF::Rule.new(:definitions_list, nil, [:seq, :single_definition, :_definitions_list_1]).extend(EBNF::PEG::Rule),
|
8
|
+
EBNF::Rule.new(:_definitions_list_1, nil, [:star, :_definitions_list_2]).extend(EBNF::PEG::Rule),
|
9
|
+
EBNF::Rule.new(:_definitions_list_2, nil, [:seq, :definition_separator_symbol, :definitions_list]).extend(EBNF::PEG::Rule),
|
10
|
+
EBNF::Rule.new(:single_definition, nil, [:seq, :term, :_single_definition_1]).extend(EBNF::PEG::Rule),
|
11
|
+
EBNF::Rule.new(:_single_definition_1, nil, [:star, :_single_definition_2]).extend(EBNF::PEG::Rule),
|
12
|
+
EBNF::Rule.new(:_single_definition_2, nil, [:seq, ",", :term]).extend(EBNF::PEG::Rule),
|
13
|
+
EBNF::Rule.new(:term, nil, [:seq, :factor, :_term_1]).extend(EBNF::PEG::Rule),
|
14
|
+
EBNF::Rule.new(:_term_1, nil, [:opt, :_term_2]).extend(EBNF::PEG::Rule),
|
15
|
+
EBNF::Rule.new(:_term_2, nil, [:seq, "-", :exception]).extend(EBNF::PEG::Rule),
|
16
|
+
EBNF::Rule.new(:exception, nil, [:seq, :factor]).extend(EBNF::PEG::Rule),
|
17
|
+
EBNF::Rule.new(:factor, nil, [:seq, :_factor_1, :primary]).extend(EBNF::PEG::Rule),
|
18
|
+
EBNF::Rule.new(:_factor_1, nil, [:opt, :_factor_2]).extend(EBNF::PEG::Rule),
|
19
|
+
EBNF::Rule.new(:_factor_2, nil, [:seq, :integer, "*"]).extend(EBNF::PEG::Rule),
|
20
|
+
EBNF::Rule.new(:primary, nil, [:alt, :optional_sequence, :repeated_sequence, :special_sequence, :grouped_sequence, :meta_identifier, :terminal_string, :empty]).extend(EBNF::PEG::Rule),
|
21
|
+
EBNF::Rule.new(:optional_sequence, nil, [:seq, :start_option_symbol, :definitions_list, :end_option_symbol]).extend(EBNF::PEG::Rule),
|
22
|
+
EBNF::Rule.new(:repeated_sequence, nil, [:seq, :start_repeat_symbol, :definitions_list, :end_repeat_symbol]).extend(EBNF::PEG::Rule),
|
23
|
+
EBNF::Rule.new(:grouped_sequence, nil, [:seq, "(", :definitions_list, ")"]).extend(EBNF::PEG::Rule),
|
24
|
+
EBNF::Rule.new(:_terminals, nil, [:seq], kind: :terminals).extend(EBNF::PEG::Rule),
|
25
|
+
EBNF::Rule.new(:terminal_string, nil, [:alt, :_terminal_string_1, :_terminal_string_2], kind: :terminal).extend(EBNF::PEG::Rule),
|
26
|
+
EBNF::Rule.new(:_terminal_string_1, nil, [:seq, "'", :_terminal_string_3, "'"]).extend(EBNF::PEG::Rule),
|
27
|
+
EBNF::Rule.new(:_terminal_string_3, nil, [:plus, :first_terminal_character]).extend(EBNF::PEG::Rule),
|
28
|
+
EBNF::Rule.new(:_terminal_string_2, nil, [:seq, "\"", :_terminal_string_4, "\""]).extend(EBNF::PEG::Rule),
|
29
|
+
EBNF::Rule.new(:_terminal_string_4, nil, [:plus, :second_terminal_character]).extend(EBNF::PEG::Rule),
|
30
|
+
EBNF::Rule.new(:meta_identifier, nil, [:seq, :letter, :_meta_identifier_1], kind: :terminal).extend(EBNF::PEG::Rule),
|
31
|
+
EBNF::Rule.new(:_meta_identifier_1, nil, [:star, :meta_identifier_character]).extend(EBNF::PEG::Rule),
|
32
|
+
EBNF::Rule.new(:integer, nil, [:plus, :decimal_digit], kind: :terminal).extend(EBNF::PEG::Rule),
|
33
|
+
EBNF::Rule.new(:special_sequence, nil, [:seq, "?", :_special_sequence_1, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
|
34
|
+
EBNF::Rule.new(:_special_sequence_1, nil, [:star, :special_sequence_character]).extend(EBNF::PEG::Rule),
|
35
|
+
EBNF::Rule.new(:comment, nil, [:seq, :start_comment_symbol, :_comment_1, :end_comment_symbol], kind: :terminal).extend(EBNF::PEG::Rule),
|
36
|
+
EBNF::Rule.new(:_comment_1, nil, [:star, :comment_symbol]).extend(EBNF::PEG::Rule),
|
37
|
+
EBNF::Rule.new(:comment_symbol, nil, [:alt, :comment, :commentless_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule),
|
38
|
+
EBNF::Rule.new(:commentless_symbol, nil, [:alt, :terminal_character, :meta_identifier, :integer, :terminal_string, :special_sequence], kind: :terminal).extend(EBNF::PEG::Rule),
|
39
|
+
EBNF::Rule.new(:letter, nil, [:range, "a-zA-Z"], kind: :terminal).extend(EBNF::PEG::Rule),
|
40
|
+
EBNF::Rule.new(:decimal_digit, nil, [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule),
|
41
|
+
EBNF::Rule.new(:meta_identifier_character, nil, [:alt, :letter, :decimal_digit, "_"], kind: :terminal).extend(EBNF::PEG::Rule),
|
42
|
+
EBNF::Rule.new(:first_terminal_character, nil, [:diff, :terminal_character, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
|
43
|
+
EBNF::Rule.new(:second_terminal_character, nil, [:diff, :terminal_character, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
|
44
|
+
EBNF::Rule.new(:special_sequence_character, nil, [:diff, :terminal_character, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
|
45
|
+
EBNF::Rule.new(:terminal_character, nil, [:alt, :letter, :decimal_digit, :concatenate_symbol, :defining_symbol, :definition_separator_symbol, :end_comment_symbol, :end_group_symbol, :end_option_symbol, :end_repeat_symbol, :except_symbol, :first_quote_symbol, :repetition_symbol, :second_quote_symbol, :special_sequence_symbol, :start_comment_symbol, :start_group_symbol, :start_option_symbol, :start_repeat_symbol, :terminator_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule),
|
46
|
+
EBNF::Rule.new(:other_character, nil, [:alt, :_other_character_1, "\\"], kind: :terminal).extend(EBNF::PEG::Rule),
|
47
|
+
EBNF::Rule.new(:_other_character_1, nil, [:range, ":+_%@&$<>^` ̃#x20#x23"], kind: :terminal).extend(EBNF::PEG::Rule),
|
48
|
+
EBNF::Rule.new(:gap_separator, nil, [:range, "#x9#xa#xb#xc#xd#x20"], kind: :terminal).extend(EBNF::PEG::Rule),
|
49
|
+
EBNF::Rule.new(:_pass, nil, [:alt, :__pass_1, :comment], kind: :pass).extend(EBNF::PEG::Rule),
|
50
|
+
EBNF::Rule.new(:__pass_1, nil, [:plus, :gap_separator]).extend(EBNF::PEG::Rule),
|
51
|
+
EBNF::Rule.new(:empty, nil, [:seq, ""], kind: :terminal).extend(EBNF::PEG::Rule),
|
52
|
+
EBNF::Rule.new(:concatenate_symbol, nil, [:seq, ","], kind: :terminal).extend(EBNF::PEG::Rule),
|
53
|
+
EBNF::Rule.new(:repetition_symbol, nil, [:seq, "*"], kind: :terminal).extend(EBNF::PEG::Rule),
|
54
|
+
EBNF::Rule.new(:except_symbol, nil, [:seq, "-"], kind: :terminal).extend(EBNF::PEG::Rule),
|
55
|
+
EBNF::Rule.new(:first_quote_symbol, nil, [:seq, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
|
56
|
+
EBNF::Rule.new(:second_quote_symbol, nil, [:seq, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
|
57
|
+
EBNF::Rule.new(:start_comment_symbol, nil, [:seq, "(*"], kind: :terminal).extend(EBNF::PEG::Rule),
|
58
|
+
EBNF::Rule.new(:end_comment_symbol, nil, [:seq, "*)"], kind: :terminal).extend(EBNF::PEG::Rule),
|
59
|
+
EBNF::Rule.new(:start_group_symbol, nil, [:seq, "("], kind: :terminal).extend(EBNF::PEG::Rule),
|
60
|
+
EBNF::Rule.new(:end_group_symbol, nil, [:seq, ")"], kind: :terminal).extend(EBNF::PEG::Rule),
|
61
|
+
EBNF::Rule.new(:special_sequence_symbol, nil, [:seq, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
|
62
|
+
EBNF::Rule.new(:defining_symbol, nil, [:alt, "=", ":"], kind: :terminal).extend(EBNF::PEG::Rule),
|
63
|
+
EBNF::Rule.new(:definition_separator_symbol, nil, [:alt, "|", "/", "!"], kind: :terminal).extend(EBNF::PEG::Rule),
|
64
|
+
EBNF::Rule.new(:terminator_symbol, nil, [:alt, ";", "."], kind: :terminal).extend(EBNF::PEG::Rule),
|
65
|
+
EBNF::Rule.new(:start_option_symbol, nil, [:seq, "["], kind: :terminal).extend(EBNF::PEG::Rule),
|
66
|
+
EBNF::Rule.new(:end_option_symbol, nil, [:seq, "]"], kind: :terminal).extend(EBNF::PEG::Rule),
|
67
|
+
EBNF::Rule.new(:start_repeat_symbol, nil, [:alt, "{", "(:"], kind: :terminal).extend(EBNF::PEG::Rule),
|
68
|
+
EBNF::Rule.new(:end_repeat_symbol, nil, [:alt, "}", ":)"], kind: :terminal).extend(EBNF::PEG::Rule),
|
69
|
+
EBNF::Rule.new(:gap_free_symbol, nil, [:alt, :_gap_free_symbol_1, :terminal_string], kind: :terminal).extend(EBNF::PEG::Rule),
|
70
|
+
EBNF::Rule.new(:_gap_free_symbol_1, nil, [:seq, :_gap_free_symbol_3, :terminal_character]).extend(EBNF::PEG::Rule),
|
71
|
+
EBNF::Rule.new(:_gap_free_symbol_3, nil, [:not, :_gap_free_symbol_2]).extend(EBNF::PEG::Rule),
|
72
|
+
EBNF::Rule.new(:_gap_free_symbol_2, nil, [:range, "'\""], kind: :terminal).extend(EBNF::PEG::Rule),
|
73
|
+
]
|
74
|
+
end
|
75
|
+
|
data/lib/ebnf/ll1.rb
CHANGED
@@ -1,4 +1,86 @@
|
|
1
1
|
module EBNF
|
2
|
+
##
|
3
|
+
# This module extends {EBNF::Base} to create metadata including _branch_, [First/Follow][], and other tables which are used by {EBNF::LL1::Parser} to recognize examples of the associated grammar.
|
4
|
+
#
|
5
|
+
### Branch Table
|
6
|
+
#
|
7
|
+
# The Branch table is a hash mapping production rules to a hash relating terminals appearing in input to sequence of productions to follow when the corresponding input terminal is found. This allows either the `seq` primitive, where all terminals map to the same sequence of productions, or the `alt` primitive, where each terminal may map to a different production.
|
8
|
+
#
|
9
|
+
# BRANCH = {
|
10
|
+
# :alt => {
|
11
|
+
# "(" => [:seq, :_alt_1],
|
12
|
+
# :HEX => [:seq, :_alt_1],
|
13
|
+
# :O_RANGE => [:seq, :_alt_1],
|
14
|
+
# :RANGE => [:seq, :_alt_1],
|
15
|
+
# :STRING1 => [:seq, :_alt_1],
|
16
|
+
# :STRING2 => [:seq, :_alt_1],
|
17
|
+
# :SYMBOL => [:seq, :_alt_1],
|
18
|
+
# },
|
19
|
+
# ...
|
20
|
+
# :declaration => {
|
21
|
+
# "@pass" => [:pass],
|
22
|
+
# "@terminals" => ["@terminals"],
|
23
|
+
# },
|
24
|
+
# ...
|
25
|
+
# }
|
26
|
+
#
|
27
|
+
# In this case the `alt` rule is `seq ('|' seq)*`, which can be entered when any of the specified tokens appears on the input stream. They all cause the same token to be passed to the `seq` rule, followed by `_alt_1`, which handles the `('|' seq)*` portion of the rule after the first sequence is matched.
|
28
|
+
#
|
29
|
+
# The `declaration` rule is `'@terminals' | pass` using the `alt` primitive determining the production to run based on the terminal appearing on the input stream. Eventually, a terminal production is found and the token is consumed.
|
30
|
+
#
|
31
|
+
### First/Follow Table
|
32
|
+
#
|
33
|
+
# The [First/Follow][] table is a hash mapping production rules to the terminals that may precede or follow the rule. For example:
|
34
|
+
#
|
35
|
+
# FIRST = {
|
36
|
+
# :alt => [
|
37
|
+
# :HEX,
|
38
|
+
# :SYMBOL,
|
39
|
+
# :RANGE,
|
40
|
+
# :O_RANGE,
|
41
|
+
# :STRING1,
|
42
|
+
# :STRING2,
|
43
|
+
# "("],
|
44
|
+
# ...
|
45
|
+
# }
|
46
|
+
#
|
47
|
+
### Terminals Table
|
48
|
+
#
|
49
|
+
# This table is a simple list of the terminal productions found in the grammar. For example:
|
50
|
+
#
|
51
|
+
# TERMINALS = ["(", ")", "-",
|
52
|
+
# "@pass", "@terminals",
|
53
|
+
# :HEX, :LHS, :O_RANGE,:POSTFIX,
|
54
|
+
# :RANGE, :STRING1, :STRING2, :SYMBOL,"|"
|
55
|
+
# ].freeze
|
56
|
+
#
|
57
|
+
### Cleanup Table
|
58
|
+
#
|
59
|
+
# This table identifies productions which used EBNF rules, which are transformed to BNF for actual parsing. This allows the parser, in some cases, to reproduce *star*, *plus*, and *opt* rule matches. For example:
|
60
|
+
#
|
61
|
+
# CLEANUP = {
|
62
|
+
# :_alt_1 => :star,
|
63
|
+
# :_alt_3 => :merge,
|
64
|
+
# :_diff_1 => :opt,
|
65
|
+
# :ebnf => :star,
|
66
|
+
# :_ebnf_2 => :merge,
|
67
|
+
# :_postfix_1 => :opt,
|
68
|
+
# :seq => :plus,
|
69
|
+
# :_seq_1 => :star,
|
70
|
+
# :_seq_2 => :merge,
|
71
|
+
# }.freeze
|
72
|
+
#
|
73
|
+
# In this case the `ebnf` rule was `(declaration | rule)*`. As BNF does not support a star operator, this is decomposed into a set of rules using `alt` and `seq` primitives:
|
74
|
+
#
|
75
|
+
# ebnf ::= _empty | _ebnf_2
|
76
|
+
# _ebnf_1 ::= declaration | rule
|
77
|
+
# _ebnf_2 ::= _ebnf_1 ebnf
|
78
|
+
# _ebnf_3 ::= ebnf
|
79
|
+
#
|
80
|
+
# The `_empty` production matches an empty string, so allows for no value. `_ebnf_2` matches `declaration | rule` (using the `alt` primitive) followed by `ebnf`, creating a sequence of zero or more `declaration` or `rule` members.
|
81
|
+
#
|
82
|
+
# [First/Follow]: https://en.wikipedia.org/wiki/LL_parser#Constructing_an_LL.281.29_parsing_table
|
83
|
+
|
2
84
|
module LL1
|
3
85
|
autoload :Lexer, "ebnf/ll1/lexer"
|
4
86
|
autoload :Parser, "ebnf/ll1/parser"
|
@@ -51,8 +133,40 @@ module EBNF
|
|
51
133
|
##
|
52
134
|
# Create first/follow for each rule using techniques defined for LL(1) parsers.
|
53
135
|
#
|
136
|
+
# This takes rules which have been transformed into BNF and adds first/follow and other information to the rules to allow the generation of metadata tables used for driving a parser.
|
137
|
+
#
|
138
|
+
# Given an initial rule in EBNF:
|
139
|
+
#
|
140
|
+
# (rule ebnf "1" (star (alt declaration rule)))
|
141
|
+
#
|
142
|
+
# The BNF transformation becomes:
|
143
|
+
#
|
144
|
+
# (rule ebnf "1" (alt _empty _ebnf_2))
|
145
|
+
# (rule _ebnf_1 "1.1" (alt declaration rule))
|
146
|
+
# (rule _ebnf_2 "1.2" (seq _ebnf_1 ebnf))
|
147
|
+
# (rule _ebnf_3 "1.3" (seq ebnf))
|
148
|
+
#
|
149
|
+
# After running this method, the rules are annotated with first/follow and cleanup rules:
|
150
|
+
#
|
151
|
+
# (rule ebnf "1"
|
152
|
+
# (start #t)
|
153
|
+
# (first "@pass" "@terminals" LHS _eps)
|
154
|
+
# (follow _eof)
|
155
|
+
# (cleanup star)
|
156
|
+
# (alt _empty _ebnf_2))
|
157
|
+
# (rule _ebnf_1 "1.1"
|
158
|
+
# (first "@pass" "@terminals" LHS)
|
159
|
+
# (follow "@pass" "@terminals" LHS _eof)
|
160
|
+
# (alt declaration rule))
|
161
|
+
# (rule _ebnf_2 "1.2"
|
162
|
+
# (first "@pass" "@terminals" LHS)
|
163
|
+
# (follow _eof)
|
164
|
+
# (cleanup merge)
|
165
|
+
# (seq _ebnf_1 ebnf))
|
166
|
+
# (rule _ebnf_3 "1.3" (first "@pass" "@terminals" LHS _eps) (follow _eof) (seq ebnf))
|
167
|
+
#
|
54
168
|
# @return [EBNF] self
|
55
|
-
# @see
|
169
|
+
# @see https://en.wikipedia.org/wiki/LL_parser#Constructing_an_LL.281.29_parsing_table
|
56
170
|
# @param [Array<Symbol>] starts
|
57
171
|
# Set of symbols which are start rules
|
58
172
|
def first_follow(*starts)
|
@@ -96,8 +210,9 @@ module EBNF
|
|
96
210
|
firsts, follows = 0, 0
|
97
211
|
# add Fi(wi) to Fi(Ai) for every rule Ai → wi
|
98
212
|
#
|
99
|
-
# For sequences, this is the first rule in the sequence.
|
100
|
-
# For alts, this is every rule in the sequence
|
213
|
+
# * For sequences, this is the first rule in the sequence.
|
214
|
+
# * For alts, this is every rule in the sequence
|
215
|
+
# * Other rules don't matter, as they don't appear in strict BNF
|
101
216
|
each(:rule) do |ai|
|
102
217
|
# Fi(a w' ) = { a } for every terminal a
|
103
218
|
ai.terminals(ast).each do |t|
|
@@ -276,6 +391,19 @@ module EBNF
|
|
276
391
|
end
|
277
392
|
end
|
278
393
|
|
394
|
+
##
|
395
|
+
# Output Ruby parser files for LL(1) parsing
|
396
|
+
#
|
397
|
+
# @param [IO, StringIO] output
|
398
|
+
def to_ruby_ll1(output, **options)
|
399
|
+
self.outputTable(output, 'BRANCH', self.branch, 1)
|
400
|
+
self.outputTable(output, 'TERMINALS', self.terminals, 1)
|
401
|
+
self.outputTable(output, 'FIRST', self.first, 1)
|
402
|
+
self.outputTable(output, 'FOLLOW', self.follow, 1)
|
403
|
+
self.outputTable(output, 'CLEANUP', self.cleanup, 1)
|
404
|
+
self.outputTable(output, 'PASS', [self.pass], 1) if self.pass
|
405
|
+
end
|
406
|
+
|
279
407
|
private
|
280
408
|
def do_production(lhs)
|
281
409
|
rule = find_rule(lhs)
|
data/lib/ebnf/ll1/lexer.rb
CHANGED
@@ -29,7 +29,7 @@ module EBNF::LL1
|
|
29
29
|
# warn error.inspect
|
30
30
|
# end
|
31
31
|
#
|
32
|
-
# @see
|
32
|
+
# @see https://en.wikipedia.org/wiki/Lexical_analysis
|
33
33
|
class Lexer
|
34
34
|
include Enumerable
|
35
35
|
|
@@ -43,10 +43,10 @@ module EBNF::LL1
|
|
43
43
|
"\\'" => '\'', # \u0027 (apostrophe-quote, single quote mark)
|
44
44
|
'\\\\' => '\\' # \u005C (backslash)
|
45
45
|
}.freeze
|
46
|
-
ESCAPE_CHAR4 = /\\u(?:[0-9A-Fa-f]{4,4})
|
47
|
-
ESCAPE_CHAR8 = /\\U(?:[0-9A-Fa-f]{8,8})
|
48
|
-
ECHAR = /\\./
|
49
|
-
UCHAR = /#{ESCAPE_CHAR4}|#{ESCAPE_CHAR8}
|
46
|
+
ESCAPE_CHAR4 = /\\u(?:[0-9A-Fa-f]{4,4})/u.freeze # \uXXXX
|
47
|
+
ESCAPE_CHAR8 = /\\U(?:[0-9A-Fa-f]{8,8})/u.freeze # \UXXXXXXXX
|
48
|
+
ECHAR = /\\./u.freeze # More liberal unescaping
|
49
|
+
UCHAR = /#{ESCAPE_CHAR4}|#{ESCAPE_CHAR8}/n.freeze
|
50
50
|
|
51
51
|
##
|
52
52
|
# @return [Regexp] defines whitespace, including comments, otherwise whitespace must be explicit in terminals
|
@@ -59,7 +59,7 @@ module EBNF::LL1
|
|
59
59
|
#
|
60
60
|
# @param [String] string
|
61
61
|
# @return [String]
|
62
|
-
# @see
|
62
|
+
# @see https://www.w3.org/TR/rdf-sparql-query/#codepointEscape
|
63
63
|
def self.unescape_codepoints(string)
|
64
64
|
string = string.dup
|
65
65
|
string.force_encoding(Encoding::ASCII_8BIT) if string.respond_to?(:force_encoding)
|
@@ -81,7 +81,7 @@ module EBNF::LL1
|
|
81
81
|
#
|
82
82
|
# @param [String] input
|
83
83
|
# @return [String]
|
84
|
-
# @see
|
84
|
+
# @see https://www.w3.org/TR/rdf-sparql-query/#grammarEscapes
|
85
85
|
def self.unescape_string(input)
|
86
86
|
input.gsub(ECHAR) { |escaped| ESCAPE_CHARS[escaped] || escaped[1..-1]}
|
87
87
|
end
|
@@ -131,7 +131,6 @@ module EBNF::LL1
|
|
131
131
|
|
132
132
|
raise Error, "Terminal patterns not defined" unless @terminals && @terminals.length > 0
|
133
133
|
|
134
|
-
@lineno = 1
|
135
134
|
@scanner = Scanner.new(input, **options)
|
136
135
|
end
|
137
136
|
|
@@ -147,12 +146,6 @@ module EBNF::LL1
|
|
147
146
|
# @return [String]
|
148
147
|
attr_accessor :input
|
149
148
|
|
150
|
-
##
|
151
|
-
# The current line number (zero-based).
|
152
|
-
#
|
153
|
-
# @return [Integer]
|
154
|
-
attr_reader :lineno
|
155
|
-
|
156
149
|
##
|
157
150
|
# Returns `true` if the input string is lexically valid.
|
158
151
|
#
|
@@ -194,7 +187,7 @@ module EBNF::LL1
|
|
194
187
|
|
195
188
|
@first ||= begin
|
196
189
|
{} while !scanner.eos? && skip_whitespace
|
197
|
-
return
|
190
|
+
return nil if scanner.eos?
|
198
191
|
|
199
192
|
token = match_token(*types)
|
200
193
|
|
@@ -233,7 +226,7 @@ module EBNF::LL1
|
|
233
226
|
# @return [Token]
|
234
227
|
def recover(*types)
|
235
228
|
until scanner.eos? || tok = match_token(*types)
|
236
|
-
if scanner.skip_until(@whitespace || /\s
|
229
|
+
if scanner.skip_until(@whitespace || /\s+/m).nil? # Skip past current "token"
|
237
230
|
# No whitespace at the end, must be at the end of the string
|
238
231
|
scanner.terminate
|
239
232
|
else
|
@@ -243,6 +236,14 @@ module EBNF::LL1
|
|
243
236
|
scanner.unscan if tok
|
244
237
|
first
|
245
238
|
end
|
239
|
+
|
240
|
+
##
|
241
|
+
# The current line number (one-based).
|
242
|
+
#
|
243
|
+
# @return [Integer]
|
244
|
+
def lineno
|
245
|
+
scanner.lineno
|
246
|
+
end
|
246
247
|
protected
|
247
248
|
|
248
249
|
# @return [StringScanner]
|
@@ -253,9 +254,7 @@ module EBNF::LL1
|
|
253
254
|
def skip_whitespace
|
254
255
|
# skip all white space, but keep track of the current line number
|
255
256
|
while @whitespace && !scanner.eos?
|
256
|
-
|
257
|
-
@lineno += matched.count("\n")
|
258
|
-
else
|
257
|
+
unless scanner.scan(@whitespace)
|
259
258
|
return
|
260
259
|
end
|
261
260
|
end
|
@@ -281,7 +280,6 @@ module EBNF::LL1
|
|
281
280
|
if matched = scanner.scan(term.regexp)
|
282
281
|
#STDERR.puts " matched #{term.type.inspect}: #{matched.inspect}"
|
283
282
|
tok = token(term.type, term.canonicalize(matched))
|
284
|
-
@lineno += matched.count("\n")
|
285
283
|
return tok
|
286
284
|
end
|
287
285
|
end
|
@@ -372,7 +370,7 @@ module EBNF::LL1
|
|
372
370
|
# token.type #=> :LANGTAG
|
373
371
|
# token.value #=> "en"
|
374
372
|
#
|
375
|
-
# @see
|
373
|
+
# @see https://en.wikipedia.org/wiki/Lexical_analysis#Token
|
376
374
|
class Token
|
377
375
|
##
|
378
376
|
# The token's symbol type.
|
@@ -493,7 +491,7 @@ module EBNF::LL1
|
|
493
491
|
# "invalid token '%' on line 10",
|
494
492
|
# input: query, token: '%', lineno: 9)
|
495
493
|
#
|
496
|
-
# @see
|
494
|
+
# @see https://ruby-doc.org/core/classes/StandardError.html
|
497
495
|
class Error < StandardError
|
498
496
|
##
|
499
497
|
# The input string associated with the error.
|
data/lib/ebnf/ll1/parser.rb
CHANGED
@@ -3,12 +3,52 @@ require 'ebnf/ll1/lexer'
|
|
3
3
|
module EBNF::LL1
|
4
4
|
##
|
5
5
|
# A Generic LL1 parser using a lexer and branch tables defined using the SWAP tool chain (modified).
|
6
|
+
#
|
7
|
+
# # Creating terminal definitions and parser rules to parse generated grammars
|
8
|
+
#
|
9
|
+
# The parser is initialized with callbacks invoked on entry and exit
|
10
|
+
# to each `terminal` and `production`. A trivial parser loop can be described as follows:
|
11
|
+
#
|
12
|
+
# require 'ebnf/ll1/parser'
|
13
|
+
# require 'meta'
|
14
|
+
#
|
15
|
+
# class Parser
|
16
|
+
# include Meta
|
17
|
+
# include EBNF::LL1::Parser
|
18
|
+
#
|
19
|
+
# terminal(:SYMBOL, /([a-z]|[A-Z]|[0-9]|_)+/) do |prod, token, input|
|
20
|
+
# # Add data based on scanned token to input
|
21
|
+
# input[:symbol] = token.value
|
22
|
+
# end
|
23
|
+
#
|
24
|
+
# start_production(:rule) do |input, current, callback|
|
25
|
+
# # Process on start of production
|
26
|
+
# # Set state for entry into recursed rules through current
|
27
|
+
#
|
28
|
+
# # Callback to parser loop with callback
|
29
|
+
# end
|
30
|
+
#
|
31
|
+
# production(:rule) do |input, current, callback|
|
32
|
+
# # Process on end of production
|
33
|
+
# # return results in input, retrieve results from recursed rules in current
|
34
|
+
#
|
35
|
+
# # Callback to parser loop with callback
|
36
|
+
# end
|
37
|
+
#
|
38
|
+
# def initialize(input)
|
39
|
+
# parse(input, start_symbol,
|
40
|
+
# branch: BRANCH,
|
41
|
+
# first: FIRST,
|
42
|
+
# follow: FOLLOW,
|
43
|
+
# cleanup: CLEANUP
|
44
|
+
# ) do |context, *data|
|
45
|
+
# # Process calls from callback from productions
|
46
|
+
#
|
47
|
+
# rescue ArgumentError, EBNF::LL1::Parser::Error => e
|
48
|
+
# progress("Parsing completed with errors:\n\t#{e.message}")
|
49
|
+
# raise RDF::ReaderError, e.message if validate?
|
50
|
+
# end
|
6
51
|
module Parser
|
7
|
-
##
|
8
|
-
# @private
|
9
|
-
# level above which debug messages are supressed
|
10
|
-
DEBUG_LEVEL = 10
|
11
|
-
|
12
52
|
##
|
13
53
|
# @return [Integer] line number of current token
|
14
54
|
attr_reader :lineno
|
@@ -186,7 +226,7 @@ module EBNF::LL1
|
|
186
226
|
# def each_statement(&block)
|
187
227
|
# @callback = block
|
188
228
|
#
|
189
|
-
# parse(START.to_sym) do |context, *data|
|
229
|
+
# parse(input, START.to_sym) do |context, *data|
|
190
230
|
# case context
|
191
231
|
# when :statement
|
192
232
|
# yield *data
|
@@ -205,16 +245,13 @@ module EBNF::LL1
|
|
205
245
|
# Lists valid terminals that can precede each production (for error recovery).
|
206
246
|
# @option options [Hash{Symbol,String => Array<Symbol,String>}] :follow ({})
|
207
247
|
# Lists valid terminals that can follow each production (for error recovery).
|
208
|
-
# @option options [Boolean] :validate (false)
|
209
|
-
# whether to validate the parsed statements and values. If not validating, the parser will attempt to recover from errors.
|
210
|
-
# @option options [Boolean] :progress
|
211
|
-
# Show progress of parser productions
|
212
|
-
# @option options [Boolean] :debug
|
213
|
-
# Detailed debug output
|
214
|
-
# @option options [Boolean] :reset_on_start
|
215
|
-
# Reset the parser state if the start token set with `prod` is found in a production. This reduces the production stack depth growth, which is appropriate for some grammars.
|
216
248
|
# @option options [Integer] :high_water passed to lexer
|
249
|
+
# @option options [Logger] :logger for errors/progress/debug.
|
217
250
|
# @option options [Integer] :low_water passed to lexer
|
251
|
+
# @option options [Boolean] :reset_on_start
|
252
|
+
# Reset the parser state if the start token set with `prod` is found in a production. This reduces the production stack depth growth, which is appropriate for some grammars.
|
253
|
+
# @option options [Boolean] :validate (false)
|
254
|
+
# whether to validate the parsed statements and values. If not validating, the parser will attempt to recover from errors.
|
218
255
|
# @yield [context, *data]
|
219
256
|
# Yields to return data to the parser
|
220
257
|
# @yieldparam [:statement, :trace] context
|
@@ -225,13 +262,9 @@ module EBNF::LL1
|
|
225
262
|
# @raise [Exception] Raises exceptions for parsing errors
|
226
263
|
# or errors raised during processing callbacks. Internal
|
227
264
|
# errors are raised using {Error}.
|
228
|
-
# @see
|
265
|
+
# @see https://cs.adelaide.edu.au/~charles/lt/Lectures/07-ErrorRecovery.pdf
|
229
266
|
def parse(input = nil, start = nil, **options, &block)
|
230
267
|
@options = options.dup
|
231
|
-
@options[:debug] ||= case
|
232
|
-
when @options[:progress] then 2
|
233
|
-
when @options[:validate] then 1
|
234
|
-
end
|
235
268
|
@branch = options[:branch]
|
236
269
|
@first = options[:first] ||= {}
|
237
270
|
@follow = options[:follow] ||= {}
|
@@ -356,9 +389,9 @@ module EBNF::LL1
|
|
356
389
|
end
|
357
390
|
|
358
391
|
# Get the list of follows for this sequence, this production and the stacked productions.
|
359
|
-
debug("recovery", "stack follows:"
|
392
|
+
debug("recovery", "stack follows:")
|
360
393
|
todo_stack.reverse.each do |todo|
|
361
|
-
debug("recovery"
|
394
|
+
debug("recovery") {" #{todo[:prod]}: #{@follow[todo[:prod]].inspect}"}
|
362
395
|
end
|
363
396
|
|
364
397
|
# Find all follows to the top of the stack
|
@@ -466,14 +499,15 @@ module EBNF::LL1
|
|
466
499
|
protected
|
467
500
|
|
468
501
|
##
|
469
|
-
# Error information, used as level `
|
502
|
+
# Error information, used as level `3` logger messages.
|
503
|
+
# Messages may be logged and are saved for reporting at end of parsing.
|
470
504
|
#
|
471
505
|
# @param [String] node Relevant location associated with message
|
472
506
|
# @param [String] message Error string
|
473
|
-
# @param [Hash] options
|
507
|
+
# @param [Hash{Symbol => Object}] options
|
474
508
|
# @option options [URI, #to_s] :production
|
475
509
|
# @option options [Token] :token
|
476
|
-
# @see
|
510
|
+
# @see #debug
|
477
511
|
def error(node, message, **options)
|
478
512
|
lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
|
479
513
|
m = "ERROR "
|
@@ -483,83 +517,82 @@ module EBNF::LL1
|
|
483
517
|
m += ", production = #{options[:production].inspect}" if options[:production]
|
484
518
|
@error_log << m unless @recovering
|
485
519
|
@recovering = true
|
486
|
-
debug(node, m, level:
|
520
|
+
debug(node, m, level: options.fetch(:level, 3), **options)
|
487
521
|
if options[:raise] || @options[:validate]
|
488
522
|
raise Error.new(m, lineno: lineno, token: options[:token], production: options[:production])
|
489
523
|
end
|
490
524
|
end
|
491
525
|
|
492
526
|
##
|
493
|
-
# Warning information, used as level `
|
527
|
+
# Warning information, used as level `2` logger messages.
|
528
|
+
# Messages may be logged and are saved for reporting at end of parsing.
|
494
529
|
#
|
495
530
|
# @param [String] node Relevant location associated with message
|
496
531
|
# @param [String] message Error string
|
497
532
|
# @param [Hash] options
|
498
533
|
# @option options [URI, #to_s] :production
|
499
534
|
# @option options [Token] :token
|
500
|
-
# @see
|
535
|
+
# @see #debug
|
501
536
|
def warn(node, message, **options)
|
537
|
+
lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
|
502
538
|
m = "WARNING "
|
503
|
-
m += "[line: #{
|
539
|
+
m += "[line: #{lineno}] " if lineno
|
504
540
|
m += message
|
505
541
|
m += " (found #{options[:token].inspect})" if options[:token]
|
506
542
|
m += ", production = #{options[:production].inspect}" if options[:production]
|
507
543
|
@error_log << m unless @recovering
|
508
|
-
debug(node, m, level:
|
544
|
+
debug(node, m, level: 2, lineno: lineno, **options)
|
509
545
|
end
|
510
546
|
|
511
547
|
##
|
512
|
-
# Progress
|
548
|
+
# Progress logged when parsing. Passed as level `1` logger messages.
|
513
549
|
#
|
514
|
-
#
|
550
|
+
# The call is ignored, unless `@options[:logger]` is set.
|
551
|
+
#
|
552
|
+
# @overload progress(node, message, **options, &block)
|
515
553
|
# @param [String] node Relevant location associated with message
|
516
554
|
# @param [String] message ("")
|
517
555
|
# @param [Hash] options
|
518
556
|
# @option options [Integer] :depth
|
519
557
|
# Recursion depth for indenting output
|
520
|
-
# @see
|
558
|
+
# @see #debug
|
521
559
|
def progress(node, *args, &block)
|
522
|
-
return unless @options[:
|
560
|
+
return unless @options[:logger]
|
561
|
+
lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
|
523
562
|
args << {} unless args.last.is_a?(Hash)
|
524
|
-
args.last[:level] ||=
|
563
|
+
args.last[:level] ||= 1
|
564
|
+
args.last[:lineno] ||= lineno
|
525
565
|
debug(node, *args, &block)
|
526
566
|
end
|
527
567
|
|
528
568
|
##
|
529
|
-
#
|
569
|
+
# Debug logging.
|
530
570
|
#
|
531
|
-
# The call is ignored, unless `@options[:
|
532
|
-
# case it yields tracing information as indicated. Additionally,
|
533
|
-
# if `@options[:debug]` is an Integer, the call is aborted if the
|
534
|
-
# `:level` option is less than than `:level`.
|
571
|
+
# The call is ignored, unless `@options[:logger]` is set.
|
535
572
|
#
|
536
573
|
# @overload debug(node, message, **options)
|
537
574
|
# @param [Array<String>] args Relevant location associated with message
|
538
575
|
# @param [Hash] options
|
539
576
|
# @option options [Integer] :depth
|
540
577
|
# Recursion depth for indenting output
|
541
|
-
# @
|
542
|
-
|
543
|
-
|
544
|
-
# progress information, and anything higher is for various levels
|
545
|
-
# of debug information.
|
546
|
-
#
|
547
|
-
# @yield trace, level, lineno, depth, args
|
548
|
-
# @yieldparam [:trace] trace
|
549
|
-
# @yieldparam [Integer] level
|
550
|
-
# @yieldparam [Integer] lineno
|
551
|
-
# @yieldparam [Integer] depth Recursive depth of productions
|
552
|
-
# @yieldparam [Array<String>] args
|
553
|
-
# @yieldreturn [String] added to message
|
554
|
-
def debug(*args)
|
555
|
-
return unless @options[:debug] && @parse_callback
|
578
|
+
# @yieldreturn [String] additional string appended to `message`.
|
579
|
+
def debug(*args, &block)
|
580
|
+
return unless @options[:logger]
|
556
581
|
options = args.last.is_a?(Hash) ? args.pop : {}
|
557
|
-
|
558
|
-
|
559
|
-
|
582
|
+
lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
|
583
|
+
level = options.fetch(:level, 0)
|
560
584
|
depth = options[:depth] || self.depth
|
561
|
-
|
562
|
-
|
585
|
+
|
586
|
+
if self.respond_to?(:log_debug)
|
587
|
+
level = [:debug, :info, :warn, :error, :fatal][level]
|
588
|
+
log_debug(*args, **options.merge(level: level, lineno: lineno, depth: depth), &block)
|
589
|
+
elsif @options[:logger].respond_to?(:add)
|
590
|
+
args << yield if block_given?
|
591
|
+
@options[:logger].add(level, "[#{lineno}]" + (" " * depth) + args.join(" "))
|
592
|
+
elsif @options[:logger].respond_to?(:<<)
|
593
|
+
args << yield if block_given?
|
594
|
+
@options[:logger] << "[#{lineno}]" + (" " * depth) + args.join(" ")
|
595
|
+
end
|
563
596
|
end
|
564
597
|
|
565
598
|
private
|
@@ -570,7 +603,7 @@ module EBNF::LL1
|
|
570
603
|
if handler
|
571
604
|
# Create a new production data element, potentially allowing handler
|
572
605
|
# to customize before pushing on the @prod_data stack
|
573
|
-
|
606
|
+
debug("#{prod}(:start):#{@prod_data.length}") {@prod_data.last}
|
574
607
|
data = {}
|
575
608
|
begin
|
576
609
|
self.class.eval_with_binding(self) {
|
@@ -584,12 +617,12 @@ module EBNF::LL1
|
|
584
617
|
elsif [:merge, :star].include?(@cleanup[prod])
|
585
618
|
# Save current data to merge later
|
586
619
|
@prod_data << {}
|
587
|
-
|
620
|
+
debug("#{prod}(:start}:#{@prod_data.length}:cleanup:#{@cleanup[prod]}") { get_token.inspect + (@recovering ? ' recovering' : '')}
|
588
621
|
else
|
589
622
|
# Make sure we push as many was we pop, even if there is no
|
590
623
|
# explicit start handler
|
591
624
|
@prod_data << {} if self.class.production_handlers[prod]
|
592
|
-
|
625
|
+
debug("#{prod}(:start:#{@prod_data.length})") { get_token.inspect + (@recovering ? ' recovering' : '')}
|
593
626
|
end
|
594
627
|
#puts "prod_data(s): " + @prod_data.inspect
|
595
628
|
end
|
@@ -623,7 +656,7 @@ module EBNF::LL1
|
|
623
656
|
else Array(input[k]) + Array(v)
|
624
657
|
end
|
625
658
|
end
|
626
|
-
|
659
|
+
debug("#{prod}(:finish):#{@prod_data.length} cleanup:#{@cleanup[prod]}") {@prod_data.last}
|
627
660
|
else
|
628
661
|
progress("#{prod}(:finish):#{@prod_data.length}") { "recovering" if @recovering }
|
629
662
|
end
|
@@ -730,7 +763,7 @@ module EBNF::LL1
|
|
730
763
|
# "invalid token '%' on line 10",
|
731
764
|
# token: '%', lineno: 9, production: :turtleDoc)
|
732
765
|
#
|
733
|
-
# @see
|
766
|
+
# @see https://ruby-doc.org/core/classes/StandardError.html
|
734
767
|
class Error < StandardError
|
735
768
|
##
|
736
769
|
# The current production.
|