ebnf 1.2.0 → 2.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +223 -199
- data/UNLICENSE +1 -1
- data/VERSION +1 -1
- data/bin/ebnf +38 -19
- data/etc/abnf-core.ebnf +52 -0
- data/etc/abnf.abnf +121 -0
- data/etc/abnf.ebnf +124 -0
- data/etc/abnf.sxp +45 -0
- data/etc/doap.ttl +23 -18
- data/etc/ebnf.ebnf +21 -33
- data/etc/ebnf.html +76 -160
- data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
- data/etc/ebnf.ll1.sxp +182 -183
- data/etc/ebnf.peg.rb +90 -0
- data/etc/ebnf.peg.sxp +84 -0
- data/etc/ebnf.sxp +40 -41
- data/etc/iso-ebnf.ebnf +140 -0
- data/etc/iso-ebnf.isoebnf +138 -0
- data/etc/iso-ebnf.sxp +65 -0
- data/etc/sparql.ebnf +4 -4
- data/etc/sparql.html +1603 -1751
- data/etc/sparql.ll1.sxp +7372 -7372
- data/etc/sparql.peg.rb +532 -0
- data/etc/sparql.peg.sxp +597 -0
- data/etc/sparql.sxp +363 -362
- data/etc/turtle.ebnf +3 -3
- data/etc/turtle.html +465 -517
- data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
- data/etc/turtle.ll1.sxp +425 -425
- data/etc/turtle.peg.rb +182 -0
- data/etc/turtle.peg.sxp +199 -0
- data/etc/turtle.sxp +103 -101
- data/lib/ebnf.rb +6 -1
- data/lib/ebnf/abnf.rb +301 -0
- data/lib/ebnf/abnf/core.rb +23 -0
- data/lib/ebnf/abnf/meta.rb +111 -0
- data/lib/ebnf/base.rb +114 -69
- data/lib/ebnf/bnf.rb +1 -26
- data/lib/ebnf/ebnf/meta.rb +90 -0
- data/lib/ebnf/isoebnf.rb +229 -0
- data/lib/ebnf/isoebnf/meta.rb +75 -0
- data/lib/ebnf/ll1.rb +131 -3
- data/lib/ebnf/ll1/lexer.rb +20 -22
- data/lib/ebnf/ll1/parser.rb +97 -64
- data/lib/ebnf/ll1/scanner.rb +82 -50
- data/lib/ebnf/native.rb +320 -0
- data/lib/ebnf/parser.rb +285 -302
- data/lib/ebnf/peg.rb +39 -0
- data/lib/ebnf/peg/parser.rb +561 -0
- data/lib/ebnf/peg/rule.rb +250 -0
- data/lib/ebnf/rule.rb +442 -148
- data/lib/ebnf/terminals.rb +21 -0
- data/lib/ebnf/writer.rb +587 -82
- metadata +125 -18
- data/etc/sparql.rb +0 -45773
@@ -0,0 +1,75 @@
|
|
1
|
+
# This file is automatically generated by ebnf version 2.0.0
|
2
|
+
# Derived from etc/iso-ebnf.ebnf
|
3
|
+
module ISOEBNFMeta
|
4
|
+
RULES = [
|
5
|
+
EBNF::Rule.new(:syntax, nil, [:star, :syntax_rule]).extend(EBNF::PEG::Rule),
|
6
|
+
EBNF::Rule.new(:syntax_rule, nil, [:seq, :meta_identifier, :defining_symbol, :definitions_list, :terminator_symbol]).extend(EBNF::PEG::Rule),
|
7
|
+
EBNF::Rule.new(:definitions_list, nil, [:seq, :single_definition, :_definitions_list_1]).extend(EBNF::PEG::Rule),
|
8
|
+
EBNF::Rule.new(:_definitions_list_1, nil, [:star, :_definitions_list_2]).extend(EBNF::PEG::Rule),
|
9
|
+
EBNF::Rule.new(:_definitions_list_2, nil, [:seq, :definition_separator_symbol, :definitions_list]).extend(EBNF::PEG::Rule),
|
10
|
+
EBNF::Rule.new(:single_definition, nil, [:seq, :term, :_single_definition_1]).extend(EBNF::PEG::Rule),
|
11
|
+
EBNF::Rule.new(:_single_definition_1, nil, [:star, :_single_definition_2]).extend(EBNF::PEG::Rule),
|
12
|
+
EBNF::Rule.new(:_single_definition_2, nil, [:seq, ",", :term]).extend(EBNF::PEG::Rule),
|
13
|
+
EBNF::Rule.new(:term, nil, [:seq, :factor, :_term_1]).extend(EBNF::PEG::Rule),
|
14
|
+
EBNF::Rule.new(:_term_1, nil, [:opt, :_term_2]).extend(EBNF::PEG::Rule),
|
15
|
+
EBNF::Rule.new(:_term_2, nil, [:seq, "-", :exception]).extend(EBNF::PEG::Rule),
|
16
|
+
EBNF::Rule.new(:exception, nil, [:seq, :factor]).extend(EBNF::PEG::Rule),
|
17
|
+
EBNF::Rule.new(:factor, nil, [:seq, :_factor_1, :primary]).extend(EBNF::PEG::Rule),
|
18
|
+
EBNF::Rule.new(:_factor_1, nil, [:opt, :_factor_2]).extend(EBNF::PEG::Rule),
|
19
|
+
EBNF::Rule.new(:_factor_2, nil, [:seq, :integer, "*"]).extend(EBNF::PEG::Rule),
|
20
|
+
EBNF::Rule.new(:primary, nil, [:alt, :optional_sequence, :repeated_sequence, :special_sequence, :grouped_sequence, :meta_identifier, :terminal_string, :empty]).extend(EBNF::PEG::Rule),
|
21
|
+
EBNF::Rule.new(:optional_sequence, nil, [:seq, :start_option_symbol, :definitions_list, :end_option_symbol]).extend(EBNF::PEG::Rule),
|
22
|
+
EBNF::Rule.new(:repeated_sequence, nil, [:seq, :start_repeat_symbol, :definitions_list, :end_repeat_symbol]).extend(EBNF::PEG::Rule),
|
23
|
+
EBNF::Rule.new(:grouped_sequence, nil, [:seq, "(", :definitions_list, ")"]).extend(EBNF::PEG::Rule),
|
24
|
+
EBNF::Rule.new(:_terminals, nil, [:seq], kind: :terminals).extend(EBNF::PEG::Rule),
|
25
|
+
EBNF::Rule.new(:terminal_string, nil, [:alt, :_terminal_string_1, :_terminal_string_2], kind: :terminal).extend(EBNF::PEG::Rule),
|
26
|
+
EBNF::Rule.new(:_terminal_string_1, nil, [:seq, "'", :_terminal_string_3, "'"]).extend(EBNF::PEG::Rule),
|
27
|
+
EBNF::Rule.new(:_terminal_string_3, nil, [:plus, :first_terminal_character]).extend(EBNF::PEG::Rule),
|
28
|
+
EBNF::Rule.new(:_terminal_string_2, nil, [:seq, "\"", :_terminal_string_4, "\""]).extend(EBNF::PEG::Rule),
|
29
|
+
EBNF::Rule.new(:_terminal_string_4, nil, [:plus, :second_terminal_character]).extend(EBNF::PEG::Rule),
|
30
|
+
EBNF::Rule.new(:meta_identifier, nil, [:seq, :letter, :_meta_identifier_1], kind: :terminal).extend(EBNF::PEG::Rule),
|
31
|
+
EBNF::Rule.new(:_meta_identifier_1, nil, [:star, :meta_identifier_character]).extend(EBNF::PEG::Rule),
|
32
|
+
EBNF::Rule.new(:integer, nil, [:plus, :decimal_digit], kind: :terminal).extend(EBNF::PEG::Rule),
|
33
|
+
EBNF::Rule.new(:special_sequence, nil, [:seq, "?", :_special_sequence_1, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
|
34
|
+
EBNF::Rule.new(:_special_sequence_1, nil, [:star, :special_sequence_character]).extend(EBNF::PEG::Rule),
|
35
|
+
EBNF::Rule.new(:comment, nil, [:seq, :start_comment_symbol, :_comment_1, :end_comment_symbol], kind: :terminal).extend(EBNF::PEG::Rule),
|
36
|
+
EBNF::Rule.new(:_comment_1, nil, [:star, :comment_symbol]).extend(EBNF::PEG::Rule),
|
37
|
+
EBNF::Rule.new(:comment_symbol, nil, [:alt, :comment, :commentless_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule),
|
38
|
+
EBNF::Rule.new(:commentless_symbol, nil, [:alt, :terminal_character, :meta_identifier, :integer, :terminal_string, :special_sequence], kind: :terminal).extend(EBNF::PEG::Rule),
|
39
|
+
EBNF::Rule.new(:letter, nil, [:range, "a-zA-Z"], kind: :terminal).extend(EBNF::PEG::Rule),
|
40
|
+
EBNF::Rule.new(:decimal_digit, nil, [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule),
|
41
|
+
EBNF::Rule.new(:meta_identifier_character, nil, [:alt, :letter, :decimal_digit, "_"], kind: :terminal).extend(EBNF::PEG::Rule),
|
42
|
+
EBNF::Rule.new(:first_terminal_character, nil, [:diff, :terminal_character, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
|
43
|
+
EBNF::Rule.new(:second_terminal_character, nil, [:diff, :terminal_character, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
|
44
|
+
EBNF::Rule.new(:special_sequence_character, nil, [:diff, :terminal_character, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
|
45
|
+
EBNF::Rule.new(:terminal_character, nil, [:alt, :letter, :decimal_digit, :concatenate_symbol, :defining_symbol, :definition_separator_symbol, :end_comment_symbol, :end_group_symbol, :end_option_symbol, :end_repeat_symbol, :except_symbol, :first_quote_symbol, :repetition_symbol, :second_quote_symbol, :special_sequence_symbol, :start_comment_symbol, :start_group_symbol, :start_option_symbol, :start_repeat_symbol, :terminator_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule),
|
46
|
+
EBNF::Rule.new(:other_character, nil, [:alt, :_other_character_1, "\\"], kind: :terminal).extend(EBNF::PEG::Rule),
|
47
|
+
EBNF::Rule.new(:_other_character_1, nil, [:range, ":+_%@&$<>^` ̃#x20#x23"], kind: :terminal).extend(EBNF::PEG::Rule),
|
48
|
+
EBNF::Rule.new(:gap_separator, nil, [:range, "#x9#xa#xb#xc#xd#x20"], kind: :terminal).extend(EBNF::PEG::Rule),
|
49
|
+
EBNF::Rule.new(:_pass, nil, [:alt, :__pass_1, :comment], kind: :pass).extend(EBNF::PEG::Rule),
|
50
|
+
EBNF::Rule.new(:__pass_1, nil, [:plus, :gap_separator]).extend(EBNF::PEG::Rule),
|
51
|
+
EBNF::Rule.new(:empty, nil, [:seq, ""], kind: :terminal).extend(EBNF::PEG::Rule),
|
52
|
+
EBNF::Rule.new(:concatenate_symbol, nil, [:seq, ","], kind: :terminal).extend(EBNF::PEG::Rule),
|
53
|
+
EBNF::Rule.new(:repetition_symbol, nil, [:seq, "*"], kind: :terminal).extend(EBNF::PEG::Rule),
|
54
|
+
EBNF::Rule.new(:except_symbol, nil, [:seq, "-"], kind: :terminal).extend(EBNF::PEG::Rule),
|
55
|
+
EBNF::Rule.new(:first_quote_symbol, nil, [:seq, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
|
56
|
+
EBNF::Rule.new(:second_quote_symbol, nil, [:seq, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
|
57
|
+
EBNF::Rule.new(:start_comment_symbol, nil, [:seq, "(*"], kind: :terminal).extend(EBNF::PEG::Rule),
|
58
|
+
EBNF::Rule.new(:end_comment_symbol, nil, [:seq, "*)"], kind: :terminal).extend(EBNF::PEG::Rule),
|
59
|
+
EBNF::Rule.new(:start_group_symbol, nil, [:seq, "("], kind: :terminal).extend(EBNF::PEG::Rule),
|
60
|
+
EBNF::Rule.new(:end_group_symbol, nil, [:seq, ")"], kind: :terminal).extend(EBNF::PEG::Rule),
|
61
|
+
EBNF::Rule.new(:special_sequence_symbol, nil, [:seq, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
|
62
|
+
EBNF::Rule.new(:defining_symbol, nil, [:alt, "=", ":"], kind: :terminal).extend(EBNF::PEG::Rule),
|
63
|
+
EBNF::Rule.new(:definition_separator_symbol, nil, [:alt, "|", "/", "!"], kind: :terminal).extend(EBNF::PEG::Rule),
|
64
|
+
EBNF::Rule.new(:terminator_symbol, nil, [:alt, ";", "."], kind: :terminal).extend(EBNF::PEG::Rule),
|
65
|
+
EBNF::Rule.new(:start_option_symbol, nil, [:seq, "["], kind: :terminal).extend(EBNF::PEG::Rule),
|
66
|
+
EBNF::Rule.new(:end_option_symbol, nil, [:seq, "]"], kind: :terminal).extend(EBNF::PEG::Rule),
|
67
|
+
EBNF::Rule.new(:start_repeat_symbol, nil, [:alt, "{", "(:"], kind: :terminal).extend(EBNF::PEG::Rule),
|
68
|
+
EBNF::Rule.new(:end_repeat_symbol, nil, [:alt, "}", ":)"], kind: :terminal).extend(EBNF::PEG::Rule),
|
69
|
+
EBNF::Rule.new(:gap_free_symbol, nil, [:alt, :_gap_free_symbol_1, :terminal_string], kind: :terminal).extend(EBNF::PEG::Rule),
|
70
|
+
EBNF::Rule.new(:_gap_free_symbol_1, nil, [:seq, :_gap_free_symbol_3, :terminal_character]).extend(EBNF::PEG::Rule),
|
71
|
+
EBNF::Rule.new(:_gap_free_symbol_3, nil, [:not, :_gap_free_symbol_2]).extend(EBNF::PEG::Rule),
|
72
|
+
EBNF::Rule.new(:_gap_free_symbol_2, nil, [:range, "'\""], kind: :terminal).extend(EBNF::PEG::Rule),
|
73
|
+
]
|
74
|
+
end
|
75
|
+
|
data/lib/ebnf/ll1.rb
CHANGED
@@ -1,4 +1,86 @@
|
|
1
1
|
module EBNF
|
2
|
+
##
|
3
|
+
# This module extends {EBNF::Base} to create metadata including _branch_, [First/Follow][], and other tables which are used by {EBNF::LL1::Parser} to recognize examples of the associated grammar.
|
4
|
+
#
|
5
|
+
### Branch Table
|
6
|
+
#
|
7
|
+
# The Branch table is a hash mapping production rules to a hash relating terminals appearing in input to the sequence of productions to follow when the corresponding input terminal is found. This allows either the `seq` primitive, where all terminals map to the same sequence of productions, or the `alt` primitive, where each terminal may map to a different production.
|
8
|
+
#
|
9
|
+
# BRANCH = {
|
10
|
+
# :alt => {
|
11
|
+
# "(" => [:seq, :_alt_1],
|
12
|
+
# :HEX => [:seq, :_alt_1],
|
13
|
+
# :O_RANGE => [:seq, :_alt_1],
|
14
|
+
# :RANGE => [:seq, :_alt_1],
|
15
|
+
# :STRING1 => [:seq, :_alt_1],
|
16
|
+
# :STRING2 => [:seq, :_alt_1],
|
17
|
+
# :SYMBOL => [:seq, :_alt_1],
|
18
|
+
# },
|
19
|
+
# ...
|
20
|
+
# :declaration => {
|
21
|
+
# "@pass" => [:pass],
|
22
|
+
# "@terminals" => ["@terminals"],
|
23
|
+
# },
|
24
|
+
# ...
|
25
|
+
# }
|
26
|
+
#
|
27
|
+
# In this case the `alt` rule, which is `seq ('|' seq)*`, can be entered when any of the specified tokens appears on the input stream. They all cause the same token to be passed to the `seq` rule, followed by `_alt_1`, which handles the `('|' seq)*` portion of the rule after the first sequence is matched.
|
28
|
+
#
|
29
|
+
# The `declaration` rule is `'@terminals' | pass`, using the `alt` primitive to determine the production to run based on the terminal appearing on the input stream. Eventually, a terminal production is found and the token is consumed.
|
30
|
+
#
|
31
|
+
### First/Follow Table
|
32
|
+
#
|
33
|
+
# The [First/Follow][] table is a hash mapping production rules to the terminals that may begin or follow the rule. For example:
|
34
|
+
#
|
35
|
+
# FIRST = {
|
36
|
+
# :alt => [
|
37
|
+
# :HEX,
|
38
|
+
# :SYMBOL,
|
39
|
+
# :RANGE,
|
40
|
+
# :O_RANGE,
|
41
|
+
# :STRING1,
|
42
|
+
# :STRING2,
|
43
|
+
# "("],
|
44
|
+
# ...
|
45
|
+
# }
|
46
|
+
#
|
47
|
+
### Terminals Table
|
48
|
+
#
|
49
|
+
# This table is a simple list of the terminal productions found in the grammar. For example:
|
50
|
+
#
|
51
|
+
# TERMINALS = ["(", ")", "-",
|
52
|
+
# "@pass", "@terminals",
|
53
|
+
# :HEX, :LHS, :O_RANGE,:POSTFIX,
|
54
|
+
# :RANGE, :STRING1, :STRING2, :SYMBOL,"|"
|
55
|
+
# ].freeze
|
56
|
+
#
|
57
|
+
### Cleanup Table
|
58
|
+
#
|
59
|
+
# This table identifies productions which used EBNF rules, which are transformed to BNF for actual parsing. This allows the parser, in some cases, to reproduce *star*, *plus*, and *opt* rule matches. For example:
|
60
|
+
#
|
61
|
+
# CLEANUP = {
|
62
|
+
# :_alt_1 => :star,
|
63
|
+
# :_alt_3 => :merge,
|
64
|
+
# :_diff_1 => :opt,
|
65
|
+
# :ebnf => :star,
|
66
|
+
# :_ebnf_2 => :merge,
|
67
|
+
# :_postfix_1 => :opt,
|
68
|
+
# :seq => :plus,
|
69
|
+
# :_seq_1 => :star,
|
70
|
+
# :_seq_2 => :merge,
|
71
|
+
# }.freeze
|
72
|
+
#
|
73
|
+
# In this case the `ebnf` rule was `(declaration | rule)*`. As BNF does not support a star operator, this is decomposed into a set of rules using `alt` and `seq` primitives:
|
74
|
+
#
|
75
|
+
# ebnf ::= _empty | _ebnf_2
|
76
|
+
# _ebnf_1 ::= declaration | rule
|
77
|
+
# _ebnf_2 ::= _ebnf_1 ebnf
|
78
|
+
# _ebnf_3 ::= ebnf
|
79
|
+
#
|
80
|
+
# The `_empty` production matches an empty string, so allows for no value. `_ebnf_2` matches `declaration | rule` (using the `alt` primitive) followed by `ebnf`, creating a sequence of zero or more `declaration` or `rule` members.
|
81
|
+
#
|
82
|
+
# [First/Follow]: https://en.wikipedia.org/wiki/LL_parser#Constructing_an_LL.281.29_parsing_table
|
83
|
+
|
2
84
|
module LL1
|
3
85
|
autoload :Lexer, "ebnf/ll1/lexer"
|
4
86
|
autoload :Parser, "ebnf/ll1/parser"
|
@@ -51,8 +133,40 @@ module EBNF
|
|
51
133
|
##
|
52
134
|
# Create first/follow for each rule using techniques defined for LL(1) parsers.
|
53
135
|
#
|
136
|
+
# This takes rules which have been transformed into BNF and adds first/follow and other information to the rules to allow the generation of metadata tables used for driving a parser.
|
137
|
+
#
|
138
|
+
# Given an initial rule in EBNF:
|
139
|
+
#
|
140
|
+
# (rule ebnf "1" (star (alt declaration rule)))
|
141
|
+
#
|
142
|
+
# The BNF transformation becomes:
|
143
|
+
#
|
144
|
+
# (rule ebnf "1" (alt _empty _ebnf_2))
|
145
|
+
# (rule _ebnf_1 "1.1" (alt declaration rule))
|
146
|
+
# (rule _ebnf_2 "1.2" (seq _ebnf_1 ebnf))
|
147
|
+
# (rule _ebnf_3 "1.3" (seq ebnf))
|
148
|
+
#
|
149
|
+
# After running this method, the rules are annotated with first/follow and cleanup rules:
|
150
|
+
#
|
151
|
+
# (rule ebnf "1"
|
152
|
+
# (start #t)
|
153
|
+
# (first "@pass" "@terminals" LHS _eps)
|
154
|
+
# (follow _eof)
|
155
|
+
# (cleanup star)
|
156
|
+
# (alt _empty _ebnf_2))
|
157
|
+
# (rule _ebnf_1 "1.1"
|
158
|
+
# (first "@pass" "@terminals" LHS)
|
159
|
+
# (follow "@pass" "@terminals" LHS _eof)
|
160
|
+
# (alt declaration rule))
|
161
|
+
# (rule _ebnf_2 "1.2"
|
162
|
+
# (first "@pass" "@terminals" LHS)
|
163
|
+
# (follow _eof)
|
164
|
+
# (cleanup merge)
|
165
|
+
# (seq _ebnf_1 ebnf))
|
166
|
+
# (rule _ebnf_3 "1.3" (first "@pass" "@terminals" LHS _eps) (follow _eof) (seq ebnf))
|
167
|
+
#
|
54
168
|
# @return [EBNF] self
|
55
|
-
# @see
|
169
|
+
# @see https://en.wikipedia.org/wiki/LL_parser#Constructing_an_LL.281.29_parsing_table
|
56
170
|
# @param [Array<Symbol>] starts
|
57
171
|
# Set of symbols which are start rules
|
58
172
|
def first_follow(*starts)
|
@@ -96,8 +210,9 @@ module EBNF
|
|
96
210
|
firsts, follows = 0, 0
|
97
211
|
# add Fi(wi) to Fi(Ai) for every rule Ai → wi
|
98
212
|
#
|
99
|
-
# For sequences, this is the first rule in the sequence.
|
100
|
-
# For alts, this is every rule in the sequence
|
213
|
+
# * For sequences, this is the first rule in the sequence.
|
214
|
+
# * For alts, this is every rule in the sequence
|
215
|
+
# * Other rules don't matter, as they don't appear in strict BNF
|
101
216
|
each(:rule) do |ai|
|
102
217
|
# Fi(a w' ) = { a } for every terminal a
|
103
218
|
ai.terminals(ast).each do |t|
|
@@ -276,6 +391,19 @@ module EBNF
|
|
276
391
|
end
|
277
392
|
end
|
278
393
|
|
394
|
+
##
|
395
|
+
# Output Ruby parser files for LL(1) parsing
|
396
|
+
#
|
397
|
+
# @param [IO, StringIO] output
|
398
|
+
def to_ruby_ll1(output, **options)
|
399
|
+
self.outputTable(output, 'BRANCH', self.branch, 1)
|
400
|
+
self.outputTable(output, 'TERMINALS', self.terminals, 1)
|
401
|
+
self.outputTable(output, 'FIRST', self.first, 1)
|
402
|
+
self.outputTable(output, 'FOLLOW', self.follow, 1)
|
403
|
+
self.outputTable(output, 'CLEANUP', self.cleanup, 1)
|
404
|
+
self.outputTable(output, 'PASS', [self.pass], 1) if self.pass
|
405
|
+
end
|
406
|
+
|
279
407
|
private
|
280
408
|
def do_production(lhs)
|
281
409
|
rule = find_rule(lhs)
|
data/lib/ebnf/ll1/lexer.rb
CHANGED
@@ -29,7 +29,7 @@ module EBNF::LL1
|
|
29
29
|
# warn error.inspect
|
30
30
|
# end
|
31
31
|
#
|
32
|
-
# @see
|
32
|
+
# @see https://en.wikipedia.org/wiki/Lexical_analysis
|
33
33
|
class Lexer
|
34
34
|
include Enumerable
|
35
35
|
|
@@ -43,10 +43,10 @@ module EBNF::LL1
|
|
43
43
|
"\\'" => '\'', # \u0027 (apostrophe-quote, single quote mark)
|
44
44
|
'\\\\' => '\\' # \u005C (backslash)
|
45
45
|
}.freeze
|
46
|
-
ESCAPE_CHAR4 = /\\u(?:[0-9A-Fa-f]{4,4})
|
47
|
-
ESCAPE_CHAR8 = /\\U(?:[0-9A-Fa-f]{8,8})
|
48
|
-
ECHAR = /\\./
|
49
|
-
UCHAR = /#{ESCAPE_CHAR4}|#{ESCAPE_CHAR8}
|
46
|
+
ESCAPE_CHAR4 = /\\u(?:[0-9A-Fa-f]{4,4})/u.freeze # \uXXXX
|
47
|
+
ESCAPE_CHAR8 = /\\U(?:[0-9A-Fa-f]{8,8})/u.freeze # \UXXXXXXXX
|
48
|
+
ECHAR = /\\./u.freeze # More liberal unescaping
|
49
|
+
UCHAR = /#{ESCAPE_CHAR4}|#{ESCAPE_CHAR8}/n.freeze
|
50
50
|
|
51
51
|
##
|
52
52
|
# @return [Regexp] defines whitespace, including comments, otherwise whitespace must be explicit in terminals
|
@@ -59,7 +59,7 @@ module EBNF::LL1
|
|
59
59
|
#
|
60
60
|
# @param [String] string
|
61
61
|
# @return [String]
|
62
|
-
# @see
|
62
|
+
# @see https://www.w3.org/TR/rdf-sparql-query/#codepointEscape
|
63
63
|
def self.unescape_codepoints(string)
|
64
64
|
string = string.dup
|
65
65
|
string.force_encoding(Encoding::ASCII_8BIT) if string.respond_to?(:force_encoding)
|
@@ -81,7 +81,7 @@ module EBNF::LL1
|
|
81
81
|
#
|
82
82
|
# @param [String] input
|
83
83
|
# @return [String]
|
84
|
-
# @see
|
84
|
+
# @see https://www.w3.org/TR/rdf-sparql-query/#grammarEscapes
|
85
85
|
def self.unescape_string(input)
|
86
86
|
input.gsub(ECHAR) { |escaped| ESCAPE_CHARS[escaped] || escaped[1..-1]}
|
87
87
|
end
|
@@ -131,7 +131,6 @@ module EBNF::LL1
|
|
131
131
|
|
132
132
|
raise Error, "Terminal patterns not defined" unless @terminals && @terminals.length > 0
|
133
133
|
|
134
|
-
@lineno = 1
|
135
134
|
@scanner = Scanner.new(input, **options)
|
136
135
|
end
|
137
136
|
|
@@ -147,12 +146,6 @@ module EBNF::LL1
|
|
147
146
|
# @return [String]
|
148
147
|
attr_accessor :input
|
149
148
|
|
150
|
-
##
|
151
|
-
# The current line number (zero-based).
|
152
|
-
#
|
153
|
-
# @return [Integer]
|
154
|
-
attr_reader :lineno
|
155
|
-
|
156
149
|
##
|
157
150
|
# Returns `true` if the input string is lexically valid.
|
158
151
|
#
|
@@ -194,7 +187,7 @@ module EBNF::LL1
|
|
194
187
|
|
195
188
|
@first ||= begin
|
196
189
|
{} while !scanner.eos? && skip_whitespace
|
197
|
-
return
|
190
|
+
return nil if scanner.eos?
|
198
191
|
|
199
192
|
token = match_token(*types)
|
200
193
|
|
@@ -233,7 +226,7 @@ module EBNF::LL1
|
|
233
226
|
# @return [Token]
|
234
227
|
def recover(*types)
|
235
228
|
until scanner.eos? || tok = match_token(*types)
|
236
|
-
if scanner.skip_until(@whitespace || /\s
|
229
|
+
if scanner.skip_until(@whitespace || /\s+/m).nil? # Skip past current "token"
|
237
230
|
# No whitespace at the end, must be an end of string
|
238
231
|
scanner.terminate
|
239
232
|
else
|
@@ -243,6 +236,14 @@ module EBNF::LL1
|
|
243
236
|
scanner.unscan if tok
|
244
237
|
first
|
245
238
|
end
|
239
|
+
|
240
|
+
##
|
241
|
+
# The current line number (one-based).
|
242
|
+
#
|
243
|
+
# @return [Integer]
|
244
|
+
def lineno
|
245
|
+
scanner.lineno
|
246
|
+
end
|
246
247
|
protected
|
247
248
|
|
248
249
|
# @return [StringScanner]
|
@@ -253,9 +254,7 @@ module EBNF::LL1
|
|
253
254
|
def skip_whitespace
|
254
255
|
# skip all white space, but keep track of the current line number
|
255
256
|
while @whitespace && !scanner.eos?
|
256
|
-
|
257
|
-
@lineno += matched.count("\n")
|
258
|
-
else
|
257
|
+
unless scanner.scan(@whitespace)
|
259
258
|
return
|
260
259
|
end
|
261
260
|
end
|
@@ -281,7 +280,6 @@ module EBNF::LL1
|
|
281
280
|
if matched = scanner.scan(term.regexp)
|
282
281
|
#STDERR.puts " matched #{term.type.inspect}: #{matched.inspect}"
|
283
282
|
tok = token(term.type, term.canonicalize(matched))
|
284
|
-
@lineno += matched.count("\n")
|
285
283
|
return tok
|
286
284
|
end
|
287
285
|
end
|
@@ -372,7 +370,7 @@ module EBNF::LL1
|
|
372
370
|
# token.type #=> :LANGTAG
|
373
371
|
# token.value #=> "en"
|
374
372
|
#
|
375
|
-
# @see
|
373
|
+
# @see https://en.wikipedia.org/wiki/Lexical_analysis#Token
|
376
374
|
class Token
|
377
375
|
##
|
378
376
|
# The token's symbol type.
|
@@ -493,7 +491,7 @@ module EBNF::LL1
|
|
493
491
|
# "invalid token '%' on line 10",
|
494
492
|
# input: query, token: '%', lineno: 9)
|
495
493
|
#
|
496
|
-
# @see
|
494
|
+
# @see https://ruby-doc.org/core/classes/StandardError.html
|
497
495
|
class Error < StandardError
|
498
496
|
##
|
499
497
|
# The input string associated with the error.
|
data/lib/ebnf/ll1/parser.rb
CHANGED
@@ -3,12 +3,52 @@ require 'ebnf/ll1/lexer'
|
|
3
3
|
module EBNF::LL1
|
4
4
|
##
|
5
5
|
# A Generic LL1 parser using a lexer and branch tables defined using the SWAP tool chain (modified).
|
6
|
+
#
|
7
|
+
# # Creating terminal definitions and parser rules to parse generated grammars
|
8
|
+
#
|
9
|
+
# The parser is initialized with callbacks invoked on entry and exit
|
10
|
+
# to each `terminal` and `production`. A trivial parser loop can be described as follows:
|
11
|
+
#
|
12
|
+
# require 'ebnf/ll1/parser'
|
13
|
+
# require 'meta'
|
14
|
+
#
|
15
|
+
# class Parser
|
16
|
+
# include Meta
|
17
|
+
# include EBNF::LL1::Parser
|
18
|
+
#
|
19
|
+
# terminal(:SYMBOL, /([a-z]|[A-Z]|[0-9]|_)+/) do |prod, token, input|
|
20
|
+
# # Add data based on scanned token to input
|
21
|
+
# input[:symbol] = token.value
|
22
|
+
# end
|
23
|
+
#
|
24
|
+
# start_production(:rule) do |input, current, callback|
|
25
|
+
# # Process on start of production
|
26
|
+
# # Set state for entry into recursed rules through current
|
27
|
+
#
|
28
|
+
# # Callback to parser loop with callback
|
29
|
+
# end
|
30
|
+
#
|
31
|
+
# production(:rule) do |input, current, callback|
|
32
|
+
# # Process on end of production
|
33
|
+
# # return results in input, retrieve results from recursed rules in current
|
34
|
+
#
|
35
|
+
# # Callback to parser loop with callback
|
36
|
+
# end
|
37
|
+
#
|
38
|
+
# def initialize(input)
|
39
|
+
# parse(input, start_symbol,
|
40
|
+
# branch: BRANCH,
|
41
|
+
# first: FIRST,
|
42
|
+
# follow: FOLLOW,
|
43
|
+
# cleanup: CLEANUP
|
44
|
+
# ) do |context, *data|
|
45
|
+
# # Process calls from callback from productions
|
46
|
+
#
|
47
|
+
# rescue ArgumentError, EBNF::LL1::Parser::Error => e
|
48
|
+
# progress("Parsing completed with errors:\n\t#{e.message}")
|
49
|
+
# raise RDF::ReaderError, e.message if validate?
|
50
|
+
# end
|
6
51
|
module Parser
|
7
|
-
##
|
8
|
-
# @private
|
9
|
-
# level above which debug messages are supressed
|
10
|
-
DEBUG_LEVEL = 10
|
11
|
-
|
12
52
|
##
|
13
53
|
# @return [Integer] line number of current token
|
14
54
|
attr_reader :lineno
|
@@ -186,7 +226,7 @@ module EBNF::LL1
|
|
186
226
|
# def each_statement(&block)
|
187
227
|
# @callback = block
|
188
228
|
#
|
189
|
-
# parse(START.to_sym) do |context, *data|
|
229
|
+
# parse(input, START.to_sym) do |context, *data|
|
190
230
|
# case context
|
191
231
|
# when :statement
|
192
232
|
# yield *data
|
@@ -205,16 +245,13 @@ module EBNF::LL1
|
|
205
245
|
# Lists valid terminals that can precede each production (for error recovery).
|
206
246
|
# @option options [Hash{Symbol,String => Array<Symbol,String>}] :follow ({})
|
207
247
|
# Lists valid terminals that can follow each production (for error recovery).
|
208
|
-
# @option options [Boolean] :validate (false)
|
209
|
-
# whether to validate the parsed statements and values. If not validating, the parser will attempt to recover from errors.
|
210
|
-
# @option options [Boolean] :progress
|
211
|
-
# Show progress of parser productions
|
212
|
-
# @option options [Boolean] :debug
|
213
|
-
# Detailed debug output
|
214
|
-
# @option options [Boolean] :reset_on_start
|
215
|
-
# Reset the parser state if the start token set with `prod` is found in a production. This reduces the production stack depth growth, which is appropriate for some grammars.
|
216
248
|
# @option options [Integer] :high_water passed to lexer
|
249
|
+
# @option options [Logger] :logger for errors/progress/debug.
|
217
250
|
# @option options [Integer] :low_water passed to lexer
|
251
|
+
# @option options [Boolean] :reset_on_start
|
252
|
+
# Reset the parser state if the start token set with `prod` is found in a production. This reduces the production stack depth growth, which is appropriate for some grammars.
|
253
|
+
# @option options [Boolean] :validate (false)
|
254
|
+
# whether to validate the parsed statements and values. If not validating, the parser will attempt to recover from errors.
|
218
255
|
# @yield [context, *data]
|
219
256
|
# Yields to return data to the parser
|
220
257
|
# @yieldparam [:statement, :trace] context
|
@@ -225,13 +262,9 @@ module EBNF::LL1
|
|
225
262
|
# @raise [Exception] Raises exceptions for parsing errors
|
226
263
|
# or errors raised during processing callbacks. Internal
|
227
264
|
# errors are raised using {Error}.
|
228
|
-
# @see
|
265
|
+
# @see https://cs.adelaide.edu.au/~charles/lt/Lectures/07-ErrorRecovery.pdf
|
229
266
|
def parse(input = nil, start = nil, **options, &block)
|
230
267
|
@options = options.dup
|
231
|
-
@options[:debug] ||= case
|
232
|
-
when @options[:progress] then 2
|
233
|
-
when @options[:validate] then 1
|
234
|
-
end
|
235
268
|
@branch = options[:branch]
|
236
269
|
@first = options[:first] ||= {}
|
237
270
|
@follow = options[:follow] ||= {}
|
@@ -356,9 +389,9 @@ module EBNF::LL1
|
|
356
389
|
end
|
357
390
|
|
358
391
|
# Get the list of follows for this sequence, this production and the stacked productions.
|
359
|
-
debug("recovery", "stack follows:"
|
392
|
+
debug("recovery", "stack follows:")
|
360
393
|
todo_stack.reverse.each do |todo|
|
361
|
-
debug("recovery"
|
394
|
+
debug("recovery") {" #{todo[:prod]}: #{@follow[todo[:prod]].inspect}"}
|
362
395
|
end
|
363
396
|
|
364
397
|
# Find all follows to the top of the stack
|
@@ -466,14 +499,15 @@ module EBNF::LL1
|
|
466
499
|
protected
|
467
500
|
|
468
501
|
##
|
469
|
-
# Error information, used as level `
|
502
|
+
# Error information, used as level `3` logger messages.
|
503
|
+
# Messages may be logged and are saved for reporting at end of parsing.
|
470
504
|
#
|
471
505
|
# @param [String] node Relevant location associated with message
|
472
506
|
# @param [String] message Error string
|
473
|
-
# @param [Hash] options
|
507
|
+
# @param [Hash{Symbol => Object}] options
|
474
508
|
# @option options [URI, #to_s] :production
|
475
509
|
# @option options [Token] :token
|
476
|
-
# @see
|
510
|
+
# @see #debug
|
477
511
|
def error(node, message, **options)
|
478
512
|
lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
|
479
513
|
m = "ERROR "
|
@@ -483,83 +517,82 @@ module EBNF::LL1
|
|
483
517
|
m += ", production = #{options[:production].inspect}" if options[:production]
|
484
518
|
@error_log << m unless @recovering
|
485
519
|
@recovering = true
|
486
|
-
debug(node, m, level:
|
520
|
+
debug(node, m, level: options.fetch(:level, 3), **options)
|
487
521
|
if options[:raise] || @options[:validate]
|
488
522
|
raise Error.new(m, lineno: lineno, token: options[:token], production: options[:production])
|
489
523
|
end
|
490
524
|
end
|
491
525
|
|
492
526
|
##
|
493
|
-
# Warning information, used as level `
|
527
|
+
# Warning information, used as level `2` logger messages.
|
528
|
+
# Messages may be logged and are saved for reporting at end of parsing.
|
494
529
|
#
|
495
530
|
# @param [String] node Relevant location associated with message
|
496
531
|
# @param [String] message Error string
|
497
532
|
# @param [Hash] options
|
498
533
|
# @option options [URI, #to_s] :production
|
499
534
|
# @option options [Token] :token
|
500
|
-
# @see
|
535
|
+
# @see #debug
|
501
536
|
def warn(node, message, **options)
|
537
|
+
lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
|
502
538
|
m = "WARNING "
|
503
|
-
m += "[line: #{
|
539
|
+
m += "[line: #{lineno}] " if lineno
|
504
540
|
m += message
|
505
541
|
m += " (found #{options[:token].inspect})" if options[:token]
|
506
542
|
m += ", production = #{options[:production].inspect}" if options[:production]
|
507
543
|
@error_log << m unless @recovering
|
508
|
-
debug(node, m, level:
|
544
|
+
debug(node, m, level: 2, lineno: lineno, **options)
|
509
545
|
end
|
510
546
|
|
511
547
|
##
|
512
|
-
# Progress
|
548
|
+
# Progress logged when parsing. Passed as level `1` logger messages.
|
513
549
|
#
|
514
|
-
#
|
550
|
+
# The call is ignored, unless `@options[:logger]` is set.
|
551
|
+
#
|
552
|
+
# @overload progress(node, message, **options, &block)
|
515
553
|
# @param [String] node Relevant location associated with message
|
516
554
|
# @param [String] message ("")
|
517
555
|
# @param [Hash] options
|
518
556
|
# @option options [Integer] :depth
|
519
557
|
# Recursion depth for indenting output
|
520
|
-
# @see
|
558
|
+
# @see #debug
|
521
559
|
def progress(node, *args, &block)
|
522
|
-
return unless @options[:
|
560
|
+
return unless @options[:logger]
|
561
|
+
lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
|
523
562
|
args << {} unless args.last.is_a?(Hash)
|
524
|
-
args.last[:level] ||=
|
563
|
+
args.last[:level] ||= 1
|
564
|
+
args.last[:lineno] ||= lineno
|
525
565
|
debug(node, *args, &block)
|
526
566
|
end
|
527
567
|
|
528
568
|
##
|
529
|
-
#
|
569
|
+
# Debug logging.
|
530
570
|
#
|
531
|
-
# The call is ignored, unless `@options[:
|
532
|
-
# case it yields tracing information as indicated. Additionally,
|
533
|
-
# if `@options[:debug]` is an Integer, the call is aborted if the
|
534
|
-
# `:level` option is less than than `:level`.
|
571
|
+
# The call is ignored, unless `@options[:logger]` is set.
|
535
572
|
#
|
536
573
|
# @overload debug(node, message, **options)
|
537
574
|
# @param [Array<String>] args Relevant location associated with message
|
538
575
|
# @param [Hash] options
|
539
576
|
# @option options [Integer] :depth
|
540
577
|
# Recursion depth for indenting output
|
541
|
-
# @
|
542
|
-
|
543
|
-
|
544
|
-
# progress information, and anything higher is for various levels
|
545
|
-
# of debug information.
|
546
|
-
#
|
547
|
-
# @yield trace, level, lineno, depth, args
|
548
|
-
# @yieldparam [:trace] trace
|
549
|
-
# @yieldparam [Integer] level
|
550
|
-
# @yieldparam [Integer] lineno
|
551
|
-
# @yieldparam [Integer] depth Recursive depth of productions
|
552
|
-
# @yieldparam [Array<String>] args
|
553
|
-
# @yieldreturn [String] added to message
|
554
|
-
def debug(*args)
|
555
|
-
return unless @options[:debug] && @parse_callback
|
578
|
+
# @yieldreturn [String] additional string appended to `message`.
|
579
|
+
def debug(*args, &block)
|
580
|
+
return unless @options[:logger]
|
556
581
|
options = args.last.is_a?(Hash) ? args.pop : {}
|
557
|
-
|
558
|
-
|
559
|
-
|
582
|
+
lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
|
583
|
+
level = options.fetch(:level, 0)
|
560
584
|
depth = options[:depth] || self.depth
|
561
|
-
|
562
|
-
|
585
|
+
|
586
|
+
if self.respond_to?(:log_debug)
|
587
|
+
level = [:debug, :info, :warn, :error, :fatal][level]
|
588
|
+
log_debug(*args, **options.merge(level: level, lineno: lineno, depth: depth), &block)
|
589
|
+
elsif @options[:logger].respond_to?(:add)
|
590
|
+
args << yield if block_given?
|
591
|
+
@options[:logger].add(level, "[#{lineno}]" + (" " * depth) + args.join(" "))
|
592
|
+
elsif @options[:logger].respond_to?(:<<)
|
593
|
+
args << yield if block_given?
|
594
|
+
@options[:logger] << "[#{lineno}]" + (" " * depth) + args.join(" ")
|
595
|
+
end
|
563
596
|
end
|
564
597
|
|
565
598
|
private
|
@@ -570,7 +603,7 @@ module EBNF::LL1
|
|
570
603
|
if handler
|
571
604
|
# Create a new production data element, potentially allowing handler
|
572
605
|
# to customize before pushing on the @prod_data stack
|
573
|
-
|
606
|
+
debug("#{prod}(:start):#{@prod_data.length}") {@prod_data.last}
|
574
607
|
data = {}
|
575
608
|
begin
|
576
609
|
self.class.eval_with_binding(self) {
|
@@ -584,12 +617,12 @@ module EBNF::LL1
|
|
584
617
|
elsif [:merge, :star].include?(@cleanup[prod])
|
585
618
|
# Save current data to merge later
|
586
619
|
@prod_data << {}
|
587
|
-
|
620
|
+
debug("#{prod}(:start}:#{@prod_data.length}:cleanup:#{@cleanup[prod]}") { get_token.inspect + (@recovering ? ' recovering' : '')}
|
588
621
|
else
|
589
622
|
# Make sure we push as many was we pop, even if there is no
|
590
623
|
# explicit start handler
|
591
624
|
@prod_data << {} if self.class.production_handlers[prod]
|
592
|
-
|
625
|
+
debug("#{prod}(:start:#{@prod_data.length})") { get_token.inspect + (@recovering ? ' recovering' : '')}
|
593
626
|
end
|
594
627
|
#puts "prod_data(s): " + @prod_data.inspect
|
595
628
|
end
|
@@ -623,7 +656,7 @@ module EBNF::LL1
|
|
623
656
|
else Array(input[k]) + Array(v)
|
624
657
|
end
|
625
658
|
end
|
626
|
-
|
659
|
+
debug("#{prod}(:finish):#{@prod_data.length} cleanup:#{@cleanup[prod]}") {@prod_data.last}
|
627
660
|
else
|
628
661
|
progress("#{prod}(:finish):#{@prod_data.length}") { "recovering" if @recovering }
|
629
662
|
end
|
@@ -730,7 +763,7 @@ module EBNF::LL1
|
|
730
763
|
# "invalid token '%' on line 10",
|
731
764
|
# token: '%', lineno: 9, production: :turtleDoc)
|
732
765
|
#
|
733
|
-
# @see
|
766
|
+
# @see https://ruby-doc.org/core/classes/StandardError.html
|
734
767
|
class Error < StandardError
|
735
768
|
##
|
736
769
|
# The current production.
|