ebnf 1.1.2 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +218 -196
  3. data/UNLICENSE +1 -1
  4. data/VERSION +1 -1
  5. data/bin/ebnf +40 -21
  6. data/etc/abnf-core.ebnf +52 -0
  7. data/etc/abnf.abnf +121 -0
  8. data/etc/abnf.ebnf +124 -0
  9. data/etc/abnf.sxp +45 -0
  10. data/etc/doap.ttl +23 -15
  11. data/etc/ebnf.ebnf +21 -33
  12. data/etc/ebnf.html +171 -160
  13. data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
  14. data/etc/ebnf.ll1.sxp +182 -183
  15. data/etc/ebnf.peg.rb +90 -0
  16. data/etc/ebnf.peg.sxp +84 -0
  17. data/etc/ebnf.sxp +40 -41
  18. data/etc/iso-ebnf.ebnf +140 -0
  19. data/etc/iso-ebnf.isoebnf +138 -0
  20. data/etc/iso-ebnf.sxp +65 -0
  21. data/etc/sparql.ebnf +4 -4
  22. data/etc/sparql.html +1603 -1751
  23. data/etc/sparql.ll1.sxp +7372 -7372
  24. data/etc/sparql.peg.rb +532 -0
  25. data/etc/sparql.peg.sxp +597 -0
  26. data/etc/sparql.sxp +363 -362
  27. data/etc/turtle.ebnf +3 -3
  28. data/etc/turtle.html +465 -517
  29. data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
  30. data/etc/turtle.ll1.sxp +425 -425
  31. data/etc/turtle.peg.rb +182 -0
  32. data/etc/turtle.peg.sxp +199 -0
  33. data/etc/turtle.sxp +103 -101
  34. data/lib/ebnf.rb +7 -2
  35. data/lib/ebnf/abnf.rb +301 -0
  36. data/lib/ebnf/abnf/core.rb +23 -0
  37. data/lib/ebnf/abnf/meta.rb +111 -0
  38. data/lib/ebnf/base.rb +128 -87
  39. data/lib/ebnf/bnf.rb +1 -26
  40. data/lib/ebnf/ebnf/meta.rb +90 -0
  41. data/lib/ebnf/isoebnf.rb +229 -0
  42. data/lib/ebnf/isoebnf/meta.rb +75 -0
  43. data/lib/ebnf/ll1.rb +140 -8
  44. data/lib/ebnf/ll1/lexer.rb +37 -32
  45. data/lib/ebnf/ll1/parser.rb +113 -73
  46. data/lib/ebnf/ll1/scanner.rb +83 -51
  47. data/lib/ebnf/native.rb +320 -0
  48. data/lib/ebnf/parser.rb +285 -302
  49. data/lib/ebnf/peg.rb +39 -0
  50. data/lib/ebnf/peg/parser.rb +561 -0
  51. data/lib/ebnf/peg/rule.rb +241 -0
  52. data/lib/ebnf/rule.rb +453 -163
  53. data/lib/ebnf/terminals.rb +21 -0
  54. data/lib/ebnf/writer.rb +561 -88
  55. metadata +114 -28
  56. data/etc/sparql.rb +0 -45773
@@ -0,0 +1,229 @@
1
+ require_relative 'isoebnf/meta'
2
+ require 'logger'
3
+
4
+ # ISO EBNF parser
5
+ # Parses ISO EBNF into an array of {EBNF::Rule}.
6
+ module EBNF
7
+ class ISOEBNF
8
+ include EBNF::PEG::Parser
9
+
10
+ # The base for terminal-character, which omits "'", '"', and '?'.
11
+ # Could be more optimized, and one might quibble
12
+ # with the overly-strictly defined character set,
13
+ # but it is correct.
14
+ TERMINAL_CHARACTER_BASE = %r{
15
+ [a-zA-Z0-9] | # letter | decimal digit
16
+ , | # concatenate symbol
17
+ = | # defining symbol
18
+ [\|\/!] | # definition separator symbol
19
+ \*\) | # end comment symbol
20
+ \) | # end group symbol
21
+ \] | # end option symbol
22
+ \} | # end repeat symbol
23
+ \- | # except symbol
24
+ #\' | # first quote symbol
25
+ \* | # repetition symbol
26
+ #\" | # second quote symbol
27
+ #\? | # special sequence symbol
28
+ \(\* | # start comment symbol
29
+ \( | # start group symbol
30
+ \[ | # start option symbol
31
+ \{ | # start repeat symbol
32
+ [;\.] | # terminator symbol
33
+ [:+_%@&$<>^\x20\x23\\`~] # other character
34
+ }x
35
+
36
+ TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['"\?]}
37
+ FIRST_TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|["\?]}
38
+ SECOND_TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['\?]}
39
+ SPECIAL_SEQUENCE_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['"]}
40
+
41
+ # Abstract syntax tree from parse
42
+ #
43
+ # @return [Array<EBNF::Rule>]
44
+ attr_reader :ast
45
+
46
+ # `[14] integer ::= decimal_digit+`
47
+ terminal(:integer, /\d+/) do |value, prod|
48
+ value.to_i
49
+ end
50
+
51
+ # `[15] meta_identifier ::= letter meta_identifier_character*`
52
+ terminal(:meta_identifier, /[a-zA-Z][a-zA-Z0-9_]*/) do |value|
53
+ value.to_sym
54
+ end
55
+
56
+ # `[17] terminal_string ::= ("'" first_terminal_character+ "'")`
57
+ # ` | ('"' second_terminal_character+ '"')`
58
+ terminal(:terminal_string, /(?:'#{FIRST_TERMINAL_CHARACTER}+')|(?:"#{SECOND_TERMINAL_CHARACTER}+")/x) do |value|
59
+ value[1..-2]
60
+ end
61
+
62
+ # `[20] special_sequence ::= '?' special_sequence_character* '?'`
63
+ terminal(:special_sequence, /\?#{SPECIAL_SEQUENCE_CHARACTER}+\?/)
64
+
65
+ # `[22] terminal_character ::= [a-zA-Z0-9]`
66
+ # ` | [,=;*}#x2d?([{;]`
67
+ # ` | '*)'`
68
+ # ` | '(*'`
69
+ # ` | ']'`
70
+ # ` | other_character`
71
+ terminal(:terminal_character, TERMINAL_CHARACTER)
72
+
73
+ # `[25] empty ::= ''`
74
+ terminal(:empty, //)
75
+
76
+ # `[26] definition_separator_symbol ::= '|' | '/' | '!'`
77
+ terminal(:definition_separator_symbol, /[\|\/!]/)
78
+
79
+ # `[27] terminator_symbol ::= ';' | '.'`
80
+ terminal(:terminator_symbol, /[;\.]/)
81
+
82
+ # `[28] start_option_symbol ::= '['`
83
+ terminal(:start_option_symbol, /\[|(?:\(\/)/)
84
+
85
+ # `[29] end_option_symbol ::= ']'`
86
+ terminal(:end_option_symbol, /\]/)
87
+
88
+ # `[30] start_repeat_symbol ::= '{' | '(:'`
89
+ terminal(:start_repeat_symbol, /{|\(:/)
90
+
91
+ # `[31] end_repeat_symbol ::= '}' | ':)'`
92
+ terminal(:end_repeat_symbol, /}|:\)/)
93
+
94
+ # ## Non-terminal productions
95
+
96
+ # `[2] syntax_rule ::= meta_identifier '=' definitions_list terminator_symbol`
97
+ production(:syntax_rule, clear_packrat: true) do |value, data, callback|
98
+ # value contains an expression.
99
+ # Invoke callback
100
+ sym = value[0][:meta_identifier]
101
+ definitions_list = value[2][:definitions_list]
102
+ callback.call(:rule, EBNF::Rule.new(sym.to_sym, nil, definitions_list))
103
+ nil
104
+ end
105
+
106
+ # Setting `as_hash: true` in the start production makes the value of the form of a hash, rather than an array of hashes.
107
+ #
108
+ # `[3] definitions_list ::= single_definition (definition_separator_symbol definitions_list)*`
109
+ start_production(:definitions_list, as_hash: true)
110
+ production(:definitions_list) do |value|
111
+ if value[:_definitions_list_1].length > 0
112
+ [:alt, value[:single_definition]] + value[:_definitions_list_1]
113
+ else
114
+ value[:single_definition]
115
+ end
116
+ end
117
+ production(:_definitions_list_1) do |value|
118
+ Array(value.first)
119
+ end
120
+ start_production(:_definitions_list_2, as_hash: true)
121
+ production(:_definitions_list_2) do |value|
122
+ if Array(value[:definitions_list]).first == :alt
123
+ value[:definitions_list][1..-1]
124
+ else
125
+ [value[:definitions_list]]
126
+ end
127
+ end
128
+
129
+ # `[4] single_definition ::= term (',' term)*`
130
+ start_production(:single_definition, as_hash: true)
131
+ production(:single_definition) do |value|
132
+ if value[:_single_definition_1].length > 0
133
+ [:seq, value[:term]] + value[:_single_definition_1]
134
+ else
135
+ value[:term]
136
+ end
137
+ end
138
+ production(:_single_definition_1) do |value|
139
+ value.map {|a1| a1.last[:term]}.compact # Keep only each term, dropping the ',' separator
140
+ end
141
+
142
+ # `[5] term ::= factor ('-' exception)?`
143
+ start_production(:term, as_hash: true)
144
+ production(:term) do |value|
145
+ if value[:_term_1]
146
+ [:diff, value[:factor], value[:_term_1]]
147
+ else
148
+ value[:factor]
149
+ end
150
+ end
151
+ production(:_term_1) do |value|
152
+ value.last[:exception] if value
153
+ end
154
+
155
+ # `[6] exception ::= factor`
156
+ start_production(:exception, as_hash: true)
157
+ production(:exception) do |value|
158
+ value[:factor]
159
+ end
160
+
161
+ # `[7] factor ::= (integer '*')? primary`
162
+ start_production(:factor, as_hash: true)
163
+ production(:factor) do |value|
164
+ if value[:_factor_1]
165
+ [:rept, value[:_factor_1], value[:_factor_1], value[:primary]]
166
+ else
167
+ value[:primary]
168
+ end
169
+ end
170
+ production(:_factor_2) do |value|
171
+ value.first[:integer]
172
+ end
173
+
174
+ # `[9] optional_sequence ::= start_option_symbol definitions_list end_option_symbol`
175
+ production(:optional_sequence) do |value|
176
+ [:opt, value[1][:definitions_list]]
177
+ end
178
+
179
+ # `[10] repeated_sequence ::= start_repeat_symbol definitions_list end_repeat_symbol`
180
+ production(:repeated_sequence) do |value|
181
+ [:star, value[1][:definitions_list]]
182
+ end
183
+
184
+ # `[11] grouped_sequence ::= '(' definitions_list ')'`
185
+ production(:grouped_sequence) do |value|
186
+ [:seq, value[1][:definitions_list]]
187
+ end
188
+
189
+ # ## Parser invocation.
190
+ # Parses `input` immediately on construction; each resulting rule is appended to {#ast}. NOTE(review): the `&block` parameter is not used in the visible body.
191
+ #
192
+ # @param [#read, #to_s] input
193
+ # @param [Hash{Symbol => Object}] options
194
+ # @option options [Integer] :level
195
+ # Trace level. 0(debug), 1(info), 2(warn), 3(error).
196
+ # @return [EBNF::ISOEBNF]
197
+ def initialize(input, **options, &block)
198
+ # If the `level` option is set, instantiate a logger for collecting trace information.
199
+ if options.has_key?(:level)
200
+ options[:logger] = Logger.new(STDERR)
201
+ options[:logger].level = options[:level]
202
+ options[:logger].formatter = lambda {|severity, datetime, progname, msg| "#{severity} #{msg}\n"}
203
+ end
204
+
205
+ # Read input, if necessary, which will be used in a Scanner.
206
+ @input = input.respond_to?(:read) ? input.read : input.to_s
207
+
208
+ parsing_terminals = false
209
+ @ast = []
210
+ parse(@input,
211
+ :syntax,
212
+ ISOEBNFMeta::RULES,
213
+ whitespace: %r{([\x09-\x0d\x20]|(?:\(\*(?:(?:\*[^\)])|[^*])*\*\)))+},
214
+ **options
215
+ ) do |context, *data|
216
+ rule = case context
217
+ when :rule
218
+ # A rule which has already been turned into a `Rule` object.
219
+ rule = data.first
220
+ rule.kind = :terminal if parsing_terminals
221
+ rule
222
+ end
223
+ @ast << rule if rule
224
+ end
225
+ rescue EBNF::PEG::Parser::Error => e
226
+ raise SyntaxError, e.message
227
+ end
228
+ end
229
+ end
@@ -0,0 +1,75 @@
1
+ # This file is automatically generated by ebnf version 2.0.0
2
+ # Derived from etc/iso-ebnf.ebnf
3
+ module ISOEBNFMeta
4
+ RULES = [
5
+ EBNF::Rule.new(:syntax, nil, [:star, :syntax_rule]).extend(EBNF::PEG::Rule),
6
+ EBNF::Rule.new(:syntax_rule, nil, [:seq, :meta_identifier, :defining_symbol, :definitions_list, :terminator_symbol]).extend(EBNF::PEG::Rule),
7
+ EBNF::Rule.new(:definitions_list, nil, [:seq, :single_definition, :_definitions_list_1]).extend(EBNF::PEG::Rule),
8
+ EBNF::Rule.new(:_definitions_list_1, nil, [:star, :_definitions_list_2]).extend(EBNF::PEG::Rule),
9
+ EBNF::Rule.new(:_definitions_list_2, nil, [:seq, :definition_separator_symbol, :definitions_list]).extend(EBNF::PEG::Rule),
10
+ EBNF::Rule.new(:single_definition, nil, [:seq, :term, :_single_definition_1]).extend(EBNF::PEG::Rule),
11
+ EBNF::Rule.new(:_single_definition_1, nil, [:star, :_single_definition_2]).extend(EBNF::PEG::Rule),
12
+ EBNF::Rule.new(:_single_definition_2, nil, [:seq, ",", :term]).extend(EBNF::PEG::Rule),
13
+ EBNF::Rule.new(:term, nil, [:seq, :factor, :_term_1]).extend(EBNF::PEG::Rule),
14
+ EBNF::Rule.new(:_term_1, nil, [:opt, :_term_2]).extend(EBNF::PEG::Rule),
15
+ EBNF::Rule.new(:_term_2, nil, [:seq, "-", :exception]).extend(EBNF::PEG::Rule),
16
+ EBNF::Rule.new(:exception, nil, [:seq, :factor]).extend(EBNF::PEG::Rule),
17
+ EBNF::Rule.new(:factor, nil, [:seq, :_factor_1, :primary]).extend(EBNF::PEG::Rule),
18
+ EBNF::Rule.new(:_factor_1, nil, [:opt, :_factor_2]).extend(EBNF::PEG::Rule),
19
+ EBNF::Rule.new(:_factor_2, nil, [:seq, :integer, "*"]).extend(EBNF::PEG::Rule),
20
+ EBNF::Rule.new(:primary, nil, [:alt, :optional_sequence, :repeated_sequence, :special_sequence, :grouped_sequence, :meta_identifier, :terminal_string, :empty]).extend(EBNF::PEG::Rule),
21
+ EBNF::Rule.new(:optional_sequence, nil, [:seq, :start_option_symbol, :definitions_list, :end_option_symbol]).extend(EBNF::PEG::Rule),
22
+ EBNF::Rule.new(:repeated_sequence, nil, [:seq, :start_repeat_symbol, :definitions_list, :end_repeat_symbol]).extend(EBNF::PEG::Rule),
23
+ EBNF::Rule.new(:grouped_sequence, nil, [:seq, "(", :definitions_list, ")"]).extend(EBNF::PEG::Rule),
24
+ EBNF::Rule.new(:_terminals, nil, [:seq], kind: :terminals).extend(EBNF::PEG::Rule),
25
+ EBNF::Rule.new(:terminal_string, nil, [:alt, :_terminal_string_1, :_terminal_string_2], kind: :terminal).extend(EBNF::PEG::Rule),
26
+ EBNF::Rule.new(:_terminal_string_1, nil, [:seq, "'", :_terminal_string_3, "'"]).extend(EBNF::PEG::Rule),
27
+ EBNF::Rule.new(:_terminal_string_3, nil, [:plus, :first_terminal_character]).extend(EBNF::PEG::Rule),
28
+ EBNF::Rule.new(:_terminal_string_2, nil, [:seq, "\"", :_terminal_string_4, "\""]).extend(EBNF::PEG::Rule),
29
+ EBNF::Rule.new(:_terminal_string_4, nil, [:plus, :second_terminal_character]).extend(EBNF::PEG::Rule),
30
+ EBNF::Rule.new(:meta_identifier, nil, [:seq, :letter, :_meta_identifier_1], kind: :terminal).extend(EBNF::PEG::Rule),
31
+ EBNF::Rule.new(:_meta_identifier_1, nil, [:star, :meta_identifier_character]).extend(EBNF::PEG::Rule),
32
+ EBNF::Rule.new(:integer, nil, [:plus, :decimal_digit], kind: :terminal).extend(EBNF::PEG::Rule),
33
+ EBNF::Rule.new(:special_sequence, nil, [:seq, "?", :_special_sequence_1, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
34
+ EBNF::Rule.new(:_special_sequence_1, nil, [:star, :special_sequence_character]).extend(EBNF::PEG::Rule),
35
+ EBNF::Rule.new(:comment, nil, [:seq, :start_comment_symbol, :_comment_1, :end_comment_symbol], kind: :terminal).extend(EBNF::PEG::Rule),
36
+ EBNF::Rule.new(:_comment_1, nil, [:star, :comment_symbol]).extend(EBNF::PEG::Rule),
37
+ EBNF::Rule.new(:comment_symbol, nil, [:alt, :comment, :commentless_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule),
38
+ EBNF::Rule.new(:commentless_symbol, nil, [:alt, :terminal_character, :meta_identifier, :integer, :terminal_string, :special_sequence], kind: :terminal).extend(EBNF::PEG::Rule),
39
+ EBNF::Rule.new(:letter, nil, [:range, "a-zA-Z"], kind: :terminal).extend(EBNF::PEG::Rule),
40
+ EBNF::Rule.new(:decimal_digit, nil, [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule),
41
+ EBNF::Rule.new(:meta_identifier_character, nil, [:alt, :letter, :decimal_digit, "_"], kind: :terminal).extend(EBNF::PEG::Rule),
42
+ EBNF::Rule.new(:first_terminal_character, nil, [:diff, :terminal_character, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
43
+ EBNF::Rule.new(:second_terminal_character, nil, [:diff, :terminal_character, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
44
+ EBNF::Rule.new(:special_sequence_character, nil, [:diff, :terminal_character, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
45
+ EBNF::Rule.new(:terminal_character, nil, [:alt, :letter, :decimal_digit, :concatenate_symbol, :defining_symbol, :definition_separator_symbol, :end_comment_symbol, :end_group_symbol, :end_option_symbol, :end_repeat_symbol, :except_symbol, :first_quote_symbol, :repetition_symbol, :second_quote_symbol, :special_sequence_symbol, :start_comment_symbol, :start_group_symbol, :start_option_symbol, :start_repeat_symbol, :terminator_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule),
46
+ EBNF::Rule.new(:other_character, nil, [:alt, :_other_character_1, "\\"], kind: :terminal).extend(EBNF::PEG::Rule),
47
+ EBNF::Rule.new(:_other_character_1, nil, [:range, ":+_%@&$<>^` ̃#x20#x23"], kind: :terminal).extend(EBNF::PEG::Rule),
48
+ EBNF::Rule.new(:gap_separator, nil, [:range, "#x9#xa#xb#xc#xd#x20"], kind: :terminal).extend(EBNF::PEG::Rule),
49
+ EBNF::Rule.new(:_pass, nil, [:alt, :__pass_1, :comment], kind: :pass).extend(EBNF::PEG::Rule),
50
+ EBNF::Rule.new(:__pass_1, nil, [:plus, :gap_separator]).extend(EBNF::PEG::Rule),
51
+ EBNF::Rule.new(:empty, nil, [:seq, ""], kind: :terminal).extend(EBNF::PEG::Rule),
52
+ EBNF::Rule.new(:concatenate_symbol, nil, [:seq, ","], kind: :terminal).extend(EBNF::PEG::Rule),
53
+ EBNF::Rule.new(:repetition_symbol, nil, [:seq, "*"], kind: :terminal).extend(EBNF::PEG::Rule),
54
+ EBNF::Rule.new(:except_symbol, nil, [:seq, "-"], kind: :terminal).extend(EBNF::PEG::Rule),
55
+ EBNF::Rule.new(:first_quote_symbol, nil, [:seq, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
56
+ EBNF::Rule.new(:second_quote_symbol, nil, [:seq, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
57
+ EBNF::Rule.new(:start_comment_symbol, nil, [:seq, "(*"], kind: :terminal).extend(EBNF::PEG::Rule),
58
+ EBNF::Rule.new(:end_comment_symbol, nil, [:seq, "*)"], kind: :terminal).extend(EBNF::PEG::Rule),
59
+ EBNF::Rule.new(:start_group_symbol, nil, [:seq, "("], kind: :terminal).extend(EBNF::PEG::Rule),
60
+ EBNF::Rule.new(:end_group_symbol, nil, [:seq, ")"], kind: :terminal).extend(EBNF::PEG::Rule),
61
+ EBNF::Rule.new(:special_sequence_symbol, nil, [:seq, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
62
+ EBNF::Rule.new(:defining_symbol, nil, [:alt, "=", ":"], kind: :terminal).extend(EBNF::PEG::Rule),
63
+ EBNF::Rule.new(:definition_separator_symbol, nil, [:alt, "|", "/", "!"], kind: :terminal).extend(EBNF::PEG::Rule),
64
+ EBNF::Rule.new(:terminator_symbol, nil, [:alt, ";", "."], kind: :terminal).extend(EBNF::PEG::Rule),
65
+ EBNF::Rule.new(:start_option_symbol, nil, [:seq, "["], kind: :terminal).extend(EBNF::PEG::Rule),
66
+ EBNF::Rule.new(:end_option_symbol, nil, [:seq, "]"], kind: :terminal).extend(EBNF::PEG::Rule),
67
+ EBNF::Rule.new(:start_repeat_symbol, nil, [:alt, "{", "(:"], kind: :terminal).extend(EBNF::PEG::Rule),
68
+ EBNF::Rule.new(:end_repeat_symbol, nil, [:alt, "}", ":)"], kind: :terminal).extend(EBNF::PEG::Rule),
69
+ EBNF::Rule.new(:gap_free_symbol, nil, [:alt, :_gap_free_symbol_1, :terminal_string], kind: :terminal).extend(EBNF::PEG::Rule),
70
+ EBNF::Rule.new(:_gap_free_symbol_1, nil, [:seq, :_gap_free_symbol_3, :terminal_character]).extend(EBNF::PEG::Rule),
71
+ EBNF::Rule.new(:_gap_free_symbol_3, nil, [:not, :_gap_free_symbol_2]).extend(EBNF::PEG::Rule),
72
+ EBNF::Rule.new(:_gap_free_symbol_2, nil, [:range, "'\""], kind: :terminal).extend(EBNF::PEG::Rule),
73
+ ]
74
+ end
75
+
@@ -1,4 +1,86 @@
1
1
  module EBNF
2
+ ##
3
+ # This module extends {EBNF::Base} to create metadata including _branch_, [First/Follow][], and other tables which are used by {EBNF::LL1::Parser} to recognize examples of the associated grammar.
4
+ #
5
+ ### Branch Table
6
+ #
7
+ # The Branch table is a hash mapping production rules to a hash relating terminals appearing in input to sequence of productions to follow when the corresponding input terminal is found. This allows either the `seq` primitive, where all terminals map to the same sequence of productions, or the `alt` primitive, where each terminal may map to a different production.
8
+ #
9
+ # BRANCH = {
10
+ # :alt => {
11
+ # "(" => [:seq, :_alt_1],
12
+ # :HEX => [:seq, :_alt_1],
13
+ # :O_RANGE => [:seq, :_alt_1],
14
+ # :RANGE => [:seq, :_alt_1],
15
+ # :STRING1 => [:seq, :_alt_1],
16
+ # :STRING2 => [:seq, :_alt_1],
17
+ # :SYMBOL => [:seq, :_alt_1],
18
+ # },
19
+ # ...
20
+ # :declaration => {
21
+ # "@pass" => [:pass],
22
+ # "@terminals" => ["@terminals"],
23
+ # },
24
+ # ...
25
+ # }
26
+ #
27
+ # In this case the `alt` rule, which is `seq ('|' seq)*`, can happen when any of the specified tokens appears on the input stream. They all cause the same token to be passed to the `seq` rule, followed by `_alt_1`, which handles the `('|' seq)*` portion of the rule after the first sequence is matched.
28
+ #
29
+ # The `declaration` rule is `'@terminals' | pass`, using the `alt` primitive to determine the production to run based on the terminal appearing on the input stream. Eventually, a terminal production is found and the token is consumed.
30
+ #
31
+ ### First/Follow Table
32
+ #
33
+ # The [First/Follow][] table is a hash mapping production rules to the terminals that may begin or follow the rule. For example:
34
+ #
35
+ # FIRST = {
36
+ # :alt => [
37
+ # :HEX,
38
+ # :SYMBOL,
39
+ # :RANGE,
40
+ # :O_RANGE,
41
+ # :STRING1,
42
+ # :STRING2,
43
+ # "("],
44
+ # ...
45
+ # }
46
+ #
47
+ ### Terminals Table
48
+ #
49
+ # This table is a simple list of the terminal productions found in the grammar. For example:
50
+ #
51
+ # TERMINALS = ["(", ")", "-",
52
+ # "@pass", "@terminals",
53
+ # :HEX, :LHS, :O_RANGE,:POSTFIX,
54
+ # :RANGE, :STRING1, :STRING2, :SYMBOL,"|"
55
+ # ].freeze
56
+ #
57
+ ### Cleanup Table
58
+ #
59
+ # This table identifies productions which used EBNF rules, which are transformed to BNF for actual parsing. This allows the parser, in some cases, to reproduce *star*, *plus*, and *opt* rule matches. For example:
60
+ #
61
+ # CLEANUP = {
62
+ # :_alt_1 => :star,
63
+ # :_alt_3 => :merge,
64
+ # :_diff_1 => :opt,
65
+ # :ebnf => :star,
66
+ # :_ebnf_2 => :merge,
67
+ # :_postfix_1 => :opt,
68
+ # :seq => :plus,
69
+ # :_seq_1 => :star,
70
+ # :_seq_2 => :merge,
71
+ # }.freeze
72
+ #
73
+ # In this case the `ebnf` rule was `(declaration | rule)*`. As BNF does not support a star operator, this is decomposed into a set of rules using `alt` and `seq` primitives:
74
+ #
75
+ # ebnf ::= _empty | _ebnf_2
76
+ # _ebnf_1 ::= declaration | rule
77
+ # _ebnf_2 ::= _ebnf_1 ebnf
78
+ # _ebnf_3 ::= ebnf
79
+ #
80
+ # The `_empty` production matches an empty string, so allows for no value. `_ebnf_2` matches `declaration | rule` (using the `alt` primitive) followed by `ebnf`, creating a sequence of zero or more `declaration` or `rule` members.
81
+ #
82
+ # [First/Follow]: https://en.wikipedia.org/wiki/LL_parser#Constructing_an_LL.281.29_parsing_table
83
+
2
84
  module LL1
3
85
  autoload :Lexer, "ebnf/ll1/lexer"
4
86
  autoload :Parser, "ebnf/ll1/parser"
@@ -51,8 +133,40 @@ module EBNF
51
133
  ##
52
134
  # Create first/follow for each rule using techniques defined for LL(1) parsers.
53
135
  #
136
+ # This takes rules which have been transformed into BNF and adds first/follow and other information to the rules to allow the generation of metadata tables used for driving a parser.
137
+ #
138
+ # Given an initial rule in EBNF:
139
+ #
140
+ # (rule ebnf "1" (star (alt declaration rule)))
141
+ #
142
+ # The BNF transformation becomes:
143
+ #
144
+ # (rule ebnf "1" (alt _empty _ebnf_2))
145
+ # (rule _ebnf_1 "1.1" (alt declaration rule))
146
+ # (rule _ebnf_2 "1.2" (seq _ebnf_1 ebnf))
147
+ # (rule _ebnf_3 "1.3" (seq ebnf))
148
+ #
149
+ # After running this method, the rules are annotated with first/follow and cleanup rules:
150
+ #
151
+ # (rule ebnf "1"
152
+ # (start #t)
153
+ # (first "@pass" "@terminals" LHS _eps)
154
+ # (follow _eof)
155
+ # (cleanup star)
156
+ # (alt _empty _ebnf_2))
157
+ # (rule _ebnf_1 "1.1"
158
+ # (first "@pass" "@terminals" LHS)
159
+ # (follow "@pass" "@terminals" LHS _eof)
160
+ # (alt declaration rule))
161
+ # (rule _ebnf_2 "1.2"
162
+ # (first "@pass" "@terminals" LHS)
163
+ # (follow _eof)
164
+ # (cleanup merge)
165
+ # (seq _ebnf_1 ebnf))
166
+ # (rule _ebnf_3 "1.3" (first "@pass" "@terminals" LHS _eps) (follow _eof) (seq ebnf))
167
+ #
54
168
  # @return [EBNF] self
55
- # @see http://en.wikipedia.org/wiki/LL_parser#Constructing_an_LL.281.29_parsing_table
169
+ # @see https://en.wikipedia.org/wiki/LL_parser#Constructing_an_LL.281.29_parsing_table
56
170
  # @param [Array<Symbol>] starts
57
171
  # Set of symbols which are start rules
58
172
  def first_follow(*starts)
@@ -96,8 +210,9 @@ module EBNF
96
210
  firsts, follows = 0, 0
97
211
  # add Fi(wi) to Fi(Ai) for every rule Ai → wi
98
212
  #
99
- # For sequences, this is the first rule in the sequence.
100
- # For alts, this is every rule in the sequence
213
+ # * For sequences, this is the first rule in the sequence.
214
+ # * For alts, this is every rule in the sequence
215
+ # * Other rules don't matter, as they don't appear in strict BNF
101
216
  each(:rule) do |ai|
102
217
  # Fi(a w' ) = { a } for every terminal a
103
218
  ai.terminals(ast).each do |t|
@@ -168,6 +283,10 @@ module EBNF
168
283
  progress("first_follow") {"(#{ittr}) firsts #{firsts}, follows #{follows}"}
169
284
  ittr += 1
170
285
  end while (firsts + follows) > 0
286
+
287
+ debug("Fi.2-post: non-terminals without first") do
288
+ ast.reject(&:terminal?).reject(&:first).map(&:sym)
289
+ end if ast.reject(&:terminal?).any? {|r| r.first.nil?}
171
290
  end
172
291
  end
173
292
 
@@ -216,7 +335,7 @@ module EBNF
216
335
  @branch = {}
217
336
  @already = []
218
337
  @agenda = []
219
- @starts.each do |start|
338
+ Array(@starts).each do |start|
220
339
  do_production(start)
221
340
  while !@agenda.empty?
222
341
  x = @agenda.shift
@@ -265,13 +384,26 @@ module EBNF
265
384
  end
266
385
  end
267
386
  io.puts "#{ind0}}.freeze\n"
268
- else
387
+ elsif table
269
388
  io.puts "#{ind0}#{name} = [\n#{ind1}" +
270
389
  table.sort_by{|t| t.to_s.sub(/^_/, '')}.map(&:inspect).join(",\n#{ind1}") +
271
390
  "\n#{ind0}].freeze\n"
272
391
  end
273
392
  end
274
393
 
394
+ ##
395
+ # Output the LL(1) parser tables (BRANCH, TERMINALS, FIRST, FOLLOW, CLEANUP and, when a pass rule is set, PASS) to `output`
396
+ #
397
+ # @param [IO, StringIO] output
398
+ def to_ruby_ll1(output, **options)
399
+ self.outputTable(output, 'BRANCH', self.branch, 1)
400
+ self.outputTable(output, 'TERMINALS', self.terminals, 1)
401
+ self.outputTable(output, 'FIRST', self.first, 1)
402
+ self.outputTable(output, 'FOLLOW', self.follow, 1)
403
+ self.outputTable(output, 'CLEANUP', self.cleanup, 1)
404
+ self.outputTable(output, 'PASS', [self.pass], 1) if self.pass
405
+ end
406
+
275
407
  private
276
408
  def do_production(lhs)
277
409
  rule = find_rule(lhs)
@@ -287,16 +419,16 @@ module EBNF
287
419
 
288
420
  if rule.expr.first == :matches
289
421
  debug("prod") {"Rule is regexp: #{rule}"}
290
-
291
- error("No record of what token #{lhs} can start with") unless rule.first
292
422
  return
293
423
  end
294
424
 
425
+ error("No record of what token #{lhs.inspect} can start with") unless rule.first
426
+
295
427
  if rule.alt?
296
428
  # A First/Follow conflict appears when _eps is in the first
297
429
  # of one rule and there is a token in the first and
298
430
  # follow of the same rule
299
- if rule.first.include?(:_eps) && !(overlap = ((rule.first & (rule.follow || [])) - [:eps])).empty?
431
+ if Array(rule.first).include?(:_eps) && !(overlap = ((Array(rule.first) & (rule.follow || [])) - [:eps])).empty?
300
432
  error("First/Follow Conflict: #{overlap.first.inspect} is both first and follow of #{rule.sym}")
301
433
  end
302
434