ebnf 1.2.0 → 2.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +223 -199
  3. data/UNLICENSE +1 -1
  4. data/VERSION +1 -1
  5. data/bin/ebnf +38 -19
  6. data/etc/abnf-core.ebnf +52 -0
  7. data/etc/abnf.abnf +121 -0
  8. data/etc/abnf.ebnf +124 -0
  9. data/etc/abnf.sxp +45 -0
  10. data/etc/doap.ttl +23 -18
  11. data/etc/ebnf.ebnf +21 -33
  12. data/etc/ebnf.html +76 -160
  13. data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
  14. data/etc/ebnf.ll1.sxp +182 -183
  15. data/etc/ebnf.peg.rb +90 -0
  16. data/etc/ebnf.peg.sxp +84 -0
  17. data/etc/ebnf.sxp +40 -41
  18. data/etc/iso-ebnf.ebnf +140 -0
  19. data/etc/iso-ebnf.isoebnf +138 -0
  20. data/etc/iso-ebnf.sxp +65 -0
  21. data/etc/sparql.ebnf +4 -4
  22. data/etc/sparql.html +1603 -1751
  23. data/etc/sparql.ll1.sxp +7372 -7372
  24. data/etc/sparql.peg.rb +532 -0
  25. data/etc/sparql.peg.sxp +597 -0
  26. data/etc/sparql.sxp +363 -362
  27. data/etc/turtle.ebnf +3 -3
  28. data/etc/turtle.html +465 -517
  29. data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
  30. data/etc/turtle.ll1.sxp +425 -425
  31. data/etc/turtle.peg.rb +182 -0
  32. data/etc/turtle.peg.sxp +199 -0
  33. data/etc/turtle.sxp +103 -101
  34. data/lib/ebnf.rb +6 -1
  35. data/lib/ebnf/abnf.rb +301 -0
  36. data/lib/ebnf/abnf/core.rb +23 -0
  37. data/lib/ebnf/abnf/meta.rb +111 -0
  38. data/lib/ebnf/base.rb +114 -69
  39. data/lib/ebnf/bnf.rb +1 -26
  40. data/lib/ebnf/ebnf/meta.rb +90 -0
  41. data/lib/ebnf/isoebnf.rb +229 -0
  42. data/lib/ebnf/isoebnf/meta.rb +75 -0
  43. data/lib/ebnf/ll1.rb +131 -3
  44. data/lib/ebnf/ll1/lexer.rb +20 -22
  45. data/lib/ebnf/ll1/parser.rb +97 -64
  46. data/lib/ebnf/ll1/scanner.rb +82 -50
  47. data/lib/ebnf/native.rb +320 -0
  48. data/lib/ebnf/parser.rb +285 -302
  49. data/lib/ebnf/peg.rb +39 -0
  50. data/lib/ebnf/peg/parser.rb +561 -0
  51. data/lib/ebnf/peg/rule.rb +250 -0
  52. data/lib/ebnf/rule.rb +442 -148
  53. data/lib/ebnf/terminals.rb +21 -0
  54. data/lib/ebnf/writer.rb +587 -82
  55. metadata +125 -18
  56. data/etc/sparql.rb +0 -45773
@@ -0,0 +1,75 @@
1
+ # This file is automatically generated by ebnf version 2.0.0
2
+ # Derived from etc/iso-ebnf.ebnf
3
+ module ISOEBNFMeta
4
+ RULES = [
5
+ EBNF::Rule.new(:syntax, nil, [:star, :syntax_rule]).extend(EBNF::PEG::Rule),
6
+ EBNF::Rule.new(:syntax_rule, nil, [:seq, :meta_identifier, :defining_symbol, :definitions_list, :terminator_symbol]).extend(EBNF::PEG::Rule),
7
+ EBNF::Rule.new(:definitions_list, nil, [:seq, :single_definition, :_definitions_list_1]).extend(EBNF::PEG::Rule),
8
+ EBNF::Rule.new(:_definitions_list_1, nil, [:star, :_definitions_list_2]).extend(EBNF::PEG::Rule),
9
+ EBNF::Rule.new(:_definitions_list_2, nil, [:seq, :definition_separator_symbol, :definitions_list]).extend(EBNF::PEG::Rule),
10
+ EBNF::Rule.new(:single_definition, nil, [:seq, :term, :_single_definition_1]).extend(EBNF::PEG::Rule),
11
+ EBNF::Rule.new(:_single_definition_1, nil, [:star, :_single_definition_2]).extend(EBNF::PEG::Rule),
12
+ EBNF::Rule.new(:_single_definition_2, nil, [:seq, ",", :term]).extend(EBNF::PEG::Rule),
13
+ EBNF::Rule.new(:term, nil, [:seq, :factor, :_term_1]).extend(EBNF::PEG::Rule),
14
+ EBNF::Rule.new(:_term_1, nil, [:opt, :_term_2]).extend(EBNF::PEG::Rule),
15
+ EBNF::Rule.new(:_term_2, nil, [:seq, "-", :exception]).extend(EBNF::PEG::Rule),
16
+ EBNF::Rule.new(:exception, nil, [:seq, :factor]).extend(EBNF::PEG::Rule),
17
+ EBNF::Rule.new(:factor, nil, [:seq, :_factor_1, :primary]).extend(EBNF::PEG::Rule),
18
+ EBNF::Rule.new(:_factor_1, nil, [:opt, :_factor_2]).extend(EBNF::PEG::Rule),
19
+ EBNF::Rule.new(:_factor_2, nil, [:seq, :integer, "*"]).extend(EBNF::PEG::Rule),
20
+ EBNF::Rule.new(:primary, nil, [:alt, :optional_sequence, :repeated_sequence, :special_sequence, :grouped_sequence, :meta_identifier, :terminal_string, :empty]).extend(EBNF::PEG::Rule),
21
+ EBNF::Rule.new(:optional_sequence, nil, [:seq, :start_option_symbol, :definitions_list, :end_option_symbol]).extend(EBNF::PEG::Rule),
22
+ EBNF::Rule.new(:repeated_sequence, nil, [:seq, :start_repeat_symbol, :definitions_list, :end_repeat_symbol]).extend(EBNF::PEG::Rule),
23
+ EBNF::Rule.new(:grouped_sequence, nil, [:seq, "(", :definitions_list, ")"]).extend(EBNF::PEG::Rule),
24
+ EBNF::Rule.new(:_terminals, nil, [:seq], kind: :terminals).extend(EBNF::PEG::Rule),
25
+ EBNF::Rule.new(:terminal_string, nil, [:alt, :_terminal_string_1, :_terminal_string_2], kind: :terminal).extend(EBNF::PEG::Rule),
26
+ EBNF::Rule.new(:_terminal_string_1, nil, [:seq, "'", :_terminal_string_3, "'"]).extend(EBNF::PEG::Rule),
27
+ EBNF::Rule.new(:_terminal_string_3, nil, [:plus, :first_terminal_character]).extend(EBNF::PEG::Rule),
28
+ EBNF::Rule.new(:_terminal_string_2, nil, [:seq, "\"", :_terminal_string_4, "\""]).extend(EBNF::PEG::Rule),
29
+ EBNF::Rule.new(:_terminal_string_4, nil, [:plus, :second_terminal_character]).extend(EBNF::PEG::Rule),
30
+ EBNF::Rule.new(:meta_identifier, nil, [:seq, :letter, :_meta_identifier_1], kind: :terminal).extend(EBNF::PEG::Rule),
31
+ EBNF::Rule.new(:_meta_identifier_1, nil, [:star, :meta_identifier_character]).extend(EBNF::PEG::Rule),
32
+ EBNF::Rule.new(:integer, nil, [:plus, :decimal_digit], kind: :terminal).extend(EBNF::PEG::Rule),
33
+ EBNF::Rule.new(:special_sequence, nil, [:seq, "?", :_special_sequence_1, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
34
+ EBNF::Rule.new(:_special_sequence_1, nil, [:star, :special_sequence_character]).extend(EBNF::PEG::Rule),
35
+ EBNF::Rule.new(:comment, nil, [:seq, :start_comment_symbol, :_comment_1, :end_comment_symbol], kind: :terminal).extend(EBNF::PEG::Rule),
36
+ EBNF::Rule.new(:_comment_1, nil, [:star, :comment_symbol]).extend(EBNF::PEG::Rule),
37
+ EBNF::Rule.new(:comment_symbol, nil, [:alt, :comment, :commentless_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule),
38
+ EBNF::Rule.new(:commentless_symbol, nil, [:alt, :terminal_character, :meta_identifier, :integer, :terminal_string, :special_sequence], kind: :terminal).extend(EBNF::PEG::Rule),
39
+ EBNF::Rule.new(:letter, nil, [:range, "a-zA-Z"], kind: :terminal).extend(EBNF::PEG::Rule),
40
+ EBNF::Rule.new(:decimal_digit, nil, [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule),
41
+ EBNF::Rule.new(:meta_identifier_character, nil, [:alt, :letter, :decimal_digit, "_"], kind: :terminal).extend(EBNF::PEG::Rule),
42
+ EBNF::Rule.new(:first_terminal_character, nil, [:diff, :terminal_character, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
43
+ EBNF::Rule.new(:second_terminal_character, nil, [:diff, :terminal_character, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
44
+ EBNF::Rule.new(:special_sequence_character, nil, [:diff, :terminal_character, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
45
+ EBNF::Rule.new(:terminal_character, nil, [:alt, :letter, :decimal_digit, :concatenate_symbol, :defining_symbol, :definition_separator_symbol, :end_comment_symbol, :end_group_symbol, :end_option_symbol, :end_repeat_symbol, :except_symbol, :first_quote_symbol, :repetition_symbol, :second_quote_symbol, :special_sequence_symbol, :start_comment_symbol, :start_group_symbol, :start_option_symbol, :start_repeat_symbol, :terminator_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule),
46
+ EBNF::Rule.new(:other_character, nil, [:alt, :_other_character_1, "\\"], kind: :terminal).extend(EBNF::PEG::Rule),
47
+ EBNF::Rule.new(:_other_character_1, nil, [:range, ":+_%@&$<>^` ̃#x20#x23"], kind: :terminal).extend(EBNF::PEG::Rule),
48
+ EBNF::Rule.new(:gap_separator, nil, [:range, "#x9#xa#xb#xc#xd#x20"], kind: :terminal).extend(EBNF::PEG::Rule),
49
+ EBNF::Rule.new(:_pass, nil, [:alt, :__pass_1, :comment], kind: :pass).extend(EBNF::PEG::Rule),
50
+ EBNF::Rule.new(:__pass_1, nil, [:plus, :gap_separator]).extend(EBNF::PEG::Rule),
51
+ EBNF::Rule.new(:empty, nil, [:seq, ""], kind: :terminal).extend(EBNF::PEG::Rule),
52
+ EBNF::Rule.new(:concatenate_symbol, nil, [:seq, ","], kind: :terminal).extend(EBNF::PEG::Rule),
53
+ EBNF::Rule.new(:repetition_symbol, nil, [:seq, "*"], kind: :terminal).extend(EBNF::PEG::Rule),
54
+ EBNF::Rule.new(:except_symbol, nil, [:seq, "-"], kind: :terminal).extend(EBNF::PEG::Rule),
55
+ EBNF::Rule.new(:first_quote_symbol, nil, [:seq, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
56
+ EBNF::Rule.new(:second_quote_symbol, nil, [:seq, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
57
+ EBNF::Rule.new(:start_comment_symbol, nil, [:seq, "(*"], kind: :terminal).extend(EBNF::PEG::Rule),
58
+ EBNF::Rule.new(:end_comment_symbol, nil, [:seq, "*)"], kind: :terminal).extend(EBNF::PEG::Rule),
59
+ EBNF::Rule.new(:start_group_symbol, nil, [:seq, "("], kind: :terminal).extend(EBNF::PEG::Rule),
60
+ EBNF::Rule.new(:end_group_symbol, nil, [:seq, ")"], kind: :terminal).extend(EBNF::PEG::Rule),
61
+ EBNF::Rule.new(:special_sequence_symbol, nil, [:seq, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
62
+ EBNF::Rule.new(:defining_symbol, nil, [:alt, "=", ":"], kind: :terminal).extend(EBNF::PEG::Rule),
63
+ EBNF::Rule.new(:definition_separator_symbol, nil, [:alt, "|", "/", "!"], kind: :terminal).extend(EBNF::PEG::Rule),
64
+ EBNF::Rule.new(:terminator_symbol, nil, [:alt, ";", "."], kind: :terminal).extend(EBNF::PEG::Rule),
65
+ EBNF::Rule.new(:start_option_symbol, nil, [:seq, "["], kind: :terminal).extend(EBNF::PEG::Rule),
66
+ EBNF::Rule.new(:end_option_symbol, nil, [:seq, "]"], kind: :terminal).extend(EBNF::PEG::Rule),
67
+ EBNF::Rule.new(:start_repeat_symbol, nil, [:alt, "{", "(:"], kind: :terminal).extend(EBNF::PEG::Rule),
68
+ EBNF::Rule.new(:end_repeat_symbol, nil, [:alt, "}", ":)"], kind: :terminal).extend(EBNF::PEG::Rule),
69
+ EBNF::Rule.new(:gap_free_symbol, nil, [:alt, :_gap_free_symbol_1, :terminal_string], kind: :terminal).extend(EBNF::PEG::Rule),
70
+ EBNF::Rule.new(:_gap_free_symbol_1, nil, [:seq, :_gap_free_symbol_3, :terminal_character]).extend(EBNF::PEG::Rule),
71
+ EBNF::Rule.new(:_gap_free_symbol_3, nil, [:not, :_gap_free_symbol_2]).extend(EBNF::PEG::Rule),
72
+ EBNF::Rule.new(:_gap_free_symbol_2, nil, [:range, "'\""], kind: :terminal).extend(EBNF::PEG::Rule),
73
+ ]
74
+ end
75
+
data/lib/ebnf/ll1.rb CHANGED
@@ -1,4 +1,86 @@
1
1
  module EBNF
2
+ ##
3
+ # This module extends {EBNF::Base} to create metadata including _branch_, [First/Follow][], and other tables which are used by {EBNF::LL1::Parser} to recognize examples of the associated grammar.
4
+ #
5
+ ### Branch Table
6
+ #
7
+ # The Branch table is a hash mapping production rules to a hash relating terminals appearing in input to sequence of productions to follow when the corresponding input terminal is found. This allows either the `seq` primitive, where all terminals map to the same sequence of productions, or the `alt` primitive, where each terminal may map to a different production.
8
+ #
9
+ # BRANCH = {
10
+ # :alt => {
11
+ # "(" => [:seq, :_alt_1],
12
+ # :HEX => [:seq, :_alt_1],
13
+ # :O_RANGE => [:seq, :_alt_1],
14
+ # :RANGE => [:seq, :_alt_1],
15
+ # :STRING1 => [:seq, :_alt_1],
16
+ # :STRING2 => [:seq, :_alt_1],
17
+ # :SYMBOL => [:seq, :_alt_1],
18
+ # },
19
+ # ...
20
+ # :declaration => {
21
+ # "@pass" => [:pass],
22
+ # "@terminals" => ["@terminals"],
23
+ # },
24
+ # ...
25
+ # }
26
+ #
27
+ # In this case the `alt` rule, which is `seq ('|' seq)*`, can happen when any of the specified tokens appears on the input stream. They all cause the same token to be passed to the `seq` rule, followed by `_alt_1`, which handles the `('|' seq)*` portion of the rule after the first sequence is matched.
28
+ #
29
+ # The `declaration` rule is `'@terminals' | pass`, using the `alt` primitive to determine the production to run based on the terminal appearing on the input stream. Eventually, a terminal production is found and the token is consumed.
30
+ #
31
+ ### First/Follow Table
32
+ #
33
+ # The [First/Follow][] table is a hash mapping production rules to the terminals that may begin or follow the rule. For example:
34
+ #
35
+ # FIRST = {
36
+ # :alt => [
37
+ # :HEX,
38
+ # :SYMBOL,
39
+ # :RANGE,
40
+ # :O_RANGE,
41
+ # :STRING1,
42
+ # :STRING2,
43
+ # "("],
44
+ # ...
45
+ # }
46
+ #
47
+ ### Terminals Table
48
+ #
49
+ # This table is a simple list of the terminal productions found in the grammar. For example:
50
+ #
51
+ # TERMINALS = ["(", ")", "-",
52
+ # "@pass", "@terminals",
53
+ # :HEX, :LHS, :O_RANGE,:POSTFIX,
54
+ # :RANGE, :STRING1, :STRING2, :SYMBOL,"|"
55
+ # ].freeze
56
+ #
57
+ ### Cleanup Table
58
+ #
59
+ # This table identifies productions which used EBNF rules, which are transformed to BNF for actual parsing. This allows the parser, in some cases, to reproduce *star*, *plus*, and *opt* rule matches. For example:
60
+ #
61
+ # CLEANUP = {
62
+ # :_alt_1 => :star,
63
+ # :_alt_3 => :merge,
64
+ # :_diff_1 => :opt,
65
+ # :ebnf => :star,
66
+ # :_ebnf_2 => :merge,
67
+ # :_postfix_1 => :opt,
68
+ # :seq => :plus,
69
+ # :_seq_1 => :star,
70
+ # :_seq_2 => :merge,
71
+ # }.freeze
72
+ #
73
+ # In this case the `ebnf` rule was `(declaration | rule)*`. As BNF does not support a star operator, this is decomposed into a set of rules using `alt` and `seq` primitives:
74
+ #
75
+ # ebnf ::= _empty | _ebnf_2
76
+ # _ebnf_1 ::= declaration | rule
77
+ # _ebnf_2 ::= _ebnf_1 ebnf
78
+ # _ebnf_3 ::= ebnf
79
+ #
80
+ # The `_empty` production matches an empty string, so allows for no value. `_ebnf_2` matches `declaration | rule` (using the `alt` primitive) followed by `ebnf`, creating a sequence of zero or more `declaration` or `rule` members.
81
+ #
82
+ # [First/Follow]: https://en.wikipedia.org/wiki/LL_parser#Constructing_an_LL.281.29_parsing_table
83
+
2
84
  module LL1
3
85
  autoload :Lexer, "ebnf/ll1/lexer"
4
86
  autoload :Parser, "ebnf/ll1/parser"
@@ -51,8 +133,40 @@ module EBNF
51
133
  ##
52
134
  # Create first/follow for each rule using techniques defined for LL(1) parsers.
53
135
  #
136
+ # This takes rules which have been transformed into BNF and adds first/follow and other information to the rules to allow the generation of metadata tables used for driving a parser.
137
+ #
138
+ # Given an initial rule in EBNF:
139
+ #
140
+ # (rule ebnf "1" (star declaration rule))
141
+ #
142
+ # The BNF transformation becomes:
143
+ #
144
+ # (rule ebnf "1" (alt _empty _ebnf_2))
145
+ # (rule _ebnf_1 "1.1" (alt declaration rule))
146
+ # (rule _ebnf_2 "1.2" (seq _ebnf_1 ebnf))
147
+ # (rule _ebnf_3 "1.3" (seq ebnf))
148
+ #
149
+ # After running this method, the rules are annotated with first/follow and cleanup rules:
150
+ #
151
+ # (rule ebnf "1"
152
+ # (start #t)
153
+ # (first "@pass" "@terminals" LHS _eps)
154
+ # (follow _eof)
155
+ # (cleanup star)
156
+ # (alt _empty _ebnf_2))
157
+ # (rule _ebnf_1 "1.1"
158
+ # (first "@pass" "@terminals" LHS)
159
+ # (follow "@pass" "@terminals" LHS _eof)
160
+ # (alt declaration rule))
161
+ # (rule _ebnf_2 "1.2"
162
+ # (first "@pass" "@terminals" LHS)
163
+ # (follow _eof)
164
+ # (cleanup merge)
165
+ # (seq _ebnf_1 ebnf))
166
+ # (rule _ebnf_3 "1.3" (first "@pass" "@terminals" LHS _eps) (follow _eof) (seq ebnf))
167
+ #
54
168
  # @return [EBNF] self
55
- # @see http://en.wikipedia.org/wiki/LL_parser#Constructing_an_LL.281.29_parsing_table
169
+ # @see https://en.wikipedia.org/wiki/LL_parser#Constructing_an_LL.281.29_parsing_table
56
170
  # @param [Array<Symbol>] starts
57
171
  # Set of symbols which are start rules
58
172
  def first_follow(*starts)
@@ -96,8 +210,9 @@ module EBNF
96
210
  firsts, follows = 0, 0
97
211
  # add Fi(wi) to Fi(Ai) for every rule Ai → wi
98
212
  #
99
- # For sequences, this is the first rule in the sequence.
100
- # For alts, this is every rule in the sequence
213
+ # * For sequences, this is the first rule in the sequence.
214
+ # * For alts, this is every rule in the sequence
215
+ # * Other rules don't matter, as they don't appear in strict BNF
101
216
  each(:rule) do |ai|
102
217
  # Fi(a w' ) = { a } for every terminal a
103
218
  ai.terminals(ast).each do |t|
@@ -276,6 +391,19 @@ module EBNF
276
391
  end
277
392
  end
278
393
 
394
+ ##
395
+ # Output Ruby parser files for LL(1) parsing
396
+ #
397
+ # @param [IO, StringIO] output
398
+ def to_ruby_ll1(output, **options)
399
+ self.outputTable(output, 'BRANCH', self.branch, 1)
400
+ self.outputTable(output, 'TERMINALS', self.terminals, 1)
401
+ self.outputTable(output, 'FIRST', self.first, 1)
402
+ self.outputTable(output, 'FOLLOW', self.follow, 1)
403
+ self.outputTable(output, 'CLEANUP', self.cleanup, 1)
404
+ self.outputTable(output, 'PASS', [self.pass], 1) if self.pass
405
+ end
406
+
279
407
  private
280
408
  def do_production(lhs)
281
409
  rule = find_rule(lhs)
@@ -29,7 +29,7 @@ module EBNF::LL1
29
29
  # warn error.inspect
30
30
  # end
31
31
  #
32
- # @see http://en.wikipedia.org/wiki/Lexical_analysis
32
+ # @see https://en.wikipedia.org/wiki/Lexical_analysis
33
33
  class Lexer
34
34
  include Enumerable
35
35
 
@@ -43,10 +43,10 @@ module EBNF::LL1
43
43
  "\\'" => '\'', # \u0027 (apostrophe-quote, single quote mark)
44
44
  '\\\\' => '\\' # \u005C (backslash)
45
45
  }.freeze
46
- ESCAPE_CHAR4 = /\\u(?:[0-9A-Fa-f]{4,4})/.freeze # \uXXXX
47
- ESCAPE_CHAR8 = /\\U(?:[0-9A-Fa-f]{8,8})/.freeze # \UXXXXXXXX
48
- ECHAR = /\\./ # More liberal unescaping
49
- UCHAR = /#{ESCAPE_CHAR4}|#{ESCAPE_CHAR8}/.freeze
46
+ ESCAPE_CHAR4 = /\\u(?:[0-9A-Fa-f]{4,4})/u.freeze # \uXXXX
47
+ ESCAPE_CHAR8 = /\\U(?:[0-9A-Fa-f]{8,8})/u.freeze # \UXXXXXXXX
48
+ ECHAR = /\\./u.freeze # More liberal unescaping
49
+ UCHAR = /#{ESCAPE_CHAR4}|#{ESCAPE_CHAR8}/n.freeze
50
50
 
51
51
  ##
52
52
  # @return [Regexp] defines whitespace, including comments, otherwise whitespace must be explicit in terminals
@@ -59,7 +59,7 @@ module EBNF::LL1
59
59
  #
60
60
  # @param [String] string
61
61
  # @return [String]
62
- # @see http://www.w3.org/TR/rdf-sparql-query/#codepointEscape
62
+ # @see https://www.w3.org/TR/rdf-sparql-query/#codepointEscape
63
63
  def self.unescape_codepoints(string)
64
64
  string = string.dup
65
65
  string.force_encoding(Encoding::ASCII_8BIT) if string.respond_to?(:force_encoding)
@@ -81,7 +81,7 @@ module EBNF::LL1
81
81
  #
82
82
  # @param [String] input
83
83
  # @return [String]
84
- # @see http://www.w3.org/TR/rdf-sparql-query/#grammarEscapes
84
+ # @see https://www.w3.org/TR/rdf-sparql-query/#grammarEscapes
85
85
  def self.unescape_string(input)
86
86
  input.gsub(ECHAR) { |escaped| ESCAPE_CHARS[escaped] || escaped[1..-1]}
87
87
  end
@@ -131,7 +131,6 @@ module EBNF::LL1
131
131
 
132
132
  raise Error, "Terminal patterns not defined" unless @terminals && @terminals.length > 0
133
133
 
134
- @lineno = 1
135
134
  @scanner = Scanner.new(input, **options)
136
135
  end
137
136
 
@@ -147,12 +146,6 @@ module EBNF::LL1
147
146
  # @return [String]
148
147
  attr_accessor :input
149
148
 
150
- ##
151
- # The current line number (zero-based).
152
- #
153
- # @return [Integer]
154
- attr_reader :lineno
155
-
156
149
  ##
157
150
  # Returns `true` if the input string is lexically valid.
158
151
  #
@@ -194,7 +187,7 @@ module EBNF::LL1
194
187
 
195
188
  @first ||= begin
196
189
  {} while !scanner.eos? && skip_whitespace
197
- return @scanner = nil if scanner.eos?
190
+ return nil if scanner.eos?
198
191
 
199
192
  token = match_token(*types)
200
193
 
@@ -233,7 +226,7 @@ module EBNF::LL1
233
226
  # @return [Token]
234
227
  def recover(*types)
235
228
  until scanner.eos? || tok = match_token(*types)
236
- if scanner.skip_until(@whitespace || /\s/m).nil? # Skip past current "token"
229
+ if scanner.skip_until(@whitespace || /\s+/m).nil? # Skip past current "token"
237
230
  # No whitespace at the end, must be an end of string
238
231
  scanner.terminate
239
232
  else
@@ -243,6 +236,14 @@ module EBNF::LL1
243
236
  scanner.unscan if tok
244
237
  first
245
238
  end
239
+
240
+ ##
241
+ # The current line number (one-based).
242
+ #
243
+ # @return [Integer]
244
+ def lineno
245
+ scanner.lineno
246
+ end
246
247
  protected
247
248
 
248
249
  # @return [StringScanner]
@@ -253,9 +254,7 @@ module EBNF::LL1
253
254
  def skip_whitespace
254
255
  # skip all white space; the scanner keeps track of the current line number
255
256
  while @whitespace && !scanner.eos?
256
- if matched = scanner.scan(@whitespace)
257
- @lineno += matched.count("\n")
258
- else
257
+ unless scanner.scan(@whitespace)
259
258
  return
260
259
  end
261
260
  end
@@ -281,7 +280,6 @@ module EBNF::LL1
281
280
  if matched = scanner.scan(term.regexp)
282
281
  #STDERR.puts " matched #{term.type.inspect}: #{matched.inspect}"
283
282
  tok = token(term.type, term.canonicalize(matched))
284
- @lineno += matched.count("\n")
285
283
  return tok
286
284
  end
287
285
  end
@@ -372,7 +370,7 @@ module EBNF::LL1
372
370
  # token.type #=> :LANGTAG
373
371
  # token.value #=> "en"
374
372
  #
375
- # @see http://en.wikipedia.org/wiki/Lexical_analysis#Token
373
+ # @see https://en.wikipedia.org/wiki/Lexical_analysis#Token
376
374
  class Token
377
375
  ##
378
376
  # The token's symbol type.
@@ -493,7 +491,7 @@ module EBNF::LL1
493
491
  # "invalid token '%' on line 10",
494
492
  # input: query, token: '%', lineno: 9)
495
493
  #
496
- # @see http://ruby-doc.org/core/classes/StandardError.html
494
+ # @see https://ruby-doc.org/core/classes/StandardError.html
497
495
  class Error < StandardError
498
496
  ##
499
497
  # The input string associated with the error.
@@ -3,12 +3,52 @@ require 'ebnf/ll1/lexer'
3
3
  module EBNF::LL1
4
4
  ##
5
5
  # A Generic LL1 parser using a lexer and branch tables defined using the SWAP tool chain (modified).
6
+ #
7
+ # # Creating terminal definitions and parser rules to parse generated grammars
8
+ #
9
+ # The parser is initialized with callbacks invoked on entry and exit
10
+ # to each `terminal` and `production`. A trivial parser loop can be described as follows:
11
+ #
12
+ # require 'ebnf/ll1/parser'
13
+ # require 'meta'
14
+ #
15
+ # class Parser
16
+ # include Meta
17
+ # include EBNF::LL1::Parser
18
+ #
19
+ # terminal(:SYMBOL, /([a-z]|[A-Z]|[0-9]|_)+/) do |prod, token, input|
20
+ # # Add data based on scanned token to input
21
+ # input[:symbol] = token.value
22
+ # end
23
+ #
24
+ # start_production(:rule) do |input, current, callback|
25
+ # # Process on start of production
26
+ # # Set state for entry into recursed rules through current
27
+ #
28
+ # # Callback to parser loop with callback
29
+ # end
30
+ #
31
+ # production(:rule) do |input, current, callback|
32
+ # # Process on end of production
33
+ # # return results in input, retrieve results from recursed rules in current
34
+ #
35
+ # # Callback to parser loop with callback
36
+ # end
37
+ #
38
+ # def initialize(input)
39
+ # parse(input, start_symbol,
40
+ # branch: BRANCH,
41
+ # first: FIRST,
42
+ # follow: FOLLOW,
43
+ # cleanup: CLEANUP
44
+ # ) do |context, *data|
45
+ # # Process calls from callback from productions
46
+ #
47
+ # rescue ArgumentError, EBNF::LL1::Parser::Error => e
48
+ # progress("Parsing completed with errors:\n\t#{e.message}")
49
+ # raise RDF::ReaderError, e.message if validate?
50
+ # end
6
51
  module Parser
7
- ##
8
- # @private
9
- # level above which debug messages are supressed
10
- DEBUG_LEVEL = 10
11
-
12
52
  ##
13
53
  # @return [Integer] line number of current token
14
54
  attr_reader :lineno
@@ -186,7 +226,7 @@ module EBNF::LL1
186
226
  # def each_statement(&block)
187
227
  # @callback = block
188
228
  #
189
- # parse(START.to_sym) do |context, *data|
229
+ # parse(input, START.to_sym) do |context, *data|
190
230
  # case context
191
231
  # when :statement
192
232
  # yield *data
@@ -205,16 +245,13 @@ module EBNF::LL1
205
245
  # Lists valid terminals that can precede each production (for error recovery).
206
246
  # @option options [Hash{Symbol,String => Array<Symbol,String>}] :follow ({})
207
247
  # Lists valid terminals that can follow each production (for error recovery).
208
- # @option options [Boolean] :validate (false)
209
- # whether to validate the parsed statements and values. If not validating, the parser will attempt to recover from errors.
210
- # @option options [Boolean] :progress
211
- # Show progress of parser productions
212
- # @option options [Boolean] :debug
213
- # Detailed debug output
214
- # @option options [Boolean] :reset_on_start
215
- # Reset the parser state if the start token set with `prod` is found in a production. This reduces the production stack depth growth, which is appropriate for some grammars.
216
248
  # @option options[Integer] :high_water passed to lexer
249
+ # @option options [Logger] :logger for errors/progress/debug.
217
250
  # @option options[Integer] :low_water passed to lexer
251
+ # @option options [Boolean] :reset_on_start
252
+ # Reset the parser state if the start token set with `prod` is found in a production. This reduces the production stack depth growth, which is appropriate for some grammars.
253
+ # @option options [Boolean] :validate (false)
254
+ # whether to validate the parsed statements and values. If not validating, the parser will attempt to recover from errors.
218
255
  # @yield [context, *data]
219
256
  # Yields for to return data to parser
220
257
  # @yieldparam [:statement, :trace] context
@@ -225,13 +262,9 @@ module EBNF::LL1
225
262
  # @raise [Exception] Raises exceptions for parsing errors
226
263
  # or errors raised during processing callbacks. Internal
227
264
  # errors are raised using {Error}.
228
- # @see http://cs.adelaide.edu.au/~charles/lt/Lectures/07-ErrorRecovery.pdf
265
+ # @see https://cs.adelaide.edu.au/~charles/lt/Lectures/07-ErrorRecovery.pdf
229
266
  def parse(input = nil, start = nil, **options, &block)
230
267
  @options = options.dup
231
- @options[:debug] ||= case
232
- when @options[:progress] then 2
233
- when @options[:validate] then 1
234
- end
235
268
  @branch = options[:branch]
236
269
  @first = options[:first] ||= {}
237
270
  @follow = options[:follow] ||= {}
@@ -356,9 +389,9 @@ module EBNF::LL1
356
389
  end
357
390
 
358
391
  # Get the list of follows for this sequence, this production and the stacked productions.
359
- debug("recovery", "stack follows:", level: 4)
392
+ debug("recovery", "stack follows:")
360
393
  todo_stack.reverse.each do |todo|
361
- debug("recovery", level: 4) {" #{todo[:prod]}: #{@follow[todo[:prod]].inspect}"}
394
+ debug("recovery") {" #{todo[:prod]}: #{@follow[todo[:prod]].inspect}"}
362
395
  end
363
396
 
364
397
  # Find all follows to the top of the stack
@@ -466,14 +499,15 @@ module EBNF::LL1
466
499
  protected
467
500
 
468
501
  ##
469
- # Error information, used as level `0` debug messages.
502
+ # Error information, used as level `3` logger messages.
503
+ # Messages may be logged and are saved for reporting at end of parsing.
470
504
  #
471
505
  # @param [String] node Relevant location associated with message
472
506
  # @param [String] message Error string
473
- # @param [Hash] options
507
+ # @param [Hash{Symbol => Object}] options
474
508
  # @option options [URI, #to_s] :production
475
509
  # @option options [Token] :token
476
- # @see {#debug}
510
+ # @see #debug
477
511
  def error(node, message, **options)
478
512
  lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
479
513
  m = "ERROR "
@@ -483,83 +517,82 @@ module EBNF::LL1
483
517
  m += ", production = #{options[:production].inspect}" if options[:production]
484
518
  @error_log << m unless @recovering
485
519
  @recovering = true
486
- debug(node, m, level: 0, **options)
520
+ debug(node, m, level: options.fetch(:level, 3), **options)
487
521
  if options[:raise] || @options[:validate]
488
522
  raise Error.new(m, lineno: lineno, token: options[:token], production: options[:production])
489
523
  end
490
524
  end
491
525
 
492
526
  ##
493
- # Warning information, used as level `1` debug messages.
527
+ # Warning information, used as level `2` logger messages.
528
+ # Messages may be logged and are saved for reporting at end of parsing.
494
529
  #
495
530
  # @param [String] node Relevant location associated with message
496
531
  # @param [String] message Error string
497
532
  # @param [Hash] options
498
533
  # @option options [URI, #to_s] :production
499
534
  # @option options [Token] :token
500
- # @see {#debug}
535
+ # @see #debug
501
536
  def warn(node, message, **options)
537
+ lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
502
538
  m = "WARNING "
503
- m += "[line: #{@lineno}] " if @lineno
539
+ m += "[line: #{lineno}] " if lineno
504
540
  m += message
505
541
  m += " (found #{options[:token].inspect})" if options[:token]
506
542
  m += ", production = #{options[:production].inspect}" if options[:production]
507
543
  @error_log << m unless @recovering
508
- debug(node, m, level: 1, **options)
544
+ debug(node, m, level: 2, lineno: lineno, **options)
509
545
  end
510
546
 
511
547
  ##
512
- # Progress output when parsing. Passed as level `2` debug messages.
548
+ # Progress logged when parsing. Passed as level `1` logger messages.
513
549
  #
514
- # @overload progress(node, message, **options)
550
+ # The call is ignored, unless `@options[:logger]` is set.
551
+ #
552
+ # @overload progress(node, message, **options, &block)
515
553
  # @param [String] node Relevant location associated with message
516
554
  # @param [String] message ("")
517
555
  # @param [Hash] options
518
556
  # @option options [Integer] :depth
519
557
  # Recursion depth for indenting output
520
- # @see {#debug}
558
+ # @see #debug
521
559
  def progress(node, *args, &block)
522
- return unless @options[:progress] || @options[:debug]
560
+ return unless @options[:logger]
561
+ lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
523
562
  args << {} unless args.last.is_a?(Hash)
524
- args.last[:level] ||= 2
563
+ args.last[:level] ||= 1
564
+ args.last[:lineno] ||= lineno
525
565
  debug(node, *args, &block)
526
566
  end
527
567
 
528
568
  ##
529
- # Progress output when debugging.
569
+ # Debug logging.
530
570
  #
531
- # The call is ignored, unless `@options[:debug]` is set, in which
532
- # case it yields tracing information as indicated. Additionally,
533
- # if `@options[:debug]` is an Integer, the call is aborted if the
534
- # `:level` option is less than than `:level`.
571
+ # The call is ignored, unless `@options[:logger]` is set.
535
572
  #
536
573
  # @overload debug(node, message, **options)
537
574
  # @param [Array<String>] args Relevant location associated with message
538
575
  # @param [Hash] options
539
576
  # @option options [Integer] :depth
540
577
  # Recursion depth for indenting output
541
- # @option options [Integer] :level
542
- # Level assigned to message, by convention, level `0` is for
543
- # errors, level `1` is for warnings, level `2` is for parser
544
- # progress information, and anything higher is for various levels
545
- # of debug information.
546
- #
547
- # @yield trace, level, lineno, depth, args
548
- # @yieldparam [:trace] trace
549
- # @yieldparam [Integer] level
550
- # @yieldparam [Integer] lineno
551
- # @yieldparam [Integer] depth Recursive depth of productions
552
- # @yieldparam [Array<String>] args
553
- # @yieldreturn [String] added to message
554
- def debug(*args)
555
- return unless @options[:debug] && @parse_callback
578
+ # @yieldreturn [String] additional string appended to `message`.
579
+ def debug(*args, &block)
580
+ return unless @options[:logger]
556
581
  options = args.last.is_a?(Hash) ? args.pop : {}
557
- debug_level = options.fetch(:level, 3)
558
- return if @options[:debug].is_a?(Integer) && debug_level > @options[:debug]
559
-
582
+ lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
583
+ level = options.fetch(:level, 0)
560
584
  depth = options[:depth] || self.depth
561
- args << yield if block_given?
562
- @parse_callback.call(:trace, debug_level, @lineno, depth, *args)
585
+
586
+ if self.respond_to?(:log_debug)
587
+ level = [:debug, :info, :warn, :error, :fatal][level]
588
+ log_debug(*args, **options.merge(level: level, lineno: lineno, depth: depth), &block)
589
+ elsif @options[:logger].respond_to?(:add)
590
+ args << yield if block_given?
591
+ @options[:logger].add(level, "[#{lineno}]" + (" " * depth) + args.join(" "))
592
+ elsif @options[:logger].respond_to?(:<<)
593
+ args << yield if block_given?
594
+ @options[:logger] << "[#{lineno}]" + (" " * depth) + args.join(" ")
595
+ end
563
596
  end
564
597
 
565
598
  private
@@ -570,7 +603,7 @@ module EBNF::LL1
570
603
  if handler
571
604
  # Create a new production data element, potentially allowing handler
572
605
  # to customize before pushing on the @prod_data stack
573
- progress("#{prod}(:start):#{@prod_data.length}") {@prod_data.last}
606
+ debug("#{prod}(:start):#{@prod_data.length}") {@prod_data.last}
574
607
  data = {}
575
608
  begin
576
609
  self.class.eval_with_binding(self) {
@@ -584,12 +617,12 @@ module EBNF::LL1
584
617
  elsif [:merge, :star].include?(@cleanup[prod])
585
618
  # Save current data to merge later
586
619
  @prod_data << {}
587
- progress("#{prod}(:start}:#{@prod_data.length}:cleanup:#{@cleanup[prod]}") { get_token.inspect + (@recovering ? ' recovering' : '')}
620
+ debug("#{prod}(:start}:#{@prod_data.length}:cleanup:#{@cleanup[prod]}") { get_token.inspect + (@recovering ? ' recovering' : '')}
588
621
  else
589
622
  # Make sure we push as many was we pop, even if there is no
590
623
  # explicit start handler
591
624
  @prod_data << {} if self.class.production_handlers[prod]
592
- progress("#{prod}(:start:#{@prod_data.length})") { get_token.inspect + (@recovering ? ' recovering' : '')}
625
+ debug("#{prod}(:start:#{@prod_data.length})") { get_token.inspect + (@recovering ? ' recovering' : '')}
593
626
  end
594
627
  #puts "prod_data(s): " + @prod_data.inspect
595
628
  end
@@ -623,7 +656,7 @@ module EBNF::LL1
623
656
  else Array(input[k]) + Array(v)
624
657
  end
625
658
  end
626
- progress("#{prod}(:finish):#{@prod_data.length} cleanup:#{@cleanup[prod]}") {@prod_data.last}
659
+ debug("#{prod}(:finish):#{@prod_data.length} cleanup:#{@cleanup[prod]}") {@prod_data.last}
627
660
  else
628
661
  progress("#{prod}(:finish):#{@prod_data.length}") { "recovering" if @recovering }
629
662
  end
@@ -730,7 +763,7 @@ module EBNF::LL1
730
763
  # "invalid token '%' on line 10",
731
764
  # token: '%', lineno: 9, production: :turtleDoc)
732
765
  #
733
- # @see http://ruby-doc.org/core/classes/StandardError.html
766
+ # @see https://ruby-doc.org/core/classes/StandardError.html
734
767
  class Error < StandardError
735
768
  ##
736
769
  # The current production.