ebnf 1.2.0 → 2.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +223 -199
  3. data/UNLICENSE +1 -1
  4. data/VERSION +1 -1
  5. data/bin/ebnf +38 -19
  6. data/etc/abnf-core.ebnf +52 -0
  7. data/etc/abnf.abnf +121 -0
  8. data/etc/abnf.ebnf +124 -0
  9. data/etc/abnf.sxp +45 -0
  10. data/etc/doap.ttl +23 -18
  11. data/etc/ebnf.ebnf +21 -33
  12. data/etc/ebnf.html +76 -160
  13. data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
  14. data/etc/ebnf.ll1.sxp +182 -183
  15. data/etc/ebnf.peg.rb +90 -0
  16. data/etc/ebnf.peg.sxp +84 -0
  17. data/etc/ebnf.sxp +40 -41
  18. data/etc/iso-ebnf.ebnf +140 -0
  19. data/etc/iso-ebnf.isoebnf +138 -0
  20. data/etc/iso-ebnf.sxp +65 -0
  21. data/etc/sparql.ebnf +4 -4
  22. data/etc/sparql.html +1603 -1751
  23. data/etc/sparql.ll1.sxp +7372 -7372
  24. data/etc/sparql.peg.rb +532 -0
  25. data/etc/sparql.peg.sxp +597 -0
  26. data/etc/sparql.sxp +363 -362
  27. data/etc/turtle.ebnf +3 -3
  28. data/etc/turtle.html +465 -517
  29. data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
  30. data/etc/turtle.ll1.sxp +425 -425
  31. data/etc/turtle.peg.rb +182 -0
  32. data/etc/turtle.peg.sxp +199 -0
  33. data/etc/turtle.sxp +103 -101
  34. data/lib/ebnf.rb +6 -1
  35. data/lib/ebnf/abnf.rb +301 -0
  36. data/lib/ebnf/abnf/core.rb +23 -0
  37. data/lib/ebnf/abnf/meta.rb +111 -0
  38. data/lib/ebnf/base.rb +114 -69
  39. data/lib/ebnf/bnf.rb +1 -26
  40. data/lib/ebnf/ebnf/meta.rb +90 -0
  41. data/lib/ebnf/isoebnf.rb +229 -0
  42. data/lib/ebnf/isoebnf/meta.rb +75 -0
  43. data/lib/ebnf/ll1.rb +131 -3
  44. data/lib/ebnf/ll1/lexer.rb +20 -22
  45. data/lib/ebnf/ll1/parser.rb +97 -64
  46. data/lib/ebnf/ll1/scanner.rb +82 -50
  47. data/lib/ebnf/native.rb +320 -0
  48. data/lib/ebnf/parser.rb +285 -302
  49. data/lib/ebnf/peg.rb +39 -0
  50. data/lib/ebnf/peg/parser.rb +561 -0
  51. data/lib/ebnf/peg/rule.rb +250 -0
  52. data/lib/ebnf/rule.rb +442 -148
  53. data/lib/ebnf/terminals.rb +21 -0
  54. data/lib/ebnf/writer.rb +587 -82
  55. metadata +125 -18
  56. data/etc/sparql.rb +0 -45773
@@ -0,0 +1,75 @@
1
+ # This file is automatically generated by ebnf version 2.0.0
2
+ # Derived from etc/iso-ebnf.ebnf
3
+ module ISOEBNFMeta
4
+ RULES = [
5
+ EBNF::Rule.new(:syntax, nil, [:star, :syntax_rule]).extend(EBNF::PEG::Rule),
6
+ EBNF::Rule.new(:syntax_rule, nil, [:seq, :meta_identifier, :defining_symbol, :definitions_list, :terminator_symbol]).extend(EBNF::PEG::Rule),
7
+ EBNF::Rule.new(:definitions_list, nil, [:seq, :single_definition, :_definitions_list_1]).extend(EBNF::PEG::Rule),
8
+ EBNF::Rule.new(:_definitions_list_1, nil, [:star, :_definitions_list_2]).extend(EBNF::PEG::Rule),
9
+ EBNF::Rule.new(:_definitions_list_2, nil, [:seq, :definition_separator_symbol, :definitions_list]).extend(EBNF::PEG::Rule),
10
+ EBNF::Rule.new(:single_definition, nil, [:seq, :term, :_single_definition_1]).extend(EBNF::PEG::Rule),
11
+ EBNF::Rule.new(:_single_definition_1, nil, [:star, :_single_definition_2]).extend(EBNF::PEG::Rule),
12
+ EBNF::Rule.new(:_single_definition_2, nil, [:seq, ",", :term]).extend(EBNF::PEG::Rule),
13
+ EBNF::Rule.new(:term, nil, [:seq, :factor, :_term_1]).extend(EBNF::PEG::Rule),
14
+ EBNF::Rule.new(:_term_1, nil, [:opt, :_term_2]).extend(EBNF::PEG::Rule),
15
+ EBNF::Rule.new(:_term_2, nil, [:seq, "-", :exception]).extend(EBNF::PEG::Rule),
16
+ EBNF::Rule.new(:exception, nil, [:seq, :factor]).extend(EBNF::PEG::Rule),
17
+ EBNF::Rule.new(:factor, nil, [:seq, :_factor_1, :primary]).extend(EBNF::PEG::Rule),
18
+ EBNF::Rule.new(:_factor_1, nil, [:opt, :_factor_2]).extend(EBNF::PEG::Rule),
19
+ EBNF::Rule.new(:_factor_2, nil, [:seq, :integer, "*"]).extend(EBNF::PEG::Rule),
20
+ EBNF::Rule.new(:primary, nil, [:alt, :optional_sequence, :repeated_sequence, :special_sequence, :grouped_sequence, :meta_identifier, :terminal_string, :empty]).extend(EBNF::PEG::Rule),
21
+ EBNF::Rule.new(:optional_sequence, nil, [:seq, :start_option_symbol, :definitions_list, :end_option_symbol]).extend(EBNF::PEG::Rule),
22
+ EBNF::Rule.new(:repeated_sequence, nil, [:seq, :start_repeat_symbol, :definitions_list, :end_repeat_symbol]).extend(EBNF::PEG::Rule),
23
+ EBNF::Rule.new(:grouped_sequence, nil, [:seq, "(", :definitions_list, ")"]).extend(EBNF::PEG::Rule),
24
+ EBNF::Rule.new(:_terminals, nil, [:seq], kind: :terminals).extend(EBNF::PEG::Rule),
25
+ EBNF::Rule.new(:terminal_string, nil, [:alt, :_terminal_string_1, :_terminal_string_2], kind: :terminal).extend(EBNF::PEG::Rule),
26
+ EBNF::Rule.new(:_terminal_string_1, nil, [:seq, "'", :_terminal_string_3, "'"]).extend(EBNF::PEG::Rule),
27
+ EBNF::Rule.new(:_terminal_string_3, nil, [:plus, :first_terminal_character]).extend(EBNF::PEG::Rule),
28
+ EBNF::Rule.new(:_terminal_string_2, nil, [:seq, "\"", :_terminal_string_4, "\""]).extend(EBNF::PEG::Rule),
29
+ EBNF::Rule.new(:_terminal_string_4, nil, [:plus, :second_terminal_character]).extend(EBNF::PEG::Rule),
30
+ EBNF::Rule.new(:meta_identifier, nil, [:seq, :letter, :_meta_identifier_1], kind: :terminal).extend(EBNF::PEG::Rule),
31
+ EBNF::Rule.new(:_meta_identifier_1, nil, [:star, :meta_identifier_character]).extend(EBNF::PEG::Rule),
32
+ EBNF::Rule.new(:integer, nil, [:plus, :decimal_digit], kind: :terminal).extend(EBNF::PEG::Rule),
33
+ EBNF::Rule.new(:special_sequence, nil, [:seq, "?", :_special_sequence_1, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
34
+ EBNF::Rule.new(:_special_sequence_1, nil, [:star, :special_sequence_character]).extend(EBNF::PEG::Rule),
35
+ EBNF::Rule.new(:comment, nil, [:seq, :start_comment_symbol, :_comment_1, :end_comment_symbol], kind: :terminal).extend(EBNF::PEG::Rule),
36
+ EBNF::Rule.new(:_comment_1, nil, [:star, :comment_symbol]).extend(EBNF::PEG::Rule),
37
+ EBNF::Rule.new(:comment_symbol, nil, [:alt, :comment, :commentless_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule),
38
+ EBNF::Rule.new(:commentless_symbol, nil, [:alt, :terminal_character, :meta_identifier, :integer, :terminal_string, :special_sequence], kind: :terminal).extend(EBNF::PEG::Rule),
39
+ EBNF::Rule.new(:letter, nil, [:range, "a-zA-Z"], kind: :terminal).extend(EBNF::PEG::Rule),
40
+ EBNF::Rule.new(:decimal_digit, nil, [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule),
41
+ EBNF::Rule.new(:meta_identifier_character, nil, [:alt, :letter, :decimal_digit, "_"], kind: :terminal).extend(EBNF::PEG::Rule),
42
+ EBNF::Rule.new(:first_terminal_character, nil, [:diff, :terminal_character, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
43
+ EBNF::Rule.new(:second_terminal_character, nil, [:diff, :terminal_character, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
44
+ EBNF::Rule.new(:special_sequence_character, nil, [:diff, :terminal_character, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
45
+ EBNF::Rule.new(:terminal_character, nil, [:alt, :letter, :decimal_digit, :concatenate_symbol, :defining_symbol, :definition_separator_symbol, :end_comment_symbol, :end_group_symbol, :end_option_symbol, :end_repeat_symbol, :except_symbol, :first_quote_symbol, :repetition_symbol, :second_quote_symbol, :special_sequence_symbol, :start_comment_symbol, :start_group_symbol, :start_option_symbol, :start_repeat_symbol, :terminator_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule),
46
+ EBNF::Rule.new(:other_character, nil, [:alt, :_other_character_1, "\\"], kind: :terminal).extend(EBNF::PEG::Rule),
47
+ EBNF::Rule.new(:_other_character_1, nil, [:range, ":+_%@&$<>^` ̃#x20#x23"], kind: :terminal).extend(EBNF::PEG::Rule),
48
+ EBNF::Rule.new(:gap_separator, nil, [:range, "#x9#xa#xb#xc#xd#x20"], kind: :terminal).extend(EBNF::PEG::Rule),
49
+ EBNF::Rule.new(:_pass, nil, [:alt, :__pass_1, :comment], kind: :pass).extend(EBNF::PEG::Rule),
50
+ EBNF::Rule.new(:__pass_1, nil, [:plus, :gap_separator]).extend(EBNF::PEG::Rule),
51
+ EBNF::Rule.new(:empty, nil, [:seq, ""], kind: :terminal).extend(EBNF::PEG::Rule),
52
+ EBNF::Rule.new(:concatenate_symbol, nil, [:seq, ","], kind: :terminal).extend(EBNF::PEG::Rule),
53
+ EBNF::Rule.new(:repetition_symbol, nil, [:seq, "*"], kind: :terminal).extend(EBNF::PEG::Rule),
54
+ EBNF::Rule.new(:except_symbol, nil, [:seq, "-"], kind: :terminal).extend(EBNF::PEG::Rule),
55
+ EBNF::Rule.new(:first_quote_symbol, nil, [:seq, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
56
+ EBNF::Rule.new(:second_quote_symbol, nil, [:seq, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
57
+ EBNF::Rule.new(:start_comment_symbol, nil, [:seq, "(*"], kind: :terminal).extend(EBNF::PEG::Rule),
58
+ EBNF::Rule.new(:end_comment_symbol, nil, [:seq, "*)"], kind: :terminal).extend(EBNF::PEG::Rule),
59
+ EBNF::Rule.new(:start_group_symbol, nil, [:seq, "("], kind: :terminal).extend(EBNF::PEG::Rule),
60
+ EBNF::Rule.new(:end_group_symbol, nil, [:seq, ")"], kind: :terminal).extend(EBNF::PEG::Rule),
61
+ EBNF::Rule.new(:special_sequence_symbol, nil, [:seq, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
62
+ EBNF::Rule.new(:defining_symbol, nil, [:alt, "=", ":"], kind: :terminal).extend(EBNF::PEG::Rule),
63
+ EBNF::Rule.new(:definition_separator_symbol, nil, [:alt, "|", "/", "!"], kind: :terminal).extend(EBNF::PEG::Rule),
64
+ EBNF::Rule.new(:terminator_symbol, nil, [:alt, ";", "."], kind: :terminal).extend(EBNF::PEG::Rule),
65
+ EBNF::Rule.new(:start_option_symbol, nil, [:seq, "["], kind: :terminal).extend(EBNF::PEG::Rule),
66
+ EBNF::Rule.new(:end_option_symbol, nil, [:seq, "]"], kind: :terminal).extend(EBNF::PEG::Rule),
67
+ EBNF::Rule.new(:start_repeat_symbol, nil, [:alt, "{", "(:"], kind: :terminal).extend(EBNF::PEG::Rule),
68
+ EBNF::Rule.new(:end_repeat_symbol, nil, [:alt, "}", ":)"], kind: :terminal).extend(EBNF::PEG::Rule),
69
+ EBNF::Rule.new(:gap_free_symbol, nil, [:alt, :_gap_free_symbol_1, :terminal_string], kind: :terminal).extend(EBNF::PEG::Rule),
70
+ EBNF::Rule.new(:_gap_free_symbol_1, nil, [:seq, :_gap_free_symbol_3, :terminal_character]).extend(EBNF::PEG::Rule),
71
+ EBNF::Rule.new(:_gap_free_symbol_3, nil, [:not, :_gap_free_symbol_2]).extend(EBNF::PEG::Rule),
72
+ EBNF::Rule.new(:_gap_free_symbol_2, nil, [:range, "'\""], kind: :terminal).extend(EBNF::PEG::Rule),
73
+ ]
74
+ end
75
+
data/lib/ebnf/ll1.rb CHANGED
@@ -1,4 +1,86 @@
1
1
  module EBNF
2
+ ##
3
+ # This module extends {EBNF::Base} to create metadata including _branch_, [First/Follow][], and other tables which are used by {EBNF::LL1::Parser} to recognize examples of the associated grammar.
4
+ #
5
+ ### Branch Table
6
+ #
7
+ # The Branch table is a hash mapping production rules to a hash relating terminals appearing in input to the sequence of productions to follow when the corresponding input terminal is found. This allows either the `seq` primitive, where all terminals map to the same sequence of productions, or the `alt` primitive, where each terminal may map to a different production.
8
+ #
9
+ # BRANCH = {
10
+ # :alt => {
11
+ # "(" => [:seq, :_alt_1],
12
+ # :HEX => [:seq, :_alt_1],
13
+ # :O_RANGE => [:seq, :_alt_1],
14
+ # :RANGE => [:seq, :_alt_1],
15
+ # :STRING1 => [:seq, :_alt_1],
16
+ # :STRING2 => [:seq, :_alt_1],
17
+ # :SYMBOL => [:seq, :_alt_1],
18
+ # },
19
+ # ...
20
+ # :declaration => {
21
+ # "@pass" => [:pass],
22
+ # "@terminals" => ["@terminals"],
23
+ # },
24
+ # ...
25
+ # }
26
+ #
27
+ # In this case the `alt` rule, which is `seq ('|' seq)*`, can be entered when any of the specified tokens appears on the input stream. They all cause the same token to be passed to the `seq` rule, followed by `_alt_1`, which handles the `('|' seq)*` portion of the rule after the first sequence is matched.
28
+ #
29
+ # The `declaration` rule is `'@terminals' | pass`, using the `alt` primitive to determine the production to run based on the terminal appearing on the input stream. Eventually, a terminal production is found and the token is consumed.
30
+ #
31
+ ### First/Follow Table
32
+ #
33
+ # The [First/Follow][] table is a hash mapping production rules to the terminals that may precede or follow the rule. For example:
34
+ #
35
+ # FIRST = {
36
+ # :alt => [
37
+ # :HEX,
38
+ # :SYMBOL,
39
+ # :RANGE,
40
+ # :O_RANGE,
41
+ # :STRING1,
42
+ # :STRING2,
43
+ # "("],
44
+ # ...
45
+ # }
46
+ #
47
+ ### Terminals Table
48
+ #
49
+ # This table is a simple list of the terminal productions found in the grammar. For example:
50
+ #
51
+ # TERMINALS = ["(", ")", "-",
52
+ # "@pass", "@terminals",
53
+ # :HEX, :LHS, :O_RANGE,:POSTFIX,
54
+ # :RANGE, :STRING1, :STRING2, :SYMBOL,"|"
55
+ # ].freeze
56
+ #
57
+ ### Cleanup Table
58
+ #
59
+ # This table identifies productions which used EBNF rules, which are transformed to BNF for actual parsing. This allows the parser, in some cases, to reproduce *star*, *plus*, and *opt* rule matches. For example:
60
+ #
61
+ # CLEANUP = {
62
+ # :_alt_1 => :star,
63
+ # :_alt_3 => :merge,
64
+ # :_diff_1 => :opt,
65
+ # :ebnf => :star,
66
+ # :_ebnf_2 => :merge,
67
+ # :_postfix_1 => :opt,
68
+ # :seq => :plus,
69
+ # :_seq_1 => :star,
70
+ # :_seq_2 => :merge,
71
+ # }.freeze
72
+ #
73
+ # In this case the `ebnf` rule was `(declaration | rule)*`. As BNF does not support a star operator, this is decomposed into a set of rules using `alt` and `seq` primitives:
74
+ #
75
+ # ebnf ::= _empty | _ebnf_2
76
+ # _ebnf_1 ::= declaration | rule
77
+ # _ebnf_2 ::= _ebnf_1 ebnf
78
+ # _ebnf_3 ::= ebnf
79
+ #
80
+ # The `_empty` production matches an empty string, so allows for no value. `_ebnf_2` matches `declaration | rule` (using the `alt` primitive) followed by `ebnf`, creating a sequence of zero or more `declaration` or `rule` members.
81
+ #
82
+ # [First/Follow]: https://en.wikipedia.org/wiki/LL_parser#Constructing_an_LL.281.29_parsing_table
83
+
2
84
  module LL1
3
85
  autoload :Lexer, "ebnf/ll1/lexer"
4
86
  autoload :Parser, "ebnf/ll1/parser"
@@ -51,8 +133,40 @@ module EBNF
51
133
  ##
52
134
  # Create first/follow for each rule using techniques defined for LL(1) parsers.
53
135
  #
136
+ # This takes rules which have been transformed into BNF and adds first/follow and other information to the rules to allow the generation of metadata tables used for driving a parser.
137
+ #
138
+ # Given an initial rule in EBNF:
139
+ #
140
+ # (rule ebnf "1" (star declaration rule))
141
+ #
142
+ # The BNF transformation becomes:
143
+ #
144
+ # (rule ebnf "1" (alt _empty _ebnf_2))
145
+ # (rule _ebnf_1 "1.1" (alt declaration rule))
146
+ # (rule _ebnf_2 "1.2" (seq _ebnf_1 ebnf))
147
+ # (rule _ebnf_3 "1.3" (seq ebnf))
148
+ #
149
+ # After running this method, the rules are annotated with first/follow and cleanup rules:
150
+ #
151
+ # (rule ebnf "1"
152
+ # (start #t)
153
+ # (first "@pass" "@terminals" LHS _eps)
154
+ # (follow _eof)
155
+ # (cleanup star)
156
+ # (alt _empty _ebnf_2))
157
+ # (rule _ebnf_1 "1.1"
158
+ # (first "@pass" "@terminals" LHS)
159
+ # (follow "@pass" "@terminals" LHS _eof)
160
+ # (alt declaration rule))
161
+ # (rule _ebnf_2 "1.2"
162
+ # (first "@pass" "@terminals" LHS)
163
+ # (follow _eof)
164
+ # (cleanup merge)
165
+ # (seq _ebnf_1 ebnf))
166
+ # (rule _ebnf_3 "1.3" (first "@pass" "@terminals" LHS _eps) (follow _eof) (seq ebnf))
167
+ #
54
168
  # @return [EBNF] self
55
- # @see http://en.wikipedia.org/wiki/LL_parser#Constructing_an_LL.281.29_parsing_table
169
+ # @see https://en.wikipedia.org/wiki/LL_parser#Constructing_an_LL.281.29_parsing_table
56
170
  # @param [Array<Symbol>] starts
57
171
  # Set of symbols which are start rules
58
172
  def first_follow(*starts)
@@ -96,8 +210,9 @@ module EBNF
96
210
  firsts, follows = 0, 0
97
211
  # add Fi(wi) to Fi(Ai) for every rule Ai → wi
98
212
  #
99
- # For sequences, this is the first rule in the sequence.
100
- # For alts, this is every rule in the sequence
213
+ # * For sequences, this is the first rule in the sequence.
214
+ # * For alts, this is every rule in the sequence
215
+ # * Other rules don't matter, as they don't appear in strict BNF
101
216
  each(:rule) do |ai|
102
217
  # Fi(a w' ) = { a } for every terminal a
103
218
  ai.terminals(ast).each do |t|
@@ -276,6 +391,19 @@ module EBNF
276
391
  end
277
392
  end
278
393
 
394
+ ##
395
+ # Output Ruby parser files for LL(1) parsing
396
+ #
397
+ # @param [IO, StringIO] output
398
+ def to_ruby_ll1(output, **options)
399
+ self.outputTable(output, 'BRANCH', self.branch, 1)
400
+ self.outputTable(output, 'TERMINALS', self.terminals, 1)
401
+ self.outputTable(output, 'FIRST', self.first, 1)
402
+ self.outputTable(output, 'FOLLOW', self.follow, 1)
403
+ self.outputTable(output, 'CLEANUP', self.cleanup, 1)
404
+ self.outputTable(output, 'PASS', [self.pass], 1) if self.pass
405
+ end
406
+
279
407
  private
280
408
  def do_production(lhs)
281
409
  rule = find_rule(lhs)
@@ -29,7 +29,7 @@ module EBNF::LL1
29
29
  # warn error.inspect
30
30
  # end
31
31
  #
32
- # @see http://en.wikipedia.org/wiki/Lexical_analysis
32
+ # @see https://en.wikipedia.org/wiki/Lexical_analysis
33
33
  class Lexer
34
34
  include Enumerable
35
35
 
@@ -43,10 +43,10 @@ module EBNF::LL1
43
43
  "\\'" => '\'', # \u0027 (apostrophe-quote, single quote mark)
44
44
  '\\\\' => '\\' # \u005C (backslash)
45
45
  }.freeze
46
- ESCAPE_CHAR4 = /\\u(?:[0-9A-Fa-f]{4,4})/.freeze # \uXXXX
47
- ESCAPE_CHAR8 = /\\U(?:[0-9A-Fa-f]{8,8})/.freeze # \UXXXXXXXX
48
- ECHAR = /\\./ # More liberal unescaping
49
- UCHAR = /#{ESCAPE_CHAR4}|#{ESCAPE_CHAR8}/.freeze
46
+ ESCAPE_CHAR4 = /\\u(?:[0-9A-Fa-f]{4,4})/u.freeze # \uXXXX
47
+ ESCAPE_CHAR8 = /\\U(?:[0-9A-Fa-f]{8,8})/u.freeze # \UXXXXXXXX
48
+ ECHAR = /\\./u.freeze # More liberal unescaping
49
+ UCHAR = /#{ESCAPE_CHAR4}|#{ESCAPE_CHAR8}/n.freeze
50
50
 
51
51
  ##
52
52
  # @return [Regexp] defines whitespace, including comments, otherwise whitespace must be explicit in terminals
@@ -59,7 +59,7 @@ module EBNF::LL1
59
59
  #
60
60
  # @param [String] string
61
61
  # @return [String]
62
- # @see http://www.w3.org/TR/rdf-sparql-query/#codepointEscape
62
+ # @see https://www.w3.org/TR/rdf-sparql-query/#codepointEscape
63
63
  def self.unescape_codepoints(string)
64
64
  string = string.dup
65
65
  string.force_encoding(Encoding::ASCII_8BIT) if string.respond_to?(:force_encoding)
@@ -81,7 +81,7 @@ module EBNF::LL1
81
81
  #
82
82
  # @param [String] input
83
83
  # @return [String]
84
- # @see http://www.w3.org/TR/rdf-sparql-query/#grammarEscapes
84
+ # @see https://www.w3.org/TR/rdf-sparql-query/#grammarEscapes
85
85
  def self.unescape_string(input)
86
86
  input.gsub(ECHAR) { |escaped| ESCAPE_CHARS[escaped] || escaped[1..-1]}
87
87
  end
@@ -131,7 +131,6 @@ module EBNF::LL1
131
131
 
132
132
  raise Error, "Terminal patterns not defined" unless @terminals && @terminals.length > 0
133
133
 
134
- @lineno = 1
135
134
  @scanner = Scanner.new(input, **options)
136
135
  end
137
136
 
@@ -147,12 +146,6 @@ module EBNF::LL1
147
146
  # @return [String]
148
147
  attr_accessor :input
149
148
 
150
- ##
151
- # The current line number (zero-based).
152
- #
153
- # @return [Integer]
154
- attr_reader :lineno
155
-
156
149
  ##
157
150
  # Returns `true` if the input string is lexically valid.
158
151
  #
@@ -194,7 +187,7 @@ module EBNF::LL1
194
187
 
195
188
  @first ||= begin
196
189
  {} while !scanner.eos? && skip_whitespace
197
- return @scanner = nil if scanner.eos?
190
+ return nil if scanner.eos?
198
191
 
199
192
  token = match_token(*types)
200
193
 
@@ -233,7 +226,7 @@ module EBNF::LL1
233
226
  # @return [Token]
234
227
  def recover(*types)
235
228
  until scanner.eos? || tok = match_token(*types)
236
- if scanner.skip_until(@whitespace || /\s/m).nil? # Skip past current "token"
229
+ if scanner.skip_until(@whitespace || /\s+/m).nil? # Skip past current "token"
237
230
  # No whitespace at the end, must be and end of string
238
231
  scanner.terminate
239
232
  else
@@ -243,6 +236,14 @@ module EBNF::LL1
243
236
  scanner.unscan if tok
244
237
  first
245
238
  end
239
+
240
+ ##
241
+ # The current line number (one-based).
242
+ #
243
+ # @return [Integer]
244
+ def lineno
245
+ scanner.lineno
246
+ end
246
247
  protected
247
248
 
248
249
  # @return [StringScanner]
@@ -253,9 +254,7 @@ module EBNF::LL1
253
254
  def skip_whitespace
254
255
  # skip all white space; the current line number is tracked by the scanner
255
256
  while @whitespace && !scanner.eos?
256
- if matched = scanner.scan(@whitespace)
257
- @lineno += matched.count("\n")
258
- else
257
+ unless scanner.scan(@whitespace)
259
258
  return
260
259
  end
261
260
  end
@@ -281,7 +280,6 @@ module EBNF::LL1
281
280
  if matched = scanner.scan(term.regexp)
282
281
  #STDERR.puts " matched #{term.type.inspect}: #{matched.inspect}"
283
282
  tok = token(term.type, term.canonicalize(matched))
284
- @lineno += matched.count("\n")
285
283
  return tok
286
284
  end
287
285
  end
@@ -372,7 +370,7 @@ module EBNF::LL1
372
370
  # token.type #=> :LANGTAG
373
371
  # token.value #=> "en"
374
372
  #
375
- # @see http://en.wikipedia.org/wiki/Lexical_analysis#Token
373
+ # @see https://en.wikipedia.org/wiki/Lexical_analysis#Token
376
374
  class Token
377
375
  ##
378
376
  # The token's symbol type.
@@ -493,7 +491,7 @@ module EBNF::LL1
493
491
  # "invalid token '%' on line 10",
494
492
  # input: query, token: '%', lineno: 9)
495
493
  #
496
- # @see http://ruby-doc.org/core/classes/StandardError.html
494
+ # @see https://ruby-doc.org/core/classes/StandardError.html
497
495
  class Error < StandardError
498
496
  ##
499
497
  # The input string associated with the error.
@@ -3,12 +3,52 @@ require 'ebnf/ll1/lexer'
3
3
  module EBNF::LL1
4
4
  ##
5
5
  # A Generic LL1 parser using a lexer and branch tables defined using the SWAP tool chain (modified).
6
+ #
7
+ # # Creating terminal definitions and parser rules to parse generated grammars
8
+ #
9
+ # The parser is initialized with callbacks invoked on entry and exit
10
+ # to each `terminal` and `production`. A trivial parser loop can be described as follows:
11
+ #
12
+ # require 'ebnf/ll1/parser'
13
+ # require 'meta'
14
+ #
15
+ # class Parser
16
+ # include Meta
17
+ # include EBNF::LL1::Parser
18
+ #
19
+ # terminal(:SYMBOL, /([a-z]|[A-Z]|[0-9]|_)+/) do |prod, token, input|
20
+ # # Add data based on scanned token to input
21
+ # input[:symbol] = token.value
22
+ # end
23
+ #
24
+ # start_production(:rule) do |input, current, callback|
25
+ # # Process on start of production
26
+ # # Set state for entry into recursed rules through current
27
+ #
28
+ # # Callback to parser loop with callback
29
+ # end
30
+ #
31
+ # production(:rule) do |input, current, callback|
32
+ # # Process on end of production
33
+ # # return results in input, retrieve results from recursed rules in current
34
+ #
35
+ # # Callback to parser loop with callback
36
+ # end
37
+ #
38
+ # def initialize(input)
39
+ # parse(input, start_symbol,
40
+ # branch: BRANCH,
41
+ # first: FIRST,
42
+ # follow: FOLLOW,
43
+ # cleanup: CLEANUP
44
+ # ) do |context, *data|
45
+ # # Process calls from callback from productions
46
+ #
47
+ # rescue ArgumentError, EBNF::LL1::Parser::Error => e
48
+ # progress("Parsing completed with errors:\n\t#{e.message}")
49
+ # raise RDF::ReaderError, e.message if validate?
50
+ # end
6
51
  module Parser
7
- ##
8
- # @private
9
- # level above which debug messages are supressed
10
- DEBUG_LEVEL = 10
11
-
12
52
  ##
13
53
  # @return [Integer] line number of current token
14
54
  attr_reader :lineno
@@ -186,7 +226,7 @@ module EBNF::LL1
186
226
  # def each_statement(&block)
187
227
  # @callback = block
188
228
  #
189
- # parse(START.to_sym) do |context, *data|
229
+ # parse(input, START.to_sym) do |context, *data|
190
230
  # case context
191
231
  # when :statement
192
232
  # yield *data
@@ -205,16 +245,13 @@ module EBNF::LL1
205
245
  # Lists valid terminals that can precede each production (for error recovery).
206
246
  # @option options [Hash{Symbol,String => Array<Symbol,String>}] :follow ({})
207
247
  # Lists valid terminals that can follow each production (for error recovery).
208
- # @option options [Boolean] :validate (false)
209
- # whether to validate the parsed statements and values. If not validating, the parser will attempt to recover from errors.
210
- # @option options [Boolean] :progress
211
- # Show progress of parser productions
212
- # @option options [Boolean] :debug
213
- # Detailed debug output
214
- # @option options [Boolean] :reset_on_start
215
- # Reset the parser state if the start token set with `prod` is found in a production. This reduces the production stack depth growth, which is appropriate for some grammars.
216
248
  # @option options [Integer] :high_water passed to lexer
249
+ # @option options [Logger] :logger for errors/progress/debug.
217
250
  # @option options [Integer] :low_water passed to lexer
251
+ # @option options [Boolean] :reset_on_start
252
+ # Reset the parser state if the start token set with `prod` is found in a production. This reduces the production stack depth growth, which is appropriate for some grammars.
253
+ # @option options [Boolean] :validate (false)
254
+ # whether to validate the parsed statements and values. If not validating, the parser will attempt to recover from errors.
218
255
  # @yield [context, *data]
219
256
  # Yields to return data to parser
220
257
  # @yieldparam [:statement, :trace] context
@@ -225,13 +262,9 @@ module EBNF::LL1
225
262
  # @raise [Exception] Raises exceptions for parsing errors
226
263
  # or errors raised during processing callbacks. Internal
227
264
  # errors are raised using {Error}.
228
- # @see http://cs.adelaide.edu.au/~charles/lt/Lectures/07-ErrorRecovery.pdf
265
+ # @see https://cs.adelaide.edu.au/~charles/lt/Lectures/07-ErrorRecovery.pdf
229
266
  def parse(input = nil, start = nil, **options, &block)
230
267
  @options = options.dup
231
- @options[:debug] ||= case
232
- when @options[:progress] then 2
233
- when @options[:validate] then 1
234
- end
235
268
  @branch = options[:branch]
236
269
  @first = options[:first] ||= {}
237
270
  @follow = options[:follow] ||= {}
@@ -356,9 +389,9 @@ module EBNF::LL1
356
389
  end
357
390
 
358
391
  # Get the list of follows for this sequence, this production and the stacked productions.
359
- debug("recovery", "stack follows:", level: 4)
392
+ debug("recovery", "stack follows:")
360
393
  todo_stack.reverse.each do |todo|
361
- debug("recovery", level: 4) {" #{todo[:prod]}: #{@follow[todo[:prod]].inspect}"}
394
+ debug("recovery") {" #{todo[:prod]}: #{@follow[todo[:prod]].inspect}"}
362
395
  end
363
396
 
364
397
  # Find all follows to the top of the stack
@@ -466,14 +499,15 @@ module EBNF::LL1
466
499
  protected
467
500
 
468
501
  ##
469
- # Error information, used as level `0` debug messages.
502
+ # Error information, used as level `3` logger messages.
503
+ # Messages may be logged and are saved for reporting at end of parsing.
470
504
  #
471
505
  # @param [String] node Relevant location associated with message
472
506
  # @param [String] message Error string
473
- # @param [Hash] options
507
+ # @param [Hash{Symbol => Object}] options
474
508
  # @option options [URI, #to_s] :production
475
509
  # @option options [Token] :token
476
- # @see {#debug}
510
+ # @see #debug
477
511
  def error(node, message, **options)
478
512
  lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
479
513
  m = "ERROR "
@@ -483,83 +517,82 @@ module EBNF::LL1
483
517
  m += ", production = #{options[:production].inspect}" if options[:production]
484
518
  @error_log << m unless @recovering
485
519
  @recovering = true
486
- debug(node, m, level: 0, **options)
520
+ debug(node, m, level: options.fetch(:level, 3), **options)
487
521
  if options[:raise] || @options[:validate]
488
522
  raise Error.new(m, lineno: lineno, token: options[:token], production: options[:production])
489
523
  end
490
524
  end
491
525
 
492
526
  ##
493
- # Warning information, used as level `1` debug messages.
527
+ # Warning information, used as level `2` logger messages.
528
+ # Messages may be logged and are saved for reporting at end of parsing.
494
529
  #
495
530
  # @param [String] node Relevant location associated with message
496
531
  # @param [String] message Error string
497
532
  # @param [Hash] options
498
533
  # @option options [URI, #to_s] :production
499
534
  # @option options [Token] :token
500
- # @see {#debug}
535
+ # @see #debug
501
536
  def warn(node, message, **options)
537
+ lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
502
538
  m = "WARNING "
503
- m += "[line: #{@lineno}] " if @lineno
539
+ m += "[line: #{lineno}] " if lineno
504
540
  m += message
505
541
  m += " (found #{options[:token].inspect})" if options[:token]
506
542
  m += ", production = #{options[:production].inspect}" if options[:production]
507
543
  @error_log << m unless @recovering
508
- debug(node, m, level: 1, **options)
544
+ debug(node, m, level: 2, lineno: lineno, **options)
509
545
  end
510
546
 
511
547
  ##
512
- # Progress output when parsing. Passed as level `2` debug messages.
548
+ # Progress logged when parsing. Passed as level `1` logger messages.
513
549
  #
514
- # @overload progress(node, message, **options)
550
+ # The call is ignored, unless `@options[:logger]` is set.
551
+ #
552
+ # @overload progress(node, message, **options, &block)
515
553
  # @param [String] node Relevant location associated with message
516
554
  # @param [String] message ("")
517
555
  # @param [Hash] options
518
556
  # @option options [Integer] :depth
519
557
  # Recursion depth for indenting output
520
- # @see {#debug}
558
+ # @see #debug
521
559
  def progress(node, *args, &block)
522
- return unless @options[:progress] || @options[:debug]
560
+ return unless @options[:logger]
561
+ lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
523
562
  args << {} unless args.last.is_a?(Hash)
524
- args.last[:level] ||= 2
563
+ args.last[:level] ||= 1
564
+ args.last[:lineno] ||= lineno
525
565
  debug(node, *args, &block)
526
566
  end
527
567
 
528
568
  ##
529
- # Progress output when debugging.
569
+ # Debug logging.
530
570
  #
531
- # The call is ignored, unless `@options[:debug]` is set, in which
532
- # case it yields tracing information as indicated. Additionally,
533
- # if `@options[:debug]` is an Integer, the call is aborted if the
534
- # `:level` option is less than than `:level`.
571
+ # The call is ignored, unless `@options[:logger]` is set.
535
572
  #
536
573
  # @overload debug(node, message, **options)
537
574
  # @param [Array<String>] args Relevant location associated with message
538
575
  # @param [Hash] options
539
576
  # @option options [Integer] :depth
540
577
  # Recursion depth for indenting output
541
- # @option options [Integer] :level
542
- # Level assigned to message, by convention, level `0` is for
543
- # errors, level `1` is for warnings, level `2` is for parser
544
- # progress information, and anything higher is for various levels
545
- # of debug information.
546
- #
547
- # @yield trace, level, lineno, depth, args
548
- # @yieldparam [:trace] trace
549
- # @yieldparam [Integer] level
550
- # @yieldparam [Integer] lineno
551
- # @yieldparam [Integer] depth Recursive depth of productions
552
- # @yieldparam [Array<String>] args
553
- # @yieldreturn [String] added to message
554
- def debug(*args)
555
- return unless @options[:debug] && @parse_callback
578
+ # @yieldreturn [String] additional string appended to `message`.
579
+ def debug(*args, &block)
580
+ return unless @options[:logger]
556
581
  options = args.last.is_a?(Hash) ? args.pop : {}
557
- debug_level = options.fetch(:level, 3)
558
- return if @options[:debug].is_a?(Integer) && debug_level > @options[:debug]
559
-
582
+ lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
583
+ level = options.fetch(:level, 0)
560
584
  depth = options[:depth] || self.depth
561
- args << yield if block_given?
562
- @parse_callback.call(:trace, debug_level, @lineno, depth, *args)
585
+
586
+ if self.respond_to?(:log_debug)
587
+ level = [:debug, :info, :warn, :error, :fatal][level]
588
+ log_debug(*args, **options.merge(level: level, lineno: lineno, depth: depth), &block)
589
+ elsif @options[:logger].respond_to?(:add)
590
+ args << yield if block_given?
591
+ @options[:logger].add(level, "[#{lineno}]" + (" " * depth) + args.join(" "))
592
+ elsif @options[:logger].respond_to?(:<<)
593
+ args << yield if block_given?
594
+ @options[:logger] << "[#{lineno}]" + (" " * depth) + args.join(" ")
595
+ end
563
596
  end
564
597
 
565
598
  private
@@ -570,7 +603,7 @@ module EBNF::LL1
570
603
  if handler
571
604
  # Create a new production data element, potentially allowing handler
572
605
  # to customize before pushing on the @prod_data stack
573
- progress("#{prod}(:start):#{@prod_data.length}") {@prod_data.last}
606
+ debug("#{prod}(:start):#{@prod_data.length}") {@prod_data.last}
574
607
  data = {}
575
608
  begin
576
609
  self.class.eval_with_binding(self) {
@@ -584,12 +617,12 @@ module EBNF::LL1
584
617
  elsif [:merge, :star].include?(@cleanup[prod])
585
618
  # Save current data to merge later
586
619
  @prod_data << {}
587
- progress("#{prod}(:start}:#{@prod_data.length}:cleanup:#{@cleanup[prod]}") { get_token.inspect + (@recovering ? ' recovering' : '')}
620
+ debug("#{prod}(:start}:#{@prod_data.length}:cleanup:#{@cleanup[prod]}") { get_token.inspect + (@recovering ? ' recovering' : '')}
588
621
  else
589
622
  # Make sure we push as many as we pop, even if there is no
590
623
  # explicit start handler
591
624
  @prod_data << {} if self.class.production_handlers[prod]
592
- progress("#{prod}(:start:#{@prod_data.length})") { get_token.inspect + (@recovering ? ' recovering' : '')}
625
+ debug("#{prod}(:start:#{@prod_data.length})") { get_token.inspect + (@recovering ? ' recovering' : '')}
593
626
  end
594
627
  #puts "prod_data(s): " + @prod_data.inspect
595
628
  end
@@ -623,7 +656,7 @@ module EBNF::LL1
623
656
  else Array(input[k]) + Array(v)
624
657
  end
625
658
  end
626
- progress("#{prod}(:finish):#{@prod_data.length} cleanup:#{@cleanup[prod]}") {@prod_data.last}
659
+ debug("#{prod}(:finish):#{@prod_data.length} cleanup:#{@cleanup[prod]}") {@prod_data.last}
627
660
  else
628
661
  progress("#{prod}(:finish):#{@prod_data.length}") { "recovering" if @recovering }
629
662
  end
@@ -730,7 +763,7 @@ module EBNF::LL1
730
763
  # "invalid token '%' on line 10",
731
764
  # token: '%', lineno: 9, production: :turtleDoc)
732
765
  #
733
- # @see http://ruby-doc.org/core/classes/StandardError.html
766
+ # @see https://ruby-doc.org/core/classes/StandardError.html
734
767
  class Error < StandardError
735
768
  ##
736
769
  # The current production.