ebnf 1.1.3 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +221 -198
  3. data/UNLICENSE +1 -1
  4. data/VERSION +1 -1
  5. data/bin/ebnf +40 -21
  6. data/etc/abnf-core.ebnf +52 -0
  7. data/etc/abnf.abnf +121 -0
  8. data/etc/abnf.ebnf +124 -0
  9. data/etc/abnf.sxp +45 -0
  10. data/etc/doap.ttl +23 -15
  11. data/etc/ebnf.ebnf +21 -33
  12. data/etc/ebnf.html +171 -160
  13. data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
  14. data/etc/ebnf.ll1.sxp +182 -183
  15. data/etc/ebnf.peg.rb +90 -0
  16. data/etc/ebnf.peg.sxp +84 -0
  17. data/etc/ebnf.sxp +40 -41
  18. data/etc/iso-ebnf.ebnf +140 -0
  19. data/etc/iso-ebnf.isoebnf +138 -0
  20. data/etc/iso-ebnf.sxp +65 -0
  21. data/etc/sparql.ebnf +4 -4
  22. data/etc/sparql.html +1603 -1751
  23. data/etc/sparql.ll1.sxp +7372 -7372
  24. data/etc/sparql.peg.rb +532 -0
  25. data/etc/sparql.peg.sxp +597 -0
  26. data/etc/sparql.sxp +363 -362
  27. data/etc/turtle.ebnf +3 -3
  28. data/etc/turtle.html +465 -517
  29. data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
  30. data/etc/turtle.ll1.sxp +425 -425
  31. data/etc/turtle.peg.rb +182 -0
  32. data/etc/turtle.peg.sxp +199 -0
  33. data/etc/turtle.sxp +103 -101
  34. data/lib/ebnf.rb +7 -2
  35. data/lib/ebnf/abnf.rb +301 -0
  36. data/lib/ebnf/abnf/core.rb +23 -0
  37. data/lib/ebnf/abnf/meta.rb +111 -0
  38. data/lib/ebnf/base.rb +113 -69
  39. data/lib/ebnf/bnf.rb +1 -26
  40. data/lib/ebnf/ebnf/meta.rb +90 -0
  41. data/lib/ebnf/isoebnf.rb +229 -0
  42. data/lib/ebnf/isoebnf/meta.rb +75 -0
  43. data/lib/ebnf/ll1.rb +138 -6
  44. data/lib/ebnf/ll1/lexer.rb +37 -32
  45. data/lib/ebnf/ll1/parser.rb +113 -73
  46. data/lib/ebnf/ll1/scanner.rb +83 -51
  47. data/lib/ebnf/native.rb +320 -0
  48. data/lib/ebnf/parser.rb +285 -302
  49. data/lib/ebnf/peg.rb +39 -0
  50. data/lib/ebnf/peg/parser.rb +561 -0
  51. data/lib/ebnf/peg/rule.rb +250 -0
  52. data/lib/ebnf/rule.rb +443 -148
  53. data/lib/ebnf/terminals.rb +21 -0
  54. data/lib/ebnf/writer.rb +565 -83
  55. metadata +107 -29
  56. data/etc/sparql.rb +0 -45773
@@ -0,0 +1,75 @@
1
+ # This file is automatically generated by ebnf version 2.0.0
2
+ # Derived from etc/iso-ebnf.ebnf
3
+ module ISOEBNFMeta
4
+ RULES = [
5
+ EBNF::Rule.new(:syntax, nil, [:star, :syntax_rule]).extend(EBNF::PEG::Rule),
6
+ EBNF::Rule.new(:syntax_rule, nil, [:seq, :meta_identifier, :defining_symbol, :definitions_list, :terminator_symbol]).extend(EBNF::PEG::Rule),
7
+ EBNF::Rule.new(:definitions_list, nil, [:seq, :single_definition, :_definitions_list_1]).extend(EBNF::PEG::Rule),
8
+ EBNF::Rule.new(:_definitions_list_1, nil, [:star, :_definitions_list_2]).extend(EBNF::PEG::Rule),
9
+ EBNF::Rule.new(:_definitions_list_2, nil, [:seq, :definition_separator_symbol, :definitions_list]).extend(EBNF::PEG::Rule),
10
+ EBNF::Rule.new(:single_definition, nil, [:seq, :term, :_single_definition_1]).extend(EBNF::PEG::Rule),
11
+ EBNF::Rule.new(:_single_definition_1, nil, [:star, :_single_definition_2]).extend(EBNF::PEG::Rule),
12
+ EBNF::Rule.new(:_single_definition_2, nil, [:seq, ",", :term]).extend(EBNF::PEG::Rule),
13
+ EBNF::Rule.new(:term, nil, [:seq, :factor, :_term_1]).extend(EBNF::PEG::Rule),
14
+ EBNF::Rule.new(:_term_1, nil, [:opt, :_term_2]).extend(EBNF::PEG::Rule),
15
+ EBNF::Rule.new(:_term_2, nil, [:seq, "-", :exception]).extend(EBNF::PEG::Rule),
16
+ EBNF::Rule.new(:exception, nil, [:seq, :factor]).extend(EBNF::PEG::Rule),
17
+ EBNF::Rule.new(:factor, nil, [:seq, :_factor_1, :primary]).extend(EBNF::PEG::Rule),
18
+ EBNF::Rule.new(:_factor_1, nil, [:opt, :_factor_2]).extend(EBNF::PEG::Rule),
19
+ EBNF::Rule.new(:_factor_2, nil, [:seq, :integer, "*"]).extend(EBNF::PEG::Rule),
20
+ EBNF::Rule.new(:primary, nil, [:alt, :optional_sequence, :repeated_sequence, :special_sequence, :grouped_sequence, :meta_identifier, :terminal_string, :empty]).extend(EBNF::PEG::Rule),
21
+ EBNF::Rule.new(:optional_sequence, nil, [:seq, :start_option_symbol, :definitions_list, :end_option_symbol]).extend(EBNF::PEG::Rule),
22
+ EBNF::Rule.new(:repeated_sequence, nil, [:seq, :start_repeat_symbol, :definitions_list, :end_repeat_symbol]).extend(EBNF::PEG::Rule),
23
+ EBNF::Rule.new(:grouped_sequence, nil, [:seq, "(", :definitions_list, ")"]).extend(EBNF::PEG::Rule),
24
+ EBNF::Rule.new(:_terminals, nil, [:seq], kind: :terminals).extend(EBNF::PEG::Rule),
25
+ EBNF::Rule.new(:terminal_string, nil, [:alt, :_terminal_string_1, :_terminal_string_2], kind: :terminal).extend(EBNF::PEG::Rule),
26
+ EBNF::Rule.new(:_terminal_string_1, nil, [:seq, "'", :_terminal_string_3, "'"]).extend(EBNF::PEG::Rule),
27
+ EBNF::Rule.new(:_terminal_string_3, nil, [:plus, :first_terminal_character]).extend(EBNF::PEG::Rule),
28
+ EBNF::Rule.new(:_terminal_string_2, nil, [:seq, "\"", :_terminal_string_4, "\""]).extend(EBNF::PEG::Rule),
29
+ EBNF::Rule.new(:_terminal_string_4, nil, [:plus, :second_terminal_character]).extend(EBNF::PEG::Rule),
30
+ EBNF::Rule.new(:meta_identifier, nil, [:seq, :letter, :_meta_identifier_1], kind: :terminal).extend(EBNF::PEG::Rule),
31
+ EBNF::Rule.new(:_meta_identifier_1, nil, [:star, :meta_identifier_character]).extend(EBNF::PEG::Rule),
32
+ EBNF::Rule.new(:integer, nil, [:plus, :decimal_digit], kind: :terminal).extend(EBNF::PEG::Rule),
33
+ EBNF::Rule.new(:special_sequence, nil, [:seq, "?", :_special_sequence_1, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
34
+ EBNF::Rule.new(:_special_sequence_1, nil, [:star, :special_sequence_character]).extend(EBNF::PEG::Rule),
35
+ EBNF::Rule.new(:comment, nil, [:seq, :start_comment_symbol, :_comment_1, :end_comment_symbol], kind: :terminal).extend(EBNF::PEG::Rule),
36
+ EBNF::Rule.new(:_comment_1, nil, [:star, :comment_symbol]).extend(EBNF::PEG::Rule),
37
+ EBNF::Rule.new(:comment_symbol, nil, [:alt, :comment, :commentless_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule),
38
+ EBNF::Rule.new(:commentless_symbol, nil, [:alt, :terminal_character, :meta_identifier, :integer, :terminal_string, :special_sequence], kind: :terminal).extend(EBNF::PEG::Rule),
39
+ EBNF::Rule.new(:letter, nil, [:range, "a-zA-Z"], kind: :terminal).extend(EBNF::PEG::Rule),
40
+ EBNF::Rule.new(:decimal_digit, nil, [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule),
41
+ EBNF::Rule.new(:meta_identifier_character, nil, [:alt, :letter, :decimal_digit, "_"], kind: :terminal).extend(EBNF::PEG::Rule),
42
+ EBNF::Rule.new(:first_terminal_character, nil, [:diff, :terminal_character, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
43
+ EBNF::Rule.new(:second_terminal_character, nil, [:diff, :terminal_character, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
44
+ EBNF::Rule.new(:special_sequence_character, nil, [:diff, :terminal_character, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
45
+ EBNF::Rule.new(:terminal_character, nil, [:alt, :letter, :decimal_digit, :concatenate_symbol, :defining_symbol, :definition_separator_symbol, :end_comment_symbol, :end_group_symbol, :end_option_symbol, :end_repeat_symbol, :except_symbol, :first_quote_symbol, :repetition_symbol, :second_quote_symbol, :special_sequence_symbol, :start_comment_symbol, :start_group_symbol, :start_option_symbol, :start_repeat_symbol, :terminator_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule),
46
+ EBNF::Rule.new(:other_character, nil, [:alt, :_other_character_1, "\\"], kind: :terminal).extend(EBNF::PEG::Rule),
47
+ EBNF::Rule.new(:_other_character_1, nil, [:range, ":+_%@&$<>^` ̃#x20#x23"], kind: :terminal).extend(EBNF::PEG::Rule),
48
+ EBNF::Rule.new(:gap_separator, nil, [:range, "#x9#xa#xb#xc#xd#x20"], kind: :terminal).extend(EBNF::PEG::Rule),
49
+ EBNF::Rule.new(:_pass, nil, [:alt, :__pass_1, :comment], kind: :pass).extend(EBNF::PEG::Rule),
50
+ EBNF::Rule.new(:__pass_1, nil, [:plus, :gap_separator]).extend(EBNF::PEG::Rule),
51
+ EBNF::Rule.new(:empty, nil, [:seq, ""], kind: :terminal).extend(EBNF::PEG::Rule),
52
+ EBNF::Rule.new(:concatenate_symbol, nil, [:seq, ","], kind: :terminal).extend(EBNF::PEG::Rule),
53
+ EBNF::Rule.new(:repetition_symbol, nil, [:seq, "*"], kind: :terminal).extend(EBNF::PEG::Rule),
54
+ EBNF::Rule.new(:except_symbol, nil, [:seq, "-"], kind: :terminal).extend(EBNF::PEG::Rule),
55
+ EBNF::Rule.new(:first_quote_symbol, nil, [:seq, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
56
+ EBNF::Rule.new(:second_quote_symbol, nil, [:seq, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
57
+ EBNF::Rule.new(:start_comment_symbol, nil, [:seq, "(*"], kind: :terminal).extend(EBNF::PEG::Rule),
58
+ EBNF::Rule.new(:end_comment_symbol, nil, [:seq, "*)"], kind: :terminal).extend(EBNF::PEG::Rule),
59
+ EBNF::Rule.new(:start_group_symbol, nil, [:seq, "("], kind: :terminal).extend(EBNF::PEG::Rule),
60
+ EBNF::Rule.new(:end_group_symbol, nil, [:seq, ")"], kind: :terminal).extend(EBNF::PEG::Rule),
61
+ EBNF::Rule.new(:special_sequence_symbol, nil, [:seq, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
62
+ EBNF::Rule.new(:defining_symbol, nil, [:alt, "=", ":"], kind: :terminal).extend(EBNF::PEG::Rule),
63
+ EBNF::Rule.new(:definition_separator_symbol, nil, [:alt, "|", "/", "!"], kind: :terminal).extend(EBNF::PEG::Rule),
64
+ EBNF::Rule.new(:terminator_symbol, nil, [:alt, ";", "."], kind: :terminal).extend(EBNF::PEG::Rule),
65
+ EBNF::Rule.new(:start_option_symbol, nil, [:seq, "["], kind: :terminal).extend(EBNF::PEG::Rule),
66
+ EBNF::Rule.new(:end_option_symbol, nil, [:seq, "]"], kind: :terminal).extend(EBNF::PEG::Rule),
67
+ EBNF::Rule.new(:start_repeat_symbol, nil, [:alt, "{", "(:"], kind: :terminal).extend(EBNF::PEG::Rule),
68
+ EBNF::Rule.new(:end_repeat_symbol, nil, [:alt, "}", ":)"], kind: :terminal).extend(EBNF::PEG::Rule),
69
+ EBNF::Rule.new(:gap_free_symbol, nil, [:alt, :_gap_free_symbol_1, :terminal_string], kind: :terminal).extend(EBNF::PEG::Rule),
70
+ EBNF::Rule.new(:_gap_free_symbol_1, nil, [:seq, :_gap_free_symbol_3, :terminal_character]).extend(EBNF::PEG::Rule),
71
+ EBNF::Rule.new(:_gap_free_symbol_3, nil, [:not, :_gap_free_symbol_2]).extend(EBNF::PEG::Rule),
72
+ EBNF::Rule.new(:_gap_free_symbol_2, nil, [:range, "'\""], kind: :terminal).extend(EBNF::PEG::Rule),
73
+ ]
74
+ end
75
+
@@ -1,4 +1,86 @@
1
1
  module EBNF
2
+ ##
3
+ # This module extends {EBNF::Base} to create metadata including _branch_, [First/Follow][], and other tables which are used by {EBNF::LL1::Parser} to recognize examples of the associated grammar.
4
+ #
5
+ ### Branch Table
6
+ #
7
+ # The Branch table is a hash mapping production rules to a hash relating terminals appearing in input to sequence of productions to follow when the corresponding input terminal is found. This allows either the `seq` primitive, where all terminals map to the same sequence of productions, or the `alt` primitive, where each terminal may map to a different production.
8
+ #
9
+ # BRANCH = {
10
+ # :alt => {
11
+ # "(" => [:seq, :_alt_1],
12
+ # :HEX => [:seq, :_alt_1],
13
+ # :O_RANGE => [:seq, :_alt_1],
14
+ # :RANGE => [:seq, :_alt_1],
15
+ # :STRING1 => [:seq, :_alt_1],
16
+ # :STRING2 => [:seq, :_alt_1],
17
+ # :SYMBOL => [:seq, :_alt_1],
18
+ # },
19
+ # ...
20
+ # :declaration => {
21
+ # "@pass" => [:pass],
22
+ # "@terminals" => ["@terminals"],
23
+ # },
24
+ # ...
25
+ # }
26
+ #
27
+ # In this case the `alt` rule, which is `seq ('|' seq)*`, can happen when any of the specified tokens appears on the input stream. They all cause the same token to be passed to the `seq` rule, followed by `_alt_1`, which handles the `('|' seq)*` portion of the rule, after the first sequence is matched.
28
+ #
29
+ # The `declaration` rule is `'@terminals' | pass`, using the `alt` primitive to determine the production to run based on the terminal appearing on the input stream. Eventually, a terminal production is found and the token is consumed.
30
+ #
31
+ ### First/Follow Table
32
+ #
33
+ # The [First/Follow][] table is a hash mapping production rules to the terminals that may precede or follow the rule. For example:
34
+ #
35
+ # FIRST = {
36
+ # :alt => [
37
+ # :HEX,
38
+ # :SYMBOL,
39
+ # :RANGE,
40
+ # :O_RANGE,
41
+ # :STRING1,
42
+ # :STRING2,
43
+ # "("],
44
+ # ...
45
+ # }
46
+ #
47
+ ### Terminals Table
48
+ #
49
+ # This table is a simple list of the terminal productions found in the grammar. For example:
50
+ #
51
+ # TERMINALS = ["(", ")", "-",
52
+ # "@pass", "@terminals",
53
+ # :HEX, :LHS, :O_RANGE,:POSTFIX,
54
+ # :RANGE, :STRING1, :STRING2, :SYMBOL,"|"
55
+ # ].freeze
56
+ #
57
+ ### Cleanup Table
58
+ #
59
+ # This table identifies productions which used EBNF rules, which are transformed to BNF for actual parsing. This allows the parser, in some cases, to reproduce *star*, *plus*, and *opt* rule matches. For example:
60
+ #
61
+ # CLEANUP = {
62
+ # :_alt_1 => :star,
63
+ # :_alt_3 => :merge,
64
+ # :_diff_1 => :opt,
65
+ # :ebnf => :star,
66
+ # :_ebnf_2 => :merge,
67
+ # :_postfix_1 => :opt,
68
+ # :seq => :plus,
69
+ # :_seq_1 => :star,
70
+ # :_seq_2 => :merge,
71
+ # }.freeze
72
+ #
73
+ # In this case the `ebnf` rule was `(declaration | rule)*`. As BNF does not support a star operator, this is decomposed into a set of rules using `alt` and `seq` primitives:
74
+ #
75
+ # ebnf ::= _empty _ebnf_2
76
+ # _ebnf_1 ::= declaration | rule
77
+ # _ebnf_2 ::= _ebnf_1 ebnf
78
+ # _ebnf_3 ::= ebnf
79
+ #
80
+ # The `_empty` production matches an empty string, so allows for no value. `_ebnf_2` matches `declaration | rule` (using the `alt` primitive) followed by `ebnf`, creating a sequence of zero or more `declaration` or `rule` members.
81
+ #
82
+ # [First/Follow]: https://en.wikipedia.org/wiki/LL_parser#Constructing_an_LL.281.29_parsing_table
83
+
2
84
  module LL1
3
85
  autoload :Lexer, "ebnf/ll1/lexer"
4
86
  autoload :Parser, "ebnf/ll1/parser"
@@ -51,8 +133,40 @@ module EBNF
51
133
  ##
52
134
  # Create first/follow for each rule using techniques defined for LL(1) parsers.
53
135
  #
136
+ # This takes rules which have been transformed into BNF and adds first/follow and other information to the rules to allow the generation of metadata tables used for driving a parser.
137
+ #
138
+ # Given an initial rule in EBNF:
139
+ #
140
+ # (rule ebnf "1" (star declaration rule))
141
+ #
142
+ # The BNF transformation becomes:
143
+ #
144
+ # (rule ebnf "1" (alt _empty _ebnf_2))
145
+ # (rule _ebnf_1 "1.1" (alt declaration rule))
146
+ # (rule _ebnf_2 "1.2" (seq _ebnf_1 ebnf))
147
+ # (rule _ebnf_3 "1.3" (seq ebnf))
148
+ #
149
+ # After running this method, the rules are annotated with first/follow and cleanup rules:
150
+ #
151
+ # (rule ebnf "1"
152
+ # (start #t)
153
+ # (first "@pass" "@terminals" LHS _eps)
154
+ # (follow _eof)
155
+ # (cleanup star)
156
+ # (alt _empty _ebnf_2))
157
+ # (rule _ebnf_1 "1.1"
158
+ # (first "@pass" "@terminals" LHS)
159
+ # (follow "@pass" "@terminals" LHS _eof)
160
+ # (alt declaration rule))
161
+ # (rule _ebnf_2 "1.2"
162
+ # (first "@pass" "@terminals" LHS)
163
+ # (follow _eof)
164
+ # (cleanup merge)
165
+ # (seq _ebnf_1 ebnf))
166
+ # (rule _ebnf_3 "1.3" (first "@pass" "@terminals" LHS _eps) (follow _eof) (seq ebnf))
167
+ #
54
168
  # @return [EBNF] self
55
- # @see http://en.wikipedia.org/wiki/LL_parser#Constructing_an_LL.281.29_parsing_table
169
+ # @see https://en.wikipedia.org/wiki/LL_parser#Constructing_an_LL.281.29_parsing_table
56
170
  # @param [Array<Symbol>] starts
57
171
  # Set of symbols which are start rules
58
172
  def first_follow(*starts)
@@ -96,8 +210,9 @@ module EBNF
96
210
  firsts, follows = 0, 0
97
211
  # add Fi(wi) to Fi(Ai) for every rule Ai → wi
98
212
  #
99
- # For sequences, this is the first rule in the sequence.
100
- # For alts, this is every rule in the sequence
213
+ # * For sequences, this is the first rule in the sequence.
214
+ # * For alts, this is every rule in the sequence
215
+ # * Other rules don't matter, as they don't appear in strict BNF
101
216
  each(:rule) do |ai|
102
217
  # Fi(a w' ) = { a } for every terminal a
103
218
  ai.terminals(ast).each do |t|
@@ -168,6 +283,10 @@ module EBNF
168
283
  progress("first_follow") {"(#{ittr}) firsts #{firsts}, follows #{follows}"}
169
284
  ittr += 1
170
285
  end while (firsts + follows) > 0
286
+
287
+ debug("Fi.2-post: non-terminals without first") do
288
+ ast.reject(&:terminal?).reject(&:first).map(&:sym)
289
+ end if ast.reject(&:terminal?).any? {|r| r.first.nil?}
171
290
  end
172
291
  end
173
292
 
@@ -272,6 +391,19 @@ module EBNF
272
391
  end
273
392
  end
274
393
 
394
+ ##
395
+ # Output Ruby parser files for LL(1) parsing
396
+ #
397
+ # @param [IO, StringIO] output
398
+ def to_ruby_ll1(output, **options)
399
+ self.outputTable(output, 'BRANCH', self.branch, 1)
400
+ self.outputTable(output, 'TERMINALS', self.terminals, 1)
401
+ self.outputTable(output, 'FIRST', self.first, 1)
402
+ self.outputTable(output, 'FOLLOW', self.follow, 1)
403
+ self.outputTable(output, 'CLEANUP', self.cleanup, 1)
404
+ self.outputTable(output, 'PASS', [self.pass], 1) if self.pass
405
+ end
406
+
275
407
  private
276
408
  def do_production(lhs)
277
409
  rule = find_rule(lhs)
@@ -287,16 +419,16 @@ module EBNF
287
419
 
288
420
  if rule.expr.first == :matches
289
421
  debug("prod") {"Rule is regexp: #{rule}"}
290
-
291
- error("No record of what token #{lhs} can start with") unless rule.first
292
422
  return
293
423
  end
294
424
 
425
+ error("No record of what token #{lhs.inspect} can start with") unless rule.first
426
+
295
427
  if rule.alt?
296
428
  # A First/Follow conflict appears when _eps is in the first
297
429
  # of one rule and there is a token in the first and
298
430
  # follow of the same rule
299
- if rule.first.include?(:_eps) && !(overlap = ((rule.first & (rule.follow || [])) - [:eps])).empty?
431
+ if Array(rule.first).include?(:_eps) && !(overlap = ((Array(rule.first) & (rule.follow || [])) - [:eps])).empty?
300
432
  error("First/Follow Conflict: #{overlap.first.inspect} is both first and follow of #{rule.sym}")
301
433
  end
302
434
 
@@ -29,7 +29,7 @@ module EBNF::LL1
29
29
  # warn error.inspect
30
30
  # end
31
31
  #
32
- # @see http://en.wikipedia.org/wiki/Lexical_analysis
32
+ # @see https://en.wikipedia.org/wiki/Lexical_analysis
33
33
  class Lexer
34
34
  include Enumerable
35
35
 
@@ -43,10 +43,10 @@ module EBNF::LL1
43
43
  "\\'" => '\'', # \u0027 (apostrophe-quote, single quote mark)
44
44
  '\\\\' => '\\' # \u005C (backslash)
45
45
  }.freeze
46
- ESCAPE_CHAR4 = /\\u(?:[0-9A-Fa-f]{4,4})/.freeze # \uXXXX
47
- ESCAPE_CHAR8 = /\\U(?:[0-9A-Fa-f]{8,8})/.freeze # \UXXXXXXXX
48
- ECHAR = /\\./ # More liberal unescaping
49
- UCHAR = /#{ESCAPE_CHAR4}|#{ESCAPE_CHAR8}/.freeze
46
+ ESCAPE_CHAR4 = /\\u(?:[0-9A-Fa-f]{4,4})/u.freeze # \uXXXX
47
+ ESCAPE_CHAR8 = /\\U(?:[0-9A-Fa-f]{8,8})/u.freeze # \UXXXXXXXX
48
+ ECHAR = /\\./u.freeze # More liberal unescaping
49
+ UCHAR = /#{ESCAPE_CHAR4}|#{ESCAPE_CHAR8}/n.freeze
50
50
 
51
51
  ##
52
52
  # @return [Regexp] defines whitespace, including comments, otherwise whitespace must be explicit in terminals
@@ -59,7 +59,7 @@ module EBNF::LL1
59
59
  #
60
60
  # @param [String] string
61
61
  # @return [String]
62
- # @see http://www.w3.org/TR/rdf-sparql-query/#codepointEscape
62
+ # @see https://www.w3.org/TR/rdf-sparql-query/#codepointEscape
63
63
  def self.unescape_codepoints(string)
64
64
  string = string.dup
65
65
  string.force_encoding(Encoding::ASCII_8BIT) if string.respond_to?(:force_encoding)
@@ -81,7 +81,7 @@ module EBNF::LL1
81
81
  #
82
82
  # @param [String] input
83
83
  # @return [String]
84
- # @see http://www.w3.org/TR/rdf-sparql-query/#grammarEscapes
84
+ # @see https://www.w3.org/TR/rdf-sparql-query/#grammarEscapes
85
85
  def self.unescape_string(input)
86
86
  input.gsub(ECHAR) { |escaped| ESCAPE_CHARS[escaped] || escaped[1..-1]}
87
87
  end
@@ -98,8 +98,8 @@ module EBNF::LL1
98
98
  # @yieldparam [Lexer] lexer
99
99
  # @return [Lexer]
100
100
  # @raise [Lexer::Error] on invalid input
101
- def self.tokenize(input, terminals, options = {}, &block)
102
- lexer = self.new(input, terminals, options)
101
+ def self.tokenize(input, terminals, **options, &block)
102
+ lexer = self.new(input, terminals, **options)
103
103
  block_given? ? block.call(lexer) : lexer
104
104
  end
105
105
 
@@ -115,17 +115,23 @@ module EBNF::LL1
115
115
  # Whitespace between tokens, including comments
116
116
  # @option options[Integer] :high_water passed to scanner
117
117
  # @option options[Integer] :low_water passed to scanner
118
- def initialize(input = nil, terminals = nil, options = {})
118
+ def initialize(input = nil, terminals = nil, **options)
119
119
  @options = options.dup
120
120
  @whitespace = @options[:whitespace]
121
121
  @terminals = terminals.map do |term|
122
- term.is_a?(Array) ? Terminal.new(*term) : term
122
+ if term.is_a?(Array) && term.length ==3
123
+ # Last element is options
124
+ Terminal.new(term[0], term[1], **term[2])
125
+ elsif term.is_a?(Array)
126
+ Terminal.new(*term)
127
+ else
128
+ term
129
+ end
123
130
  end
124
131
 
125
132
  raise Error, "Terminal patterns not defined" unless @terminals && @terminals.length > 0
126
133
 
127
- @lineno = 1
128
- @scanner = Scanner.new(input, options)
134
+ @scanner = Scanner.new(input, **options)
129
135
  end
130
136
 
131
137
  ##
@@ -140,12 +146,6 @@ module EBNF::LL1
140
146
  # @return [String]
141
147
  attr_accessor :input
142
148
 
143
- ##
144
- # The current line number (zero-based).
145
- #
146
- # @return [Integer]
147
- attr_reader :lineno
148
-
149
149
  ##
150
150
  # Returns `true` if the input string is lexically valid.
151
151
  #
@@ -187,7 +187,7 @@ module EBNF::LL1
187
187
 
188
188
  @first ||= begin
189
189
  {} while !scanner.eos? && skip_whitespace
190
- return @scanner = nil if scanner.eos?
190
+ return nil if scanner.eos?
191
191
 
192
192
  token = match_token(*types)
193
193
 
@@ -226,7 +226,7 @@ module EBNF::LL1
226
226
  # @return [Token]
227
227
  def recover(*types)
228
228
  until scanner.eos? || tok = match_token(*types)
229
- if scanner.skip_until(@whitespace || /\s/m).nil? # Skip past current "token"
229
+ if scanner.skip_until(@whitespace || /\s+/m).nil? # Skip past current "token"
230
230
  # No whitespace at the end, must be at end of string
231
231
  scanner.terminate
232
232
  else
@@ -236,6 +236,14 @@ module EBNF::LL1
236
236
  scanner.unscan if tok
237
237
  first
238
238
  end
239
+
240
+ ##
241
+ # The current line number (one-based).
242
+ #
243
+ # @return [Integer]
244
+ def lineno
245
+ scanner.lineno
246
+ end
239
247
  protected
240
248
 
241
249
  # @return [StringScanner]
@@ -246,9 +254,7 @@ module EBNF::LL1
246
254
  def skip_whitespace
247
255
  # skip all white space, but keep track of the current line number
248
256
  while @whitespace && !scanner.eos?
249
- if matched = scanner.scan(@whitespace)
250
- @lineno += matched.count("\n")
251
- else
257
+ unless scanner.scan(@whitespace)
252
258
  return
253
259
  end
254
260
  end
@@ -274,7 +280,6 @@ module EBNF::LL1
274
280
  if matched = scanner.scan(term.regexp)
275
281
  #STDERR.puts " matched #{term.type.inspect}: #{matched.inspect}"
276
282
  tok = token(term.type, term.canonicalize(matched))
277
- @lineno += matched.count("\n")
278
283
  return tok
279
284
  end
280
285
  end
@@ -300,7 +305,7 @@ module EBNF::LL1
300
305
  # Cause strings and codepoints to be unescaped.
301
306
  # @option options [Regexp] :partial_regexp
302
307
  # A regular expression matching the beginning of this terminal; useful for terminals that match things longer than the scanner low water mark.
303
- def initialize(type, regexp, options = {})
308
+ def initialize(type, regexp, **options)
304
309
  @type, @regexp, @options = type, regexp, options
305
310
  @partial_regexp = options[:partial_regexp]
306
311
  @map = options.fetch(:map, {})
@@ -353,8 +358,8 @@ module EBNF::LL1
353
358
  # Scanner instance with access to matched groups
354
359
  # @param [Hash{Symbol => Object}] options
355
360
  # @return [Token]
356
- def token(type, value, options = {})
357
- Token.new(type, value, options.merge(lineno: lineno))
361
+ def token(type, value, **options)
362
+ Token.new(type, value, lineno: lineno, **options)
358
363
  end
359
364
 
360
365
  ##
@@ -365,7 +370,7 @@ module EBNF::LL1
365
370
  # token.type #=> :LANGTAG
366
371
  # token.value #=> "en"
367
372
  #
368
- # @see http://en.wikipedia.org/wiki/Lexical_analysis#Token
373
+ # @see https://en.wikipedia.org/wiki/Lexical_analysis#Token
369
374
  class Token
370
375
  ##
371
376
  # The token's symbol type.
@@ -398,7 +403,7 @@ module EBNF::LL1
398
403
  # @param [String] value
399
404
  # @param [Hash{Symbol => Object}] options
400
405
  # @option options [Integer] :lineno (nil)
401
- def initialize(type, value, options = {})
406
+ def initialize(type, value, **options)
402
407
  @type = type.to_s.to_sym if type
403
408
  @value = value.to_s
404
409
  @options = options.dup
@@ -486,7 +491,7 @@ module EBNF::LL1
486
491
  # "invalid token '%' on line 10",
487
492
  # input: query, token: '%', lineno: 9)
488
493
  #
489
- # @see http://ruby-doc.org/core/classes/StandardError.html
494
+ # @see https://ruby-doc.org/core/classes/StandardError.html
490
495
  class Error < StandardError
491
496
  ##
492
497
  # The input string associated with the error.
@@ -514,7 +519,7 @@ module EBNF::LL1
514
519
  # @option options [String] :input (nil)
515
520
  # @option options [String] :token (nil)
516
521
  # @option options [Integer] :lineno (nil)
517
- def initialize(message, options = {})
522
+ def initialize(message, **options)
518
523
  @input = options[:input]
519
524
  @token = options[:token]
520
525
  @lineno = options[:lineno]