ebnf 1.2.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +160 -185
- data/UNLICENSE +1 -1
- data/VERSION +1 -1
- data/bin/ebnf +6 -3
- data/etc/doap.ttl +13 -12
- data/etc/ebnf.ebnf +13 -19
- data/etc/ebnf.html +205 -239
- data/etc/{ebnf.rb → ebnf.ll1.rb} +3 -4
- data/etc/ebnf.ll1.sxp +179 -183
- data/etc/ebnf.peg.rb +98 -0
- data/etc/ebnf.peg.sxp +93 -0
- data/etc/ebnf.sxp +37 -41
- data/etc/sparql.html +1603 -1751
- data/etc/sparql.ll1.sxp +7372 -7372
- data/etc/sparql.peg.rb +532 -0
- data/etc/sparql.peg.sxp +597 -0
- data/etc/sparql.sxp +362 -362
- data/etc/turtle.html +465 -517
- data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
- data/etc/turtle.ll1.sxp +425 -425
- data/etc/turtle.peg.rb +182 -0
- data/etc/turtle.peg.sxp +199 -0
- data/etc/turtle.sxp +101 -101
- data/lib/ebnf.rb +3 -1
- data/lib/ebnf/base.rb +30 -29
- data/lib/ebnf/bnf.rb +1 -26
- data/lib/ebnf/ll1.rb +132 -1
- data/lib/ebnf/ll1/lexer.rb +20 -22
- data/lib/ebnf/ll1/parser.rb +86 -61
- data/lib/ebnf/ll1/scanner.rb +83 -50
- data/lib/ebnf/peg.rb +39 -0
- data/lib/ebnf/peg/parser.rb +535 -0
- data/lib/ebnf/peg/rule.rb +222 -0
- data/lib/ebnf/rule.rb +118 -55
- data/lib/ebnf/terminals.rb +18 -0
- data/lib/ebnf/writer.rb +3 -2
- metadata +29 -6
- data/etc/sparql.rb +0 -45773
data/lib/ebnf/ll1/parser.rb
CHANGED
@@ -3,12 +3,52 @@ require 'ebnf/ll1/lexer'
|
|
3
3
|
module EBNF::LL1
|
4
4
|
##
|
5
5
|
# A Generic LL1 parser using a lexer and branch tables defined using the SWAP tool chain (modified).
|
6
|
+
#
|
7
|
+
# # Creating terminal definitions and parser rules to parse generated grammars
|
8
|
+
#
|
9
|
+
# The parser is initialized to callbacks invoked on entry and exit
|
10
|
+
# to each `terminal` and `production`. A trivial parser loop can be described as follows:
|
11
|
+
#
|
12
|
+
# require 'ebnf/ll1/parser'
|
13
|
+
# require 'meta'
|
14
|
+
#
|
15
|
+
# class Parser
|
16
|
+
# include Meta
|
17
|
+
# include EBNF::LL1::Parser
|
18
|
+
#
|
19
|
+
# terminal(:SYMBOL, /([a-z]|[A-Z]|[0-9]|_)+/) do |prod, token, input|
|
20
|
+
# # Add data based on scanned token to input
|
21
|
+
# input[:symbol] = token.value
|
22
|
+
# end
|
23
|
+
#
|
24
|
+
# start_production(:rule) do |input, current, callback|
|
25
|
+
# # Process on start of production
|
26
|
+
# # Set state for entry into recursed rules through current
|
27
|
+
#
|
28
|
+
# # Callback to parser loop with callback
|
29
|
+
# end
|
30
|
+
#
|
31
|
+
# production(:rule) do |input, current, callback|
|
32
|
+
# # Process on end of production
|
33
|
+
# # return results in input, retrieve results from recursed rules in current
|
34
|
+
#
|
35
|
+
# # Callback to parser loop with callback
|
36
|
+
# end
|
37
|
+
#
|
38
|
+
# def initialize(input)
|
39
|
+
# parse(input, start_symbol,
|
40
|
+
# branch: BRANCH,
|
41
|
+
# first: FIRST,
|
42
|
+
# follow: FOLLOW,
|
43
|
+
# cleanup: CLEANUP
|
44
|
+
# ) do |context, *data|
|
45
|
+
# # Process calls from callback from productions
|
46
|
+
#
|
47
|
+
# rescue ArgumentError, EBNF::LL1::Parser::Error => e
|
48
|
+
# progress("Parsing completed with errors:\n\t#{e.message}")
|
49
|
+
# raise RDF::ReaderError, e.message if validate?
|
50
|
+
# end
|
6
51
|
module Parser
|
7
|
-
##
|
8
|
-
# @private
|
9
|
-
# level above which debug messages are supressed
|
10
|
-
DEBUG_LEVEL = 10
|
11
|
-
|
12
52
|
##
|
13
53
|
# @return [Integer] line number of current token
|
14
54
|
attr_reader :lineno
|
@@ -186,7 +226,7 @@ module EBNF::LL1
|
|
186
226
|
# def each_statement(&block)
|
187
227
|
# @callback = block
|
188
228
|
#
|
189
|
-
# parse(START.to_sym) do |context, *data|
|
229
|
+
# parse(input, START.to_sym) do |context, *data|
|
190
230
|
# case context
|
191
231
|
# when :statement
|
192
232
|
# yield *data
|
@@ -205,16 +245,13 @@ module EBNF::LL1
|
|
205
245
|
# Lists valid terminals that can precede each production (for error recovery).
|
206
246
|
# @option options [Hash{Symbol,String => Array<Symbol,String>}] :follow ({})
|
207
247
|
# Lists valid terminals that can follow each production (for error recovery).
|
208
|
-
# @option options [Boolean] :validate (false)
|
209
|
-
# whether to validate the parsed statements and values. If not validating, the parser will attempt to recover from errors.
|
210
|
-
# @option options [Boolean] :progress
|
211
|
-
# Show progress of parser productions
|
212
|
-
# @option options [Boolean] :debug
|
213
|
-
# Detailed debug output
|
214
|
-
# @option options [Boolean] :reset_on_start
|
215
|
-
# Reset the parser state if the start token set with `prod` is found in a production. This reduces the production stack depth growth, which is appropriate for some grammars.
|
216
248
|
# @option options [Integer] :high_water passed to lexer
|
249
|
+
# @option options [Logger] :logger for errors/progress/debug.
|
217
250
|
# @option options [Integer] :low_water passed to lexer
|
251
|
+
# @option options [Boolean] :reset_on_start
|
252
|
+
# Reset the parser state if the start token set with `prod` is found in a production. This reduces the production stack depth growth, which is appropriate for some grammars.
|
253
|
+
# @option options [Boolean] :validate (false)
|
254
|
+
# whether to validate the parsed statements and values. If not validating, the parser will attempt to recover from errors.
|
218
255
|
# @yield [context, *data]
|
219
256
|
# Yields to return data to the parser
|
220
257
|
# @yieldparam [:statement, :trace] context
|
@@ -225,13 +262,9 @@ module EBNF::LL1
|
|
225
262
|
# @raise [Exception] Raises exceptions for parsing errors
|
226
263
|
# or errors raised during processing callbacks. Internal
|
227
264
|
# errors are raised using {Error}.
|
228
|
-
# @see
|
265
|
+
# @see https://cs.adelaide.edu.au/~charles/lt/Lectures/07-ErrorRecovery.pdf
|
229
266
|
def parse(input = nil, start = nil, **options, &block)
|
230
267
|
@options = options.dup
|
231
|
-
@options[:debug] ||= case
|
232
|
-
when @options[:progress] then 2
|
233
|
-
when @options[:validate] then 1
|
234
|
-
end
|
235
268
|
@branch = options[:branch]
|
236
269
|
@first = options[:first] ||= {}
|
237
270
|
@follow = options[:follow] ||= {}
|
@@ -356,9 +389,9 @@ module EBNF::LL1
|
|
356
389
|
end
|
357
390
|
|
358
391
|
# Get the list of follows for this sequence, this production and the stacked productions.
|
359
|
-
debug("recovery", "stack follows:"
|
392
|
+
debug("recovery", "stack follows:")
|
360
393
|
todo_stack.reverse.each do |todo|
|
361
|
-
debug("recovery"
|
394
|
+
debug("recovery") {" #{todo[:prod]}: #{@follow[todo[:prod]].inspect}"}
|
362
395
|
end
|
363
396
|
|
364
397
|
# Find all follows to the top of the stack
|
@@ -466,14 +499,15 @@ module EBNF::LL1
|
|
466
499
|
protected
|
467
500
|
|
468
501
|
##
|
469
|
-
# Error information, used as level `
|
502
|
+
# Error information, used as level `3` logger messages.
|
503
|
+
# Messages may be logged and are saved for reporting at end of parsing.
|
470
504
|
#
|
471
505
|
# @param [String] node Relevant location associated with message
|
472
506
|
# @param [String] message Error string
|
473
|
-
# @param [Hash] options
|
507
|
+
# @param [Hash{Symbol => Object}] options
|
474
508
|
# @option options [URI, #to_s] :production
|
475
509
|
# @option options [Token] :token
|
476
|
-
# @see
|
510
|
+
# @see #debug
|
477
511
|
def error(node, message, **options)
|
478
512
|
lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
|
479
513
|
m = "ERROR "
|
@@ -483,83 +517,74 @@ module EBNF::LL1
|
|
483
517
|
m += ", production = #{options[:production].inspect}" if options[:production]
|
484
518
|
@error_log << m unless @recovering
|
485
519
|
@recovering = true
|
486
|
-
debug(node, m, level:
|
520
|
+
debug(node, m, level: options.fetch(:level, 3), **options)
|
487
521
|
if options[:raise] || @options[:validate]
|
488
522
|
raise Error.new(m, lineno: lineno, token: options[:token], production: options[:production])
|
489
523
|
end
|
490
524
|
end
|
491
525
|
|
492
526
|
##
|
493
|
-
# Warning information, used as level `
|
527
|
+
# Warning information, used as level `2` logger messages.
|
528
|
+
# Messages may be logged and are saved for reporting at end of parsing.
|
494
529
|
#
|
495
530
|
# @param [String] node Relevant location associated with message
|
496
531
|
# @param [String] message Error string
|
497
532
|
# @param [Hash] options
|
498
533
|
# @option options [URI, #to_s] :production
|
499
534
|
# @option options [Token] :token
|
500
|
-
# @see
|
535
|
+
# @see #debug
|
501
536
|
def warn(node, message, **options)
|
537
|
+
lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
|
502
538
|
m = "WARNING "
|
503
|
-
m += "[line: #{
|
539
|
+
m += "[line: #{lineno}] " if lineno
|
504
540
|
m += message
|
505
541
|
m += " (found #{options[:token].inspect})" if options[:token]
|
506
542
|
m += ", production = #{options[:production].inspect}" if options[:production]
|
507
543
|
@error_log << m unless @recovering
|
508
|
-
debug(node, m, level:
|
544
|
+
debug(node, m, level: 2, lineno: lineno, **options)
|
509
545
|
end
|
510
546
|
|
511
547
|
##
|
512
|
-
# Progress
|
548
|
+
# Progress logged when parsing. Passed as level `1` logger messages.
|
513
549
|
#
|
514
|
-
#
|
550
|
+
# The call is ignored, unless `@options[:logger]` is set.
|
551
|
+
#
|
552
|
+
# @overload progress(node, message, **options, &block)
|
515
553
|
# @param [String] node Relevant location associated with message
|
516
554
|
# @param [String] message ("")
|
517
555
|
# @param [Hash] options
|
518
556
|
# @option options [Integer] :depth
|
519
557
|
# Recursion depth for indenting output
|
520
|
-
# @see
|
558
|
+
# @see #debug
|
521
559
|
def progress(node, *args, &block)
|
522
|
-
return unless @options[:
|
560
|
+
return unless @options[:logger]
|
561
|
+
lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
|
523
562
|
args << {} unless args.last.is_a?(Hash)
|
524
|
-
args.last[:level] ||=
|
563
|
+
args.last[:level] ||= 1
|
564
|
+
args.last[:lineno] ||= lineno
|
525
565
|
debug(node, *args, &block)
|
526
566
|
end
|
527
567
|
|
528
568
|
##
|
529
|
-
#
|
569
|
+
# Debug logging.
|
530
570
|
#
|
531
|
-
# The call is ignored, unless `@options[:
|
532
|
-
# case it yields tracing information as indicated. Additionally,
|
533
|
-
# if `@options[:debug]` is an Integer, the call is aborted if the
|
534
|
-
# `:level` option is less than than `:level`.
|
571
|
+
# The call is ignored, unless `@options[:logger]` is set.
|
535
572
|
#
|
536
573
|
# @overload debug(node, message, **options)
|
537
574
|
# @param [Array<String>] args Relevant location associated with message
|
538
575
|
# @param [Hash] options
|
539
576
|
# @option options [Integer] :depth
|
540
577
|
# Recursion depth for indenting output
|
541
|
-
# @
|
542
|
-
# Level assigned to message, by convention, level `0` is for
|
543
|
-
# errors, level `1` is for warnings, level `2` is for parser
|
544
|
-
# progress information, and anything higher is for various levels
|
545
|
-
# of debug information.
|
546
|
-
#
|
547
|
-
# @yield trace, level, lineno, depth, args
|
548
|
-
# @yieldparam [:trace] trace
|
549
|
-
# @yieldparam [Integer] level
|
550
|
-
# @yieldparam [Integer] lineno
|
551
|
-
# @yieldparam [Integer] depth Recursive depth of productions
|
552
|
-
# @yieldparam [Array<String>] args
|
553
|
-
# @yieldreturn [String] added to message
|
578
|
+
# @yieldreturn [String] additional string appended to `message`.
|
554
579
|
def debug(*args)
|
555
|
-
return unless @options[:
|
580
|
+
return unless @options[:logger]
|
556
581
|
options = args.last.is_a?(Hash) ? args.pop : {}
|
557
|
-
|
558
|
-
|
582
|
+
lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
|
583
|
+
level = options.fetch(:level, 0)
|
559
584
|
|
560
585
|
depth = options[:depth] || self.depth
|
561
586
|
args << yield if block_given?
|
562
|
-
@
|
587
|
+
@options[:logger].add(level, "[#{@lineno}]" + (" " * depth) + args.join(" "))
|
563
588
|
end
|
564
589
|
|
565
590
|
private
|
@@ -570,7 +595,7 @@ module EBNF::LL1
|
|
570
595
|
if handler
|
571
596
|
# Create a new production data element, potentially allowing handler
|
572
597
|
# to customize before pushing on the @prod_data stack
|
573
|
-
|
598
|
+
debug("#{prod}(:start):#{@prod_data.length}") {@prod_data.last}
|
574
599
|
data = {}
|
575
600
|
begin
|
576
601
|
self.class.eval_with_binding(self) {
|
@@ -584,12 +609,12 @@ module EBNF::LL1
|
|
584
609
|
elsif [:merge, :star].include?(@cleanup[prod])
|
585
610
|
# Save current data to merge later
|
586
611
|
@prod_data << {}
|
587
|
-
|
612
|
+
debug("#{prod}(:start}:#{@prod_data.length}:cleanup:#{@cleanup[prod]}") { get_token.inspect + (@recovering ? ' recovering' : '')}
|
588
613
|
else
|
589
614
|
# Make sure we push as many as we pop, even if there is no
|
590
615
|
# explicit start handler
|
591
616
|
@prod_data << {} if self.class.production_handlers[prod]
|
592
|
-
|
617
|
+
debug("#{prod}(:start:#{@prod_data.length})") { get_token.inspect + (@recovering ? ' recovering' : '')}
|
593
618
|
end
|
594
619
|
#puts "prod_data(s): " + @prod_data.inspect
|
595
620
|
end
|
@@ -623,7 +648,7 @@ module EBNF::LL1
|
|
623
648
|
else Array(input[k]) + Array(v)
|
624
649
|
end
|
625
650
|
end
|
626
|
-
|
651
|
+
debug("#{prod}(:finish):#{@prod_data.length} cleanup:#{@cleanup[prod]}") {@prod_data.last}
|
627
652
|
else
|
628
653
|
progress("#{prod}(:finish):#{@prod_data.length}") { "recovering" if @recovering }
|
629
654
|
end
|
@@ -730,7 +755,7 @@ module EBNF::LL1
|
|
730
755
|
# "invalid token '%' on line 10",
|
731
756
|
# token: '%', lineno: 9, production: :turtleDoc)
|
732
757
|
#
|
733
|
-
# @see
|
758
|
+
# @see https://ruby-doc.org/core/classes/StandardError.html
|
734
759
|
class Error < StandardError
|
735
760
|
##
|
736
761
|
# The current production.
|
data/lib/ebnf/ll1/scanner.rb
CHANGED
@@ -3,7 +3,7 @@ require 'strscan' unless defined?(StringScanner)
|
|
3
3
|
|
4
4
|
module EBNF::LL1
|
5
5
|
##
|
6
|
-
# Overload StringScanner with file operations
|
6
|
+
# Overload StringScanner with file operations and line counting
|
7
7
|
#
|
8
8
|
# * Reloads scanner as required until EOF.
|
9
9
|
# * Loads to a high-water and reloads when remaining size reaches a low-water.
|
@@ -14,25 +14,14 @@ module EBNF::LL1
|
|
14
14
|
LOW_WATER = 4 * 1024
|
15
15
|
|
16
16
|
##
|
17
|
-
# @return [IO, StringIO]
|
17
|
+
# @return [String, IO, StringIO]
|
18
18
|
attr_reader :input
|
19
19
|
|
20
20
|
##
|
21
|
-
#
|
22
|
-
#
|
23
|
-
|
24
|
-
|
25
|
-
if input.respond_to?(:read)
|
26
|
-
scanner = self.allocate
|
27
|
-
scanner.send(:initialize, input, **options)
|
28
|
-
else
|
29
|
-
if input.encoding != Encoding::UTF_8
|
30
|
-
input = input.dup if input.frozen?
|
31
|
-
input.force_encoding(Encoding::UTF_8)
|
32
|
-
end
|
33
|
-
StringScanner.new(input)
|
34
|
-
end
|
35
|
-
end
|
21
|
+
# The current line number (one-based).
|
22
|
+
#
|
23
|
+
# @return [Integer]
|
24
|
+
attr_accessor :lineno
|
36
25
|
|
37
26
|
##
|
38
27
|
# Create a scanner, from an IO
|
@@ -45,32 +34,23 @@ module EBNF::LL1
|
|
45
34
|
def initialize(input, **options)
|
46
35
|
@options = options.merge(high_water: HIGH_WATER, low_water: LOW_WATER)
|
47
36
|
|
48
|
-
@
|
49
|
-
|
37
|
+
@previous_lineno = @lineno = 1
|
38
|
+
@input = input.is_a?(String) ? encode_utf8(input) : input
|
39
|
+
super(input.is_a?(String) ? @input : "")
|
50
40
|
feed_me
|
51
41
|
self
|
52
42
|
end
|
53
43
|
|
54
44
|
##
|
55
|
-
#
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
##
|
65
|
-
# Attempts to skip over the given `pattern` beginning with the scan pointer.
|
66
|
-
# If it matches, the scan pointer is advanced to the end of the match,
|
67
|
-
# and the length of the match is returned. Otherwise, `nil` is returned.
|
68
|
-
#
|
69
|
-
# similar to `scan`, but without returning the matched string.
|
70
|
-
# @param [Regexp] pattern
|
71
|
-
def skip(pattern)
|
72
|
-
feed_me
|
73
|
-
super
|
45
|
+
# Ensures that the input buffer is full to the high water mark, or end of file. Useful when matching tokens that may be longer than the low water mark
|
46
|
+
def ensure_buffer_full
|
47
|
+
# Read up to high-water mark ensuring we're at an end of line
|
48
|
+
if @input.respond_to?(:eof?) && !@input.eof?
|
49
|
+
diff = @options[:high_water] - rest_size
|
50
|
+
string = encode_utf8(@input.read(diff))
|
51
|
+
string << encode_utf8(@input.gets) unless @input.eof?
|
52
|
+
self << string if string
|
53
|
+
end
|
74
54
|
end
|
75
55
|
|
76
56
|
##
|
@@ -83,10 +63,14 @@ module EBNF::LL1
|
|
83
63
|
end
|
84
64
|
|
85
65
|
##
|
86
|
-
#
|
87
|
-
|
66
|
+
# Returns the "rest" of the line, or the next line if at EOL (i.e. everything after the scan pointer).
|
67
|
+
# If there is no more data (eos? = true), it returns "".
|
68
|
+
#
|
69
|
+
# @return [String]
|
70
|
+
def rest
|
88
71
|
feed_me
|
89
|
-
|
72
|
+
@lineno += 1 if eos?
|
73
|
+
encode_utf8 super
|
90
74
|
end
|
91
75
|
|
92
76
|
##
|
@@ -108,19 +92,68 @@ module EBNF::LL1
|
|
108
92
|
# @return [String]
|
109
93
|
def scan(pattern)
|
110
94
|
feed_me
|
111
|
-
|
95
|
+
@previous_lineno = @lineno
|
96
|
+
if matched = encode_utf8(super)
|
97
|
+
@lineno += matched.count("\n")
|
98
|
+
end
|
99
|
+
matched
|
112
100
|
end
|
113
101
|
|
114
102
|
##
|
115
|
-
#
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
103
|
+
# Scans the string until the pattern is matched. Returns the substring up to and including the end of the match, advancing the scan pointer to that location. If there is no match, nil is returned.
|
104
|
+
#
|
105
|
+
# @example
|
106
|
+
# s = StringScanner.new("Fri Dec 12 1975 14:39")
|
107
|
+
# s.scan_until(/1/) # -> "Fri Dec 1"
|
108
|
+
# s.pre_match # -> "Fri Dec "
|
109
|
+
# s.scan_until(/XYZ/) # -> nil
|
110
|
+
#
|
111
|
+
# @param [Regexp] pattern
|
112
|
+
# @return [String]
|
113
|
+
def scan_until(pattern)
|
114
|
+
feed_me
|
115
|
+
@previous_lineno = @lineno
|
116
|
+
if matched = encode_utf8(super)
|
117
|
+
@lineno += matched.count("\n")
|
123
118
|
end
|
119
|
+
matched
|
120
|
+
end
|
121
|
+
|
122
|
+
##
|
123
|
+
# Attempts to skip over the given `pattern` beginning with the scan pointer.
|
124
|
+
# If it matches, the scan pointer is advanced to the end of the match,
|
125
|
+
# and the length of the match is returned. Otherwise, `nil` is returned.
|
126
|
+
#
|
127
|
+
# similar to `scan`, but without returning the matched string.
|
128
|
+
# @param [Regexp] pattern
|
129
|
+
def skip(pattern)
|
130
|
+
scan(pattern)
|
131
|
+
nil
|
132
|
+
end
|
133
|
+
|
134
|
+
##
|
135
|
+
# Advances the scan pointer until pattern is matched and consumed. Returns the number of characters advanced, or nil if no match was found.
|
136
|
+
#
|
137
|
+
# Look ahead to match pattern, and advance the scan pointer to the end of the match. Return the number of characters advanced, or nil if the match was unsuccessful.
|
138
|
+
#
|
139
|
+
# It’s similar to scan_until, but without returning the intervening string.
|
140
|
+
# @param [Regexp] pattern
|
141
|
+
def skip_until(pattern)
|
142
|
+
(matched = scan_until(pattern)) && matched.length
|
143
|
+
end
|
144
|
+
|
145
|
+
##
|
146
|
+
# Sets the scan pointer to the previous position. Only one previous position is remembered, and it changes with each scanning operation.
|
147
|
+
def unscan
|
148
|
+
@lineno = @previous_lineno
|
149
|
+
super
|
150
|
+
end
|
151
|
+
|
152
|
+
##
|
153
|
+
# Set the scan pointer to the end of the string and clear matching data
|
154
|
+
def terminate
|
155
|
+
feed_me
|
156
|
+
super
|
124
157
|
end
|
125
158
|
|
126
159
|
private
|