ebnf 1.2.0 → 2.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +223 -199
  3. data/UNLICENSE +1 -1
  4. data/VERSION +1 -1
  5. data/bin/ebnf +38 -19
  6. data/etc/abnf-core.ebnf +52 -0
  7. data/etc/abnf.abnf +121 -0
  8. data/etc/abnf.ebnf +124 -0
  9. data/etc/abnf.sxp +45 -0
  10. data/etc/doap.ttl +23 -18
  11. data/etc/ebnf.ebnf +21 -33
  12. data/etc/ebnf.html +76 -160
  13. data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
  14. data/etc/ebnf.ll1.sxp +182 -183
  15. data/etc/ebnf.peg.rb +90 -0
  16. data/etc/ebnf.peg.sxp +84 -0
  17. data/etc/ebnf.sxp +40 -41
  18. data/etc/iso-ebnf.ebnf +140 -0
  19. data/etc/iso-ebnf.isoebnf +138 -0
  20. data/etc/iso-ebnf.sxp +65 -0
  21. data/etc/sparql.ebnf +4 -4
  22. data/etc/sparql.html +1603 -1751
  23. data/etc/sparql.ll1.sxp +7372 -7372
  24. data/etc/sparql.peg.rb +532 -0
  25. data/etc/sparql.peg.sxp +597 -0
  26. data/etc/sparql.sxp +363 -362
  27. data/etc/turtle.ebnf +3 -3
  28. data/etc/turtle.html +465 -517
  29. data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
  30. data/etc/turtle.ll1.sxp +425 -425
  31. data/etc/turtle.peg.rb +182 -0
  32. data/etc/turtle.peg.sxp +199 -0
  33. data/etc/turtle.sxp +103 -101
  34. data/lib/ebnf.rb +6 -1
  35. data/lib/ebnf/abnf.rb +301 -0
  36. data/lib/ebnf/abnf/core.rb +23 -0
  37. data/lib/ebnf/abnf/meta.rb +111 -0
  38. data/lib/ebnf/base.rb +114 -69
  39. data/lib/ebnf/bnf.rb +1 -26
  40. data/lib/ebnf/ebnf/meta.rb +90 -0
  41. data/lib/ebnf/isoebnf.rb +229 -0
  42. data/lib/ebnf/isoebnf/meta.rb +75 -0
  43. data/lib/ebnf/ll1.rb +131 -3
  44. data/lib/ebnf/ll1/lexer.rb +20 -22
  45. data/lib/ebnf/ll1/parser.rb +97 -64
  46. data/lib/ebnf/ll1/scanner.rb +82 -50
  47. data/lib/ebnf/native.rb +320 -0
  48. data/lib/ebnf/parser.rb +285 -302
  49. data/lib/ebnf/peg.rb +39 -0
  50. data/lib/ebnf/peg/parser.rb +561 -0
  51. data/lib/ebnf/peg/rule.rb +250 -0
  52. data/lib/ebnf/rule.rb +442 -148
  53. data/lib/ebnf/terminals.rb +21 -0
  54. data/lib/ebnf/writer.rb +587 -82
  55. metadata +125 -18
  56. data/etc/sparql.rb +0 -45773
@@ -0,0 +1,250 @@
1
+ module EBNF::PEG
2
+ # Behavior for parsing a PEG rule
3
+ module Rule
4
+ ##
5
+ # Initialized by parser when loading rules.
6
+ # Used for finding rules and invoking elements of the parse process.
7
+ #
8
+ # @return [EBNF::PEG::Parser] parser
9
+ attr_accessor :parser
10
+
11
+ ##
12
+ # Parse a rule or terminal, invoking callbacks, as appropriate
13
+
14
+ # If there are `start_production` and/or `production` callbacks,
15
+ # they are invoked with a `prod_data` stack, the input stream and offset.
16
+ # Otherwise, the results are added as an array value
17
+ # to a hash indexed by the rule name.
18
+ #
19
+ # If matched, the input position is updated and the results returned in a Hash.
20
+ #
21
+ # * `alt`: returns the value of the matched production or `:unmatched`.
22
+ # * `diff`: returns the value matched, or `:unmatched`.
23
+ # * `hex`: returns a string composed of the matched hex character, or `:unmatched`.
24
+ # * `opt`: returns the value matched, or `nil` if unmatched.
25
+ # * `plus`: returns an array of the values matched for the specified production, or `:unmatched`, if none are matched. For Terminals, these are concatenated into a single string.
26
+ # * `range`: returns a string composed of the values matched, or `:unmatched`, if less than `min` are matched.
27
+ # * `rept`: returns an array of the values matched for the specified production, or `:unmatched`, if none are matched. For Terminals, these are concatenated into a single string.
28
+ # * `seq`: returns an array composed of single-entry hashes for each matched production indexed by the production name, or `:unmatched` if any production fails to match. For Terminals, returns a string created by concatenating these values. Via option in a `production` or definition, the result can be a single hash with values for each matched production; note that this is not always possible due to the possibility of repeated productions within the sequence.
29
+ # * `star`: returns an array of the values matched for the specified production. For Terminals, these are concatenated into a single string.
30
+ #
31
+ # @param [Scanner] input
32
+ # @return [Hash{Symbol => Object}, :unmatched] A hash with keys for matched component of the expression. Returns :unmatched if the input does not match the production.
33
+ def parse(input)
34
+ # Save position and linenumber for backtracking
35
+ pos, lineno = input.pos, input.lineno
36
+
37
+ parser.packrat[sym] ||= {}
38
+ if parser.packrat[sym][pos]
39
+ parser.debug("#{sym}(:memo)", lineno: lineno) { "#{parser.packrat[sym][pos].inspect}(@#{pos})"}
40
+ input.pos, input.lineno = parser.packrat[sym][pos][:pos], parser.packrat[sym][pos][:lineno]
41
+ return parser.packrat[sym][pos][:result]
42
+ end
43
+
44
+ if terminal?
45
+ # If the terminal is defined with a regular expression,
46
+ # use that to match the input,
47
+ # otherwise,
48
+ if regexp = parser.find_terminal_regexp(sym)
49
+ matched = input.scan(regexp)
50
+ result = parser.onTerminal(sym, (matched ? matched : :unmatched))
51
+ # Update furthest failure for strings and terminals
52
+ parser.update_furthest_failure(input.pos, input.lineno, sym) if result == :unmatched
53
+ parser.packrat[sym][pos] = {
54
+ pos: input.pos,
55
+ lineno: input.lineno,
56
+ result: result
57
+ }
58
+ return parser.packrat[sym][pos][:result]
59
+ end
60
+ else
61
+ eat_whitespace(input)
62
+ end
63
+ start_options = parser.onStart(sym)
64
+
65
+ result = case expr.first
66
+ when :alt
67
+ # Return the first expression to match.
68
+ # Result is either :unmatched, or the value of the matching rule
69
+ alt = :unmatched
70
+ expr[1..-1].each do |prod|
71
+ alt = case prod
72
+ when Symbol
73
+ rule = parser.find_rule(prod)
74
+ raise "No rule found for #{prod}" unless rule
75
+ rule.parse(input)
76
+ when String
77
+ input.scan(Regexp.new(Regexp.quote(prod))) || :unmatched
78
+ end
79
+ if alt == :unmatched
80
+ # Update furthest failure for strings and terminals
81
+ parser.update_furthest_failure(input.pos, input.lineno, prod) if prod.is_a?(String) || rule.terminal?
82
+ else
83
+ break
84
+ end
85
+ end
86
+ alt
87
+ when :diff
88
+ # matches any string that matches A but does not match B.
89
+ # (Note, this is only used for Terminal rules, non-terminals will use :not)
90
+ raise "Diff used on non-terminal #{prod}" unless terminal?
91
+ re1, re2 = Regexp.new(translate_codepoints(expr[1])), Regexp.new(translate_codepoints(expr[2]))
92
+ matched = input.scan(re1)
93
+ if !matched || re2.match?(matched)
94
+ # Update furthest failure for terminals
95
+ parser.update_furthest_failure(input.pos, input.lineno, sym)
96
+ :unmatched
97
+ else
98
+ matched
99
+ end
100
+ when :hex
101
+ # Matches the given hex character if expression matches the character whose number (code point) in ISO/IEC 10646 is N. The number of leading zeros in the #xN form is insignificant.
102
+ input.scan(to_regexp) || begin
103
+ # Update furthest failure for terminals
104
+ parser.update_furthest_failure(input.pos, input.lineno, expr.last)
105
+ :unmatched
106
+ end
107
+ when :not
108
+ # matches any string that does not match B.
109
+ res = case prod = expr[1]
110
+ when Symbol
111
+ rule = parser.find_rule(prod)
112
+ raise "No rule found for #{prod}" unless rule
113
+ rule.parse(input)
114
+ when String
115
+ input.scan(Regexp.new(Regexp.quote(prod))) || :unmatched
116
+ end
117
+ if res != :unmatched
118
+ # Update furthest failure for terminals
119
+ parser.update_furthest_failure(input.pos, input.lineno, sym) if terminal?
120
+ :unmatched
121
+ else
122
+ nil
123
+ end
124
+ when :opt
125
+ # Result is the matched value or nil
126
+ opt = rept(input, 0, 1, expr[1])
127
+
128
+ # Update furthest failure for strings and terminals
129
+ parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
130
+ opt.first
131
+ when :plus
132
+ # Result is an array of all expressions while they match,
133
+ # at least one must match
134
+ plus = rept(input, 1, '*', expr[1])
135
+
136
+ # Update furthest failure for strings and terminals
137
+ parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
138
+ plus.is_a?(Array) && terminal? ? plus.join("") : plus
139
+ when :range, :istr
140
+ # Matches the specified character range
141
+ input.scan(to_regexp) || begin
142
+ # Update furthest failure for strings and terminals
143
+ parser.update_furthest_failure(input.pos, input.lineno, expr[1])
144
+ :unmatched
145
+ end
146
+ when :rept
147
+ # Result is an array of all expressions while they match,
148
+ # an empty array if none match
149
+ rept = rept(input, expr[1], expr[2], expr[3])
150
+
151
+ # # Update furthest failure for strings and terminals
152
+ parser.update_furthest_failure(input.pos, input.lineno, expr[3]) if terminal?
153
+ rept.is_a?(Array) && terminal? ? rept.join("") : rept
154
+ when :seq
155
+ # Evaluate each expression into an array of hashes where each hash contains a key from the associated production and the value is the parsed value of that production. Returns :unmatched if the input does not match the production. Value ordering is ensured by native Hash ordering.
156
+ seq = expr[1..-1].each_with_object([]) do |prod, accumulator|
157
+ eat_whitespace(input) unless accumulator.empty? || terminal?
158
+ res = case prod
159
+ when Symbol
160
+ rule = parser.find_rule(prod)
161
+ raise "No rule found for #{prod}" unless rule
162
+ rule.parse(input)
163
+ when String
164
+ input.scan(Regexp.new(Regexp.quote(prod))) || :unmatched
165
+ end
166
+ if res == :unmatched
167
+ # Update furthest failure for strings and terminals
168
+ parser.update_furthest_failure(input.pos, input.lineno, prod)
169
+ break :unmatched
170
+ end
171
+ accumulator << {prod.to_sym => res}
172
+ end
173
+ if seq == :unmatched
174
+ :unmatched
175
+ elsif terminal?
176
+ seq.map(&:values).compact.join("") # Concat values for terminal production
177
+ elsif start_options[:as_hash]
178
+ seq.inject {|memo, h| memo.merge(h)}
179
+ else
180
+ seq
181
+ end
182
+ when :star
183
+ # Result is an array of all expressions while they match,
184
+ # an empty array if none match
185
+ star = rept(input, 0, '*', expr[1])
186
+
187
+ # Update furthest failure for strings and terminals
188
+ parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
189
+ star.is_a?(Array) && terminal? ? star.join("") : star
190
+ else
191
+ raise "attempt to parse unknown rule type: #{expr.first}"
192
+ end
193
+
194
+ if result == :unmatched
195
+ input.pos, input.lineno = pos, lineno
196
+ end
197
+
198
+ result = parser.onFinish(result)
199
+ (parser.packrat[sym] ||= {})[pos] = {
200
+ pos: input.pos,
201
+ lineno: input.lineno,
202
+ result: result
203
+ }
204
+ return parser.packrat[sym][pos][:result]
205
+ end
206
+
207
+ ##
208
+ # Repetition, 0-1, 0-n, 1-n, ...
209
+ #
210
+ # Note, nil results are removed from the result, but count towards min/max calculations
211
+ #
212
+ # @param [Scanner] input
213
+ # @param [Integer] min
214
+ # @param [Integer] max
215
+ # If it is an integer, it stops matching after max entries.
216
+ # @param [Symbol, String] prod
217
+ # @return [:unmatched, Array]
218
+ def rept(input, min, max, prod)
219
+ result = []
220
+
221
+ case prod
222
+ when Symbol
223
+ rule = parser.find_rule(prod)
224
+ raise "No rule found for #{prod}" unless rule
225
+ while (max == '*' || result.length < max) && (res = rule.parse(input)) != :unmatched
226
+ eat_whitespace(input) unless terminal?
227
+ result << res
228
+ end
229
+ when String
230
+ while (res = input.scan(Regexp.new(Regexp.quote(prod)))) && (max == '*' || result.length < max)
231
+ eat_whitespace(input) unless terminal?
232
+ result << res
233
+ end
234
+ end
235
+
236
+ result.length < min ? :unmatched : result.compact
237
+ end
238
+
239
+ ##
240
+ # Eat whitespace between non-terminal rules
241
+ def eat_whitespace(input)
242
+ if parser.whitespace.is_a?(Regexp)
243
+ # Eat whitespace before a non-terminal
244
+ input.skip(parser.whitespace)
245
+ elsif parser.whitespace.is_a?(Rule)
246
+ parser.whitespace.parse(input) # throw away result
247
+ end
248
+ end
249
+ end
250
+ end
data/lib/ebnf/rule.rb CHANGED
@@ -1,15 +1,33 @@
1
+ require 'scanf'
2
+ require 'strscan'
3
+
1
4
  module EBNF
2
5
  # Represent individual parsed rules
3
6
  class Rule
4
- # Operations which are flattened to seprate rules in to_bnf
7
+ # Operations which are flattened to seprate rules in to_bnf.
5
8
  BNF_OPS = %w{
6
- alt opt plus seq star
9
+ alt diff not opt plus rept seq star
7
10
  }.map(&:to_sym).freeze
8
11
 
9
12
  TERM_OPS = %w{
10
- diff hex range
13
+ hex istr range
11
14
  }.map(&:to_sym).freeze
12
15
 
16
+ # The number of arguments expected per operator. `nil` for unspecified
17
+ OP_ARGN = {
18
+ alt: nil,
19
+ diff: 2,
20
+ hex: 1,
21
+ istr: 1,
22
+ not: 1,
23
+ opt: 1,
24
+ plus: 1,
25
+ range: 1,
26
+ rept: 3,
27
+ seq: nil,
28
+ star: 1
29
+ }
30
+
13
31
  # Symbol of rule
14
32
  #
15
33
  # @return [Symbol]
@@ -26,7 +44,7 @@ module EBNF
26
44
 
27
45
  # Kind of rule
28
46
  #
29
- # @return [:rule, :terminal, or :pass]
47
+ # @return [:rule, :terminal, :terminals, or :pass]
30
48
  attr_accessor :kind
31
49
 
32
50
  # Rule expression
@@ -57,19 +75,38 @@ module EBNF
57
75
  # Determines preparation and cleanup rules for reconstituting EBNF ? * + from BNF
58
76
  attr_accessor :cleanup
59
77
 
60
- # @param [Integer] id
61
- # @param [Symbol] sym
78
+ # @param [Symbol, nil] sym
79
+ # `nil` is allowed only for @pass or @terminals
80
+ # @param [Integer, nil] id
62
81
  # @param [Array] expr
63
- # @param [Symbol] :kind
64
- # @param [String] :ebnf
65
- # @param [Array] :first
66
- # @param [Array] :follow
67
- # @param [Boolean] :start
68
- # @param [Rule] :top_rule
69
- # @param [Boolean] :cleanup
82
+ # The expression is an internal-representation of an S-Expression with one of the following operators:
83
+ #
84
+ # * `alt` A list of alternative rules, which are attempted in order. It terminates with the first matching rule, or is terminated as unmatched, if no such rule is found.
85
+ # * `diff` matches any string that matches `A` but does not match `B`.
86
+ # * `hex` A single character represented using the hexadecimal notation `#xnn`.
87
+ # * `istr` A string which matches in a case-insensitive manner, so that `(istr "fOo")` will match either of the strings `"foo"`, `"FOO"` or any other combination.
88
+ # * `opt` An optional rule or terminal. It either results in the matching rule or returns `nil`.
89
+ # * `plus` – A sequence of one or more of the matching rule. If there is no such rule, it is terminated as unmatched; otherwise, the result is an array containing all matched input.
90
+ # * `range` – A range of characters, possibly repeated, of the form `(range "a-z")`. May also use hexadecimal notation.
91
+ # * `rept m n` – A sequence of at least `m` and at most `n` of the matching rule. It will always return an array.
92
+ # * `seq` – A sequence of rules or terminals. If any (other than `opt` or `star`) do not parse, the rule is terminated as unmatched.
93
+ # * `star` – A sequence of zero or more of the matching rule. It will always return an array.
94
+ # @param [:rule, :terminal, :terminals, :pass] kind (nil)
95
+ # @param [String] ebnf (nil)
96
+ # When parsing, records the EBNF string used to create the rule.
97
+ # @param [Array] first (nil)
98
+ # Recorded set of terminals that can begin this rule (LL(1))
99
+ # @param [Array] follow (nil)
100
+ # Recorded set of terminals that can follow this rule (LL(1))
101
+ # @param [Boolean] start (nil)
102
+ # Is this the starting rule for the grammar?
103
+ # @param [Rule] top_rule (nil)
104
+ # The top-most rule. All expressed rules are top-rules, derived rules have the original rule as their top-rule.
105
+ # @param [Boolean] cleanup (nil)
106
+ # Records information useful for cleaning up converted :plus, and :star expansions (LL(1)).
70
107
  def initialize(sym, id, expr, kind: nil, ebnf: nil, first: nil, follow: nil, start: nil, top_rule: nil, cleanup: nil)
71
108
  @sym, @id = sym, id
72
- @expr = expr.is_a?(Array) ? expr : [:seq, expr]
109
+ @expr = expr.is_a?(Array) ? expr : [:seq, expr].compact
73
110
  @ebnf, @kind, @first, @follow, @start, @cleanup, @top_rule = ebnf, kind, first, follow, start, cleanup, top_rule
74
111
  @top_rule ||= self
75
112
  @kind ||= case
@@ -77,21 +114,53 @@ module EBNF
77
114
  when !BNF_OPS.include?(@expr.first) then :terminal
78
115
  else :rule
79
116
  end
117
+
118
+ # Allow @pass and @terminals to not be named
119
+ @sym ||= :_pass if @kind == :pass
120
+ @sym ||= :_terminals if @kind == :terminals
121
+
122
+ raise ArgumentError, "Rule sym must be a symbol, was #{@sym.inspect}" unless @sym.is_a?(Symbol)
123
+ raise ArgumentError, "Rule id must be a string or nil, was #{@id.inspect}" unless (@id || "").is_a?(String)
124
+ raise ArgumentError, "Rule kind must be one of :rule, :terminal, :terminals, or :pass, was #{@kind.inspect}" unless
125
+ @kind.is_a?(Symbol) && %w(rule terminal terminals pass).map(&:to_sym).include?(@kind)
126
+
127
+ case @expr.first
128
+ when :alt
129
+ raise ArgumentError, "#{@expr.first} operation must have at least one operand, had #{@expr.length - 1}" unless @expr.length > 1
130
+ when :diff
131
+ raise ArgumentError, "#{@expr.first} operation must have exactly two operands, had #{@expr.length - 1}" unless @expr.length == 3
132
+ when :hex, :istr, :not, :opt, :plus, :range, :star
133
+ raise ArgumentError, "#{@expr.first} operation must have exactly one operand, had #{@expr.length - 1}" unless @expr.length == 2
134
+ when :rept
135
+ raise ArgumentError, "#{@expr.first} operation must have exactly three, had #{@expr.length - 1}" unless @expr.length == 4
136
+ raise ArgumentError, "#{@expr.first} operation must an non-negative integer minimum, was #{@expr[1]}" unless
137
+ @expr[1].is_a?(Integer) && @expr[1] >= 0
138
+ raise ArgumentError, "#{@expr.first} operation must an non-negative integer maximum or '*', was #{@expr[2]}" unless
139
+ @expr[2] == '*' || @expr[2].is_a?(Integer) && @expr[2] >= 0
140
+ when :seq
141
+ # It's legal to have a zero-length sequence
142
+ else
143
+ raise ArgumentError, "Rule expression must be an array using a known operator, was #{@expr.first}"
144
+ end
80
145
  end
81
146
 
82
147
  ##
83
148
  # Return a rule from its SXP representation:
84
149
  #
85
150
  # @example inputs
86
- # (pass (plus (range "#x20\\t\\r\\n")))
151
+ # (pass _pass (plus (range "#x20\\t\\r\\n")))
87
152
  # (rule ebnf "1" (star (alt declaration rule)))
88
- # (terminal O_ENUM "17" (seq "[^" (plus CHAR) "]"))
153
+ # (terminal R_CHAR "19" (diff CHAR (alt "]" "-")))
89
154
  #
90
- # Also may have (first ...), (follow ...), or (start #t)
155
+ # Also may have `(first ...)`, `(follow ...)`, or `(start #t)`.
91
156
  #
92
- # @param [Array] sxp
157
+ # @param [String, Array] sxp
93
158
  # @return [Rule]
94
159
  def self.from_sxp(sxp)
160
+ if sxp.is_a?(String)
161
+ require 'sxp' unless defined?(SXP)
162
+ sxp = SXP.parse(sxp)
163
+ end
95
164
  expr = sxp.detect {|e| e.is_a?(Array) && ![:first, :follow, :start].include?(e.first.to_sym)}
96
165
  first = sxp.detect {|e| e.is_a?(Array) && e.first.to_sym == :first}
97
166
  first = first[1..-1] if first
@@ -102,26 +171,28 @@ module EBNF
102
171
  start = sxp.any? {|e| e.is_a?(Array) && e.first.to_sym == :start}
103
172
  sym = sxp[1] if sxp[1].is_a?(Symbol)
104
173
  id = sxp[2] if sxp[2].is_a?(String)
105
- Rule.new(sym, id, expr, kind: sxp.first, first: first, follow: follow, cleanup: cleanup, start: start)
174
+ self.new(sym, id, expr, kind: sxp.first, first: first, follow: follow, cleanup: cleanup, start: start)
106
175
  end
107
176
 
108
177
  # Build a new rule creating a symbol and numbering from the current rule
109
- # Symbol and number creation is handled by the top-most rule in such a chain
178
+ # Symbol and number creation is handled by the top-most rule in such a chain.
110
179
  #
111
180
  # @param [Array] expr
181
+ # @param [Symbol] kind (nil)
182
+ # @param [Hash{Symbol => Symbol}] cleanup (nil)
112
183
  # @param [Hash{Symbol => Object}] options
113
- # @param [Symbol] :kind
114
184
  def build(expr, kind: nil, cleanup: nil, **options)
115
- new_sym, new_id = (@top_rule ||self).send(:make_sym_id)
116
- Rule.new(new_sym, new_id, expr,
117
- kind: kind,
118
- ebnf: @ebnf,
119
- top_rule: (@top_rule || self),
120
- cleanup: cleanup,
121
- **options)
185
+ new_sym, new_id = @top_rule.send(:make_sym_id)
186
+ self.class.new(new_sym, new_id, expr,
187
+ kind: kind,
188
+ ebnf: @ebnf,
189
+ top_rule: @top_rule,
190
+ cleanup: cleanup,
191
+ **options)
122
192
  end
123
193
 
124
- # Return representation for building S-Expressions
194
+ # Return representation for building S-Expressions.
195
+ #
125
196
  # @return [Array]
126
197
  def for_sxp
127
198
  elements = [kind, sym]
@@ -143,35 +214,45 @@ module EBNF
143
214
 
144
215
  alias_method :to_s, :to_sxp
145
216
 
146
- # Serializes this rule to an Turtle
217
+ # Serializes this rule to an Turtle.
218
+ #
147
219
  # @return [String]
148
220
  def to_ttl
149
221
  @ebnf.debug("to_ttl") {inspect} if @ebnf
150
- comment = orig.to_s.strip.
151
- gsub(/"""/, '\"\"\"').
152
- gsub("\\", "\\\\").
153
- sub(/^\"/, '\"').
154
- sub(/\"$/m, '\"')
155
- statements = [
156
- %{:#{id} rdfs:label "#{id}"; rdf:value "#{sym}";},
157
- %{ rdfs:comment #{comment.inspect};},
158
- ]
222
+ statements = [%{:#{sym} rdfs:label "#{sym}";}]
223
+ if orig
224
+ comment = orig.to_s.strip.
225
+ gsub(/"""/, '\"\"\"').
226
+ gsub("\\", "\\\\").
227
+ sub(/^\"/, '\"').
228
+ sub(/\"$/m, '\"')
229
+ statements << %{ rdfs:comment #{comment.inspect};}
230
+ end
231
+ statements << %{ dc:identifier "#{id}";} if id
159
232
 
160
233
  statements += ttl_expr(expr, terminal? ? "re" : "g", 1, false)
161
234
  "\n" + statements.join("\n")
162
235
  end
163
236
 
237
+ # Return a Ruby representation of this rule
238
+ # @return [String]
239
+ def to_ruby
240
+ "EBNF::Rule.new(#{sym.inspect}, #{id.inspect}, #{expr.inspect}#{', kind: ' + kind.inspect unless kind == :rule})"
241
+ end
242
+
164
243
  ##
165
244
  # Transform EBNF rule to BNF rules:
166
245
  #
167
- # * Transform (a [n] rule (op1 (op2))) into two rules:
168
- # (a [n] rule (op1 _a_1))
169
- # (_a_1 [n.1] rule (op2))
170
- # * Transform (a rule (opt b)) into (a rule (alt _empty b))
171
- # * Transform (a rule (star b)) into (a rule (alt _empty (seq b a)))
172
- # * Transform (a rule (plus b)) into (a rule (seq b (star b)
246
+ # * Transform `(rule a "n" (op1 (op2)))` into two rules:
247
+ #
248
+ # (rule a "n" (op1 _a_1))
249
+ # (rule _a_1 "n.1" (op2))
250
+ # * Transform `(rule a (opt b))` into `(rule a (alt _empty b))`
251
+ # * Transform `(rule a (star b))` into `(rule a (alt _empty (seq b a)))`
252
+ # * Transform `(rule a (plus b))` into `(rule a (seq b (star b)))`
253
+ #
254
+ # Transformation includes information used to re-construct non-transformed.
173
255
  #
174
- # Transformation includes information used to re-construct non-transformed
175
256
  # AST representation
176
257
  # @return [Array<Rule>]
177
258
  def to_bnf
@@ -198,19 +279,19 @@ module EBNF
198
279
  new_rules = new_rules.map {|r| r.to_bnf}.flatten
199
280
  elsif expr.first == :opt
200
281
  this = dup
201
- # * Transform (a rule (opt b)) into (a rule (alt _empty b))
282
+ # * Transform (rule a (opt b)) into (rule a (alt _empty b))
202
283
  this.expr = [:alt, :_empty, expr.last]
203
284
  this.cleanup = :opt
204
285
  new_rules = this.to_bnf
205
286
  elsif expr.first == :star
206
- # * Transform (a rule (star b)) into (a rule (alt _empty (seq b a)))
287
+ # * Transform (rule a (star b)) into (rule a (alt _empty (seq b a)))
207
288
  this = dup
208
289
  this.cleanup = :star
209
290
  new_rule = this.build([:seq, expr.last, this.sym], cleanup: :merge)
210
291
  this.expr = [:alt, :_empty, new_rule.sym]
211
292
  new_rules = [this] + new_rule.to_bnf
212
293
  elsif expr.first == :plus
213
- # * Transform (a rule (plus b)) into (a rule (seq b (star b)
294
+ # * Transform (rule a (plus b)) into (rule a (seq b (star b)
214
295
  this = dup
215
296
  this.cleanup = :plus
216
297
  this.expr = [:seq, expr.last, [:star, expr.last]]
@@ -219,7 +300,7 @@ module EBNF
219
300
  # Otherwise, no further transformation necessary
220
301
  new_rules << self
221
302
  elsif [:diff, :hex, :range].include?(expr.first)
222
- # This rules are fine, the just need to be terminals
303
+ # These rules are fine, they just need to be terminals
223
304
  raise "Encountered #{expr.first.inspect}, which is a #{self.kind}, not :terminal" unless self.terminal?
224
305
  new_rules << self
225
306
  else
@@ -230,89 +311,73 @@ module EBNF
230
311
  return new_rules
231
312
  end
232
313
 
233
- # Return the non-terminals for this rule. For seq, this is the first
234
- # non-terminals in the seq. For alt, this is every non-terminal ni the alt
235
- # @param [Array<Rule>] ast
236
- # The set of rules, used to turn symbols into rules
314
+ ##
315
+ # Transform EBNF rule for PEG:
316
+ #
317
+ # * Transform `(rule a "n" (op1 ... (op2 y) ...z))` into two rules:
318
+ #
319
+ # (rule a "n" (op1 ... _a_1 ... z))
320
+ # (rule _a_1 "n.1" (op2 y))
321
+ # * Transform `(rule a "n" (diff op1 op2))` into two rules:
322
+ #
323
+ # (rule a "n" (seq _a_1 op1))
324
+ # (rule _a_1 "n.1" (not op2))
325
+ #
237
326
  # @return [Array<Rule>]
238
- def non_terminals(ast)
239
- @non_terms ||= (alt? ? expr[1..-1] : expr[1,1]).map do |sym|
240
- case sym
241
- when Symbol
242
- r = ast.detect {|r| r.sym == sym}
243
- r if r && r.rule?
244
- else
245
- nil
246
- end
247
- end.compact
248
- end
327
+ def to_peg
328
+ new_rules = []
249
329
 
250
- # Return the terminals for this rule. For seq, this is the first
251
- # terminals or strings in the seq. For alt, this is every non-terminal ni the alt
252
- # @param [Array<Rule>] ast
253
- # The set of rules, used to turn symbols into rules
254
- # @return [Array<Rule>]
255
- def terminals(ast)
256
- @terms ||= (alt? ? expr[1..-1] : expr[1,1]).map do |sym|
257
- case sym
258
- when Symbol
259
- r = ast.detect {|r| r.sym == sym}
260
- r if r && r.terminal?
261
- when String
262
- sym
263
- else
264
- nil
330
+ # Look for rules containing sub-sequences
331
+ if expr.any? {|e| e.is_a?(Array) && e.first.is_a?(Symbol)}
332
+ # duplicate ourselves for rewriting
333
+ this = dup
334
+ new_rules << this
335
+
336
+ expr.each_with_index do |e, index|
337
+ next unless e.is_a?(Array) && e.first.is_a?(Symbol)
338
+ new_rule = build(e)
339
+ this.expr[index] = new_rule.sym
340
+ new_rules << new_rule
265
341
  end
266
- end.compact
267
- end
268
342
 
269
- # Does this rule start with a sym? It does if expr is that sym,
270
- # expr starts with alt and contains that sym, or
271
- # expr starts with seq and the next element is that sym
272
- # @param [Symbol, class] sym
273
- # Symbol matching any start element, or if it is String, any start element which is a String
274
- # @return [Array<Symbol, String>] list of symbol (singular), or strings which are start symbol, or nil if there are none
275
- def starts_with?(sym)
276
- if seq? && sym === (v = expr.fetch(1, nil))
277
- [v]
278
- elsif alt? && expr.any? {|e| sym === e}
279
- expr.select {|e| sym === e}
343
+ # Return new rules after recursively applying #to_peg
344
+ new_rules = new_rules.map {|r| r.to_peg}.flatten
345
+ elsif expr.first == :diff && !terminal?
346
+ this = dup
347
+ new_rule = build([:not, expr[2]])
348
+ this.expr = [:seq, new_rule.sym, expr[1]]
349
+ new_rules << this
350
+ new_rules << new_rule
351
+ elsif [:hex, :istr, :range].include?(expr.first)
352
+ # These rules are fine, they just need to be terminals
353
+ raise "Encountered #{expr.first.inspect}, which is a #{self.kind}, not :terminal" unless self.terminal?
354
+ new_rules << self
280
355
  else
281
- nil
356
+ new_rules << self
282
357
  end
358
+
359
+ return new_rules.map {|r| r.extend(EBNF::PEG::Rule)}
283
360
  end
284
361
 
285
- # Do the firsts of this rule include the empty string?
286
- # @return [Boolean]
287
- def first_includes_eps?
288
- @first && @first.include?(:_eps)
289
- end
290
-
291
- # Add terminal as proceding this rule
292
- # @param [Array<Rule, Symbol, String>] terminals
293
- # @return [Integer] if number of terminals added
294
- def add_first(terminals)
295
- @first ||= []
296
- terminals = terminals.map {|t| t.is_a?(Rule) ? t.sym : t} - @first
297
- @first += terminals
298
- terminals.length
299
- end
300
-
301
- # Add terminal as following this rule. Don't add _eps as a follow
362
+ ##
363
+ # For :hex, :istr, or :range, create a regular expression.
302
364
  #
303
- # @param [Array<Rule, Symbol, String>] terminals
304
- # @return [Integer] if number of terminals added
305
- def add_follow(terminals)
306
- # Remove terminals already in follows, and empty string
307
- terminals = terminals.map {|t| t.is_a?(Rule) ? t.sym : t} - (@follow || []) - [:_eps]
308
- unless terminals.empty?
309
- @follow ||= []
310
- @follow += terminals
365
+ # @return [Regexp]
366
+ def to_regexp
367
+ case expr.first
368
+ when :hex
369
+ Regexp.new(translate_codepoints(expr[1]))
370
+ when :istr
371
+ /#{expr.last}/ui
372
+ when :range
373
+ Regexp.new("[#{translate_codepoints(expr[1])}]")
374
+ else
375
+ raise "Can't turn #{expr.inspect} into a regexp"
311
376
  end
312
- terminals.length
313
377
  end
314
378
 
315
379
  # Is this a terminal?
380
+ #
316
381
  # @return [Boolean]
317
382
  def terminal?
318
383
  kind == :terminal
@@ -340,18 +405,14 @@ module EBNF
340
405
  expr.is_a?(Array) && expr.first == :seq
341
406
  end
342
407
 
343
- # Is this rule of the form (alt ...)?
344
- def alt?
345
- expr.is_a?(Array) && expr.first == :alt
346
- end
347
-
348
408
  def inspect
349
409
  "#<EBNF::Rule:#{object_id} " +
350
410
  {sym: sym, id: id, kind: kind, expr: expr}.inspect +
351
411
  ">"
352
412
  end
353
413
 
354
- # Two rules are equal if they have the same {#sym}, {#kind} and {#expr}
414
+ # Two rules are equal if they have the same {#sym}, {#kind} and {#expr}.
415
+ #
355
416
  # @param [Rule] other
356
417
  # @return [Boolean]
357
418
  def ==(other)
@@ -360,37 +421,259 @@ module EBNF
360
421
  expr == other.expr
361
422
  end
362
423
 
363
- # Two rules are equivalent if they have the same {#expr}
424
+ # Two rules are equivalent if they have the same {#expr}.
425
+ #
364
426
  # @param [Rule] other
365
427
  # @return [Boolean]
366
- def equivalent?(other)
367
- expr == other.expr
428
+ def eql?(other)
429
+ expr == other.expr
368
430
  end
369
431
 
370
- # Rewrite the rule substituting src_rule for dst_rule wherever
371
- # it is used in the production (first level only).
372
- # @param [Rule] src_rule
373
- # @param [Rule] dst_rule
374
- # @return [Rule]
375
- def rewrite(src_rule, dst_rule)
376
- case @expr
377
- when Array
378
- @expr = @expr.map {|e| e == src_rule.sym ? dst_rule.sym : e}
432
+ # Rules compare using their ids, falling back to their symbols when either id is missing
433
+ def <=>(other)
434
+ if id && other.id
435
+ if id == other.id
436
+ id.to_s <=> other.id.to_s
437
+ else
438
+ id.to_f <=> other.id.to_f
439
+ end
379
440
  else
380
- @expr = dst_rule.sym if @expr == src_rule.sym
441
+ sym.to_s <=> other.sym.to_s
381
442
  end
382
- self
383
443
  end
384
444
 
385
- # Rules compare using their ids
386
- def <=>(other)
387
- if id.to_i == other.id.to_i
388
- id.to_s <=> other.id.to_s
445
+ ##
446
+ # Utility function to translate code points of the form '#xN' into ruby unicode characters
447
+ def translate_codepoints(str)
448
+ str.gsub(/#x\h+/) {|c| c[2..-1].scanf("%x").first.chr(Encoding::UTF_8)}
449
+ end
450
+
451
+ # Return the non-terminals for this rule.
452
+ #
453
+ # * `alt` => this is every non-terminal.
454
+ # * `diff` => this is every non-terminal.
455
+ # * `hex` => nil
456
+ # * `istr` => nil
457
+ # * `not` => this is the last expression, if any.
458
+ # * `opt` => this is the last expression, if any.
459
+ # * `plus` => this is the last expression, if any.
460
+ # * `range` => nil
461
+ # * `rept` => this is the last expression, if any.
462
+ # * `seq` => this is the first expression in the sequence, if any.
463
+ # * `star` => this is the last expression, if any.
464
+ #
465
+ # @param [Array<Rule>] ast
466
+ # The set of rules, used to turn symbols into rules
467
+ # @param [Array<Symbol,String,Array>] expr (@expr)
468
+ # The expression to check, defaults to the rule expression.
469
+ # Typically, if the expression is recursive, the embedded expression is called recursively.
470
+ # @return [Array<Rule>]
471
+ # @note this is used for LL(1) transformation, so rule types are limited
472
+ def non_terminals(ast, expr = @expr)
473
+ ([:alt, :diff].include?(expr.first) ? expr[1..-1] : expr[1,1]).map do |sym|
474
+ case sym
475
+ when Symbol
476
+ r = ast.detect {|r| r.sym == sym}
477
+ r if r && r.rule?
478
+ when Array
479
+ non_terminals(ast, sym)
480
+ else
481
+ nil
482
+ end
483
+ end.flatten.compact.uniq
484
+ end
485
+
486
+ # Return the terminals for this rule.
487
+ #
488
+ # * `alt` => this is every terminal.
489
+ # * `diff` => this is every terminal.
490
+ # * `hex` => nil
491
+ # * `istr` => nil
492
+ # * `not` => this is the last expression, if any.
493
+ # * `opt` => this is the last expression, if any.
494
+ # * `plus` => this is the last expression, if any.
495
+ # * `range` => nil
496
+ # * `rept` => this is the last expression, if any.
497
+ # * `seq` => this is the first expression in the sequence, if any.
498
+ # * `star` => this is the last expression, if any.
499
+ #
500
+ # @param [Array<Rule>] ast
501
+ # The set of rules, used to turn symbols into rules
502
+ # @param [Array<Symbol,String,Array>] expr (@expr)
503
+ # The expression to check, defaults to the rule expression.
504
+ # Typically, if the expression is recursive, the embedded expression is called recursively.
505
+ # @return [Array<Rule, String>]
506
+ # @note this is used for LL(1) transformation, so rule types are limited
507
+ def terminals(ast, expr = @expr)
508
+ ([:alt, :diff].include?(expr.first) ? expr[1..-1] : expr[1,1]).map do |sym|
509
+ case sym
510
+ when Symbol
511
+ r = ast.detect {|r| r.sym == sym}
512
+ r if r && r.terminal?
513
+ when String
514
+ sym
515
+ when Array
516
+ terminals(ast, sym)
517
+ end
518
+ end.flatten.compact.uniq
519
+ end
520
+
521
+ # Return the symbols used in the rule.
522
+ #
523
+ # @param [Array<Symbol,String,Array>] expr (@expr)
524
+ # The expression to check, defaults to the rule expression.
525
+ # Typically, if the expression is recursive, the embedded expression is called recursively.
526
+ # @return [Array<Symbol>]
527
+ def symbols(expr = @expr)
528
+ expr[1..-1].map do |sym|
529
+ case sym
530
+ when Symbol
531
+ sym
532
+ when Array
533
+ symbols(sym)
534
+ end
535
+ end.flatten.compact.uniq
536
+ end
537
+
538
+ ##
539
+ # The following are used for LL(1) transformation.
540
+ ##
541
+
542
+ # Does this rule start with `sym`? It does if expr is that sym,
543
+ # expr starts with alt and contains that sym,
544
+ # or expr starts with seq and the next element is that sym.
545
+ #
546
+ # @param [Symbol, class] sym
547
+ # Symbol matching any start element, or if it is String, any start element which is a String
548
+ # @return [Array<Symbol, String>] list of symbol (singular), or strings which are start symbol, or nil if there are none
549
+ def starts_with?(sym)
550
+ if seq? && sym === (v = expr.fetch(1, nil))
551
+ [v]
552
+ elsif alt? && expr.any? {|e| sym === e}
553
+ expr.select {|e| sym === e}
554
+ else
555
+ nil
556
+ end
557
+ end
558
+
559
+ ##
560
+ # Validate the rule, with respect to an AST.
561
+ #
562
+ # @param [Array<Rule>] ast
563
+ # The set of rules, used to turn symbols into rules
564
+ # @param [Array<Symbol,String,Array>] expr (@expr)
565
+ # The expression to check, defaults to the rule expression.
566
+ # Typically, if the expression is recursive, the embedded expression is called recursively.
567
+ # @raise [SyntaxError]
568
+ def validate!(ast, expr = @expr)
569
+ op = expr.first
570
+ raise SyntaxError, "Unknown operator: #{op}" unless OP_ARGN.key?(op)
571
+ raise SyntaxError, "Argument count missmatch on operator #{op}, had #{expr.length - 1} expected #{OP_ARGN[op]}" if
572
+ OP_ARGN[op] && OP_ARGN[op] != expr.length - 1
573
+
574
+ # rept operator needs min and max
575
+ if op == :alt
576
+ raise SyntaxError, "alt operation must have at least one operand, had #{expr.length - 1}" unless expr.length > 1
577
+ elsif op == :rept
578
+ raise SyntaxError, "rept operation must an non-negative integer minimum, was #{expr[1]}" unless
579
+ expr[1].is_a?(Integer) && expr[1] >= 0
580
+ raise SyntaxError, "rept operation must an non-negative integer maximum or '*', was #{expr[2]}" unless
581
+ expr[2] == '*' || expr[2].is_a?(Integer) && expr[2] >= 0
582
+ end
583
+
584
+ case op
585
+ when :hex
586
+ raise SyntaxError, "Hex operand must be of form '#xN+': #{sym}" unless expr.last.match?(/^#x\h+$/)
587
+ when :range
588
+ str = expr.last.dup
589
+ str = str[1..-1] if str.start_with?('^')
590
+ str = str[0..-2] if str.end_with?('-') # Allowed at end of range
591
+ scanner = StringScanner.new(str)
592
+ hex = rchar = in_range = false
593
+ while !scanner.eos?
594
+ begin
595
+ if scanner.scan(Terminals::HEX)
596
+ raise SyntaxError if in_range && rchar
597
+ rchar = in_range = false
598
+ hex = true
599
+ elsif scanner.scan(Terminals::R_CHAR)
600
+ raise SyntaxError if in_range && hex
601
+ hex = in_range = false
602
+ rchar = true
603
+ else
604
+ raise(SyntaxError, "Range contains illegal components at offset #{scanner.pos}: was #{expr.last}")
605
+ end
606
+
607
+ if scanner.scan(/\-/)
608
+ raise SyntaxError if in_range
609
+ in_range = true
610
+ end
611
+ rescue SyntaxError
612
+ raise(SyntaxError, "Range contains illegal components at offset #{scanner.pos}: was #{expr.last}")
613
+ end
614
+ end
389
615
  else
390
- id.to_i <=> other.id.to_i
616
+ ([:alt, :diff].include?(expr.first) ? expr[1..-1] : expr[1,1]).each do |sym|
617
+ case sym
618
+ when Symbol
619
+ r = ast.detect {|r| r.sym == sym}
620
+ raise SyntaxError, "No rule found for #{sym}" unless r
621
+ when Array
622
+ validate!(ast, sym)
623
+ when String
624
+ raise SyntaxError, "String must be of the form CHAR*" unless sym.match?(/^#{Terminals::CHAR}*$/)
625
+ end
626
+ end
391
627
  end
392
628
  end
393
629
 
630
+ ##
631
+ # Validate the rule, with respect to an AST.
632
+ #
633
+ # Uses `#validate!` and catches `SyntaxError`
634
+ #
635
+ # @param [Array<Rule>] ast
636
+ # The set of rules, used to turn symbols into rules
637
+ # @return [Boolean]
638
+ def valid?(ast)
639
+ validate!(ast)
640
+ true
641
+ rescue SyntaxError
642
+ false
643
+ end
644
+
645
+ # Do the firsts of this rule include the empty string?
646
+ #
647
+ # @return [Boolean]
648
+ def first_includes_eps?
649
+ @first && @first.include?(:_eps)
650
+ end
651
+
652
+ # Add terminals to the first set of this rule.
653
+ #
654
+ # @param [Array<Rule, Symbol, String>] terminals
655
+ # @return [Integer] the number of terminals added
656
+ def add_first(terminals)
657
+ @first ||= []
658
+ terminals = terminals.map {|t| t.is_a?(Rule) ? t.sym : t} - @first
659
+ @first += terminals
660
+ terminals.length
661
+ end
662
+
663
+ # Add terminal as following this rule. Don't add _eps as a follow
664
+ #
665
+ # @param [Array<Rule, Symbol, String>] terminals
666
+ # @return [Integer] the number of terminals added
667
+ def add_follow(terminals)
668
+ # Remove terminals already in follows, and empty string
669
+ terminals = terminals.map {|t| t.is_a?(Rule) ? t.sym : t} - (@follow || []) - [:_eps]
670
+ unless terminals.empty?
671
+ @follow ||= []
672
+ @follow += terminals
673
+ end
674
+ terminals.length
675
+ end
676
+
394
677
  private
395
678
  def ttl_expr(expr, pfx, depth, is_obj = true)
396
679
  indent = ' ' * depth
@@ -406,17 +689,28 @@ module EBNF
406
689
 
407
690
  case op
408
691
  when :seq, :alt, :diff
692
+ # Multiple operands
409
693
  statements << %{#{indent}#{bra}#{pfx}:#{op} (}
410
694
  expr.each {|a| statements += ttl_expr(a, pfx, depth + 1)}
411
695
  statements << %{#{indent} )#{ket}}
412
- when :opt, :plus, :star
696
+ when :opt, :plus, :star, :not
697
+ # Single operand
413
698
  statements << %{#{indent}#{bra}#{pfx}:#{op} }
414
699
  statements += ttl_expr(expr.first, pfx, depth + 1)
415
700
  statements << %{#{indent} #{ket}} unless ket.empty?
416
- when :_empty, :_eps, :_empty
701
+ when :rept
702
+ # Three operands (min, max and expr)
703
+ statements << %{ #{indent}#{pfx}:min #{expr[0].inspect};}
704
+ statements << %{ #{indent}#{pfx}:max #{expr[1].inspect};}
705
+ statements << %{#{indent}#{bra}#{pfx}:#{op} }
706
+ statements += ttl_expr(expr.last, pfx, depth + 1)
707
+ statements << %{#{indent} #{ket}} unless ket.empty?
708
+ when :_empty, :_eps
417
709
  statements << %{#{indent}"g:#{op.to_s[1..-1]}"}
418
710
  when :"'"
419
711
  statements << %{#{indent}"#{esc(expr)}"}
712
+ when :istr
713
+ statements << %{#{indent}#{bra} re:matches #{expr.first.inspect} #{ket}}
420
714
  when :range
421
715
  statements << %{#{indent}#{bra} re:matches #{cclass(expr.first).inspect} #{ket}}
422
716
  when :hex
@@ -472,7 +766,7 @@ module EBNF
472
766
  def make_sym_id(variation = nil)
473
767
  @id_seq ||= 0
474
768
  @id_seq += 1
475
- ["_#{@sym}_#{@id_seq}#{variation}".to_sym, "#{@id}.#{@id_seq}#{variation}"]
769
+ ["_#{@sym}_#{@id_seq}#{variation}".to_sym, ("#{@id}.#{@id_seq}#{variation}" if @id)]
476
770
  end
477
771
  end
478
772
  end