ebnf 1.2.0 → 2.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +223 -199
  3. data/UNLICENSE +1 -1
  4. data/VERSION +1 -1
  5. data/bin/ebnf +38 -19
  6. data/etc/abnf-core.ebnf +52 -0
  7. data/etc/abnf.abnf +121 -0
  8. data/etc/abnf.ebnf +124 -0
  9. data/etc/abnf.sxp +45 -0
  10. data/etc/doap.ttl +23 -18
  11. data/etc/ebnf.ebnf +21 -33
  12. data/etc/ebnf.html +76 -160
  13. data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
  14. data/etc/ebnf.ll1.sxp +182 -183
  15. data/etc/ebnf.peg.rb +90 -0
  16. data/etc/ebnf.peg.sxp +84 -0
  17. data/etc/ebnf.sxp +40 -41
  18. data/etc/iso-ebnf.ebnf +140 -0
  19. data/etc/iso-ebnf.isoebnf +138 -0
  20. data/etc/iso-ebnf.sxp +65 -0
  21. data/etc/sparql.ebnf +4 -4
  22. data/etc/sparql.html +1603 -1751
  23. data/etc/sparql.ll1.sxp +7372 -7372
  24. data/etc/sparql.peg.rb +532 -0
  25. data/etc/sparql.peg.sxp +597 -0
  26. data/etc/sparql.sxp +363 -362
  27. data/etc/turtle.ebnf +3 -3
  28. data/etc/turtle.html +465 -517
  29. data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
  30. data/etc/turtle.ll1.sxp +425 -425
  31. data/etc/turtle.peg.rb +182 -0
  32. data/etc/turtle.peg.sxp +199 -0
  33. data/etc/turtle.sxp +103 -101
  34. data/lib/ebnf.rb +6 -1
  35. data/lib/ebnf/abnf.rb +301 -0
  36. data/lib/ebnf/abnf/core.rb +23 -0
  37. data/lib/ebnf/abnf/meta.rb +111 -0
  38. data/lib/ebnf/base.rb +114 -69
  39. data/lib/ebnf/bnf.rb +1 -26
  40. data/lib/ebnf/ebnf/meta.rb +90 -0
  41. data/lib/ebnf/isoebnf.rb +229 -0
  42. data/lib/ebnf/isoebnf/meta.rb +75 -0
  43. data/lib/ebnf/ll1.rb +131 -3
  44. data/lib/ebnf/ll1/lexer.rb +20 -22
  45. data/lib/ebnf/ll1/parser.rb +97 -64
  46. data/lib/ebnf/ll1/scanner.rb +82 -50
  47. data/lib/ebnf/native.rb +320 -0
  48. data/lib/ebnf/parser.rb +285 -302
  49. data/lib/ebnf/peg.rb +39 -0
  50. data/lib/ebnf/peg/parser.rb +561 -0
  51. data/lib/ebnf/peg/rule.rb +250 -0
  52. data/lib/ebnf/rule.rb +442 -148
  53. data/lib/ebnf/terminals.rb +21 -0
  54. data/lib/ebnf/writer.rb +587 -82
  55. metadata +125 -18
  56. data/etc/sparql.rb +0 -45773
@@ -0,0 +1,250 @@
1
+ module EBNF::PEG
2
+ # Behavior for parsing a PEG rule
3
+ module Rule
4
+ ##
5
+ # Initialized by parser when loading rules.
6
+ # Used for finding rules and invoking elements of the parse process.
7
+ #
8
+ # @return [EBNF::PEG::Parser] parser
9
+ attr_accessor :parser
10
+
11
+ ##
12
+ # Parse a rule or terminal, invoking callbacks, as appropriate
13
+
14
+ # If there are `start_production` and/or `production` handlers,
15
+ # they are invoked with a `prod_data` stack, the input stream and offset.
16
+ # Otherwise, the results are added as an array value
17
+ # to a hash indexed by the rule name.
18
+ #
19
+ # If matched, the input position is updated and the results returned in a Hash.
20
+ #
21
+ # * `alt`: returns the value of the matched production or `:unmatched`.
22
+ # * `diff`: returns the value matched, or `:unmatched`.
23
+ # * `hex`: returns a string composed of the matched hex character, or `:unmatched`.
24
+ # * `opt`: returns the value matched, or `nil` if unmatched.
25
+ # * `plus`: returns an array of the values matched for the specified production, or `:unmatched`, if none are matched. For Terminals, these are concatenated into a single string.
26
+ # * `range`: returns a string composed of the values matched, or `:unmatched`, if less than `min` are matched.
27
+ # * `rept`: returns an array of the values matched for the specified production, or `:unmatched`, if none are matched. For Terminals, these are concatenated into a single string.
28
+ # * `seq`: returns an array composed of single-entry hashes for each matched production indexed by the production name, or `:unmatched` if any production fails to match. For Terminals, returns a string created by concatenating these values. Via option in a `production` or definition, the result can be a single hash with values for each matched production; note that this is not always possible due to the possibility of repeated productions within the sequence.
29
+ # * `star`: returns an array of the values matched for the specified production. For Terminals, these are concatenated into a single string.
30
+ #
31
+ # @param [Scanner] input
32
+ # @return [Hash{Symbol => Object}, :unmatched] A hash with keys for matched component of the expression. Returns :unmatched if the input does not match the production.
33
def parse(input)
  # Save position and linenumber for backtracking
  pos, lineno = input.pos, input.lineno

  # Packrat memoization: if this rule was already evaluated at this offset,
  # restore the recorded end position and return the recorded result.
  parser.packrat[sym] ||= {}
  if parser.packrat[sym][pos]
    parser.debug("#{sym}(:memo)", lineno: lineno) { "#{parser.packrat[sym][pos].inspect}(@#{pos})"}
    input.pos, input.lineno = parser.packrat[sym][pos][:pos], parser.packrat[sym][pos][:lineno]
    return parser.packrat[sym][pos][:result]
  end

  if terminal?
    # If the terminal is defined with a regular expression,
    # use that to match the input,
    # otherwise, fall through and evaluate the rule expression below.
    if regexp = parser.find_terminal_regexp(sym)
      matched = input.scan(regexp)
      result = parser.onTerminal(sym, (matched ? matched : :unmatched))
      # Update furthest failure for strings and terminals
      parser.update_furthest_failure(input.pos, input.lineno, sym) if result == :unmatched
      parser.packrat[sym][pos] = {
        pos: input.pos,
        lineno: input.lineno,
        result: result
      }
      return parser.packrat[sym][pos][:result]
    end
  else
    eat_whitespace(input)
  end
  start_options = parser.onStart(sym)

  result = case expr.first
  when :alt
    # Return the first expression to match.
    # Result is either :unmatched, or the value of the matching rule
    alt = :unmatched
    expr[1..-1].each do |prod|
      alt = case prod
      when Symbol
        rule = parser.find_rule(prod)
        raise "No rule found for #{prod}" unless rule
        rule.parse(input)
      when String
        input.scan(Regexp.new(Regexp.quote(prod))) || :unmatched
      end
      if alt == :unmatched
        # Update furthest failure for strings and terminals
        parser.update_furthest_failure(input.pos, input.lineno, prod) if prod.is_a?(String) || rule.terminal?
      else
        break
      end
    end
    alt
  when :diff
    # matches any string that matches A but does not match B.
    # (Note, this is only used for Terminal rules, non-terminals will use :not)
    # The message names this rule's own symbol; no `prod` variable is in scope here.
    raise "Diff used on non-terminal #{sym}" unless terminal?
    re1, re2 = Regexp.new(translate_codepoints(expr[1])), Regexp.new(translate_codepoints(expr[2]))
    matched = input.scan(re1)
    if !matched || re2.match?(matched)
      # Update furthest failure for terminals
      parser.update_furthest_failure(input.pos, input.lineno, sym)
      :unmatched
    else
      matched
    end
  when :hex
    # Matches the given hex character if expression matches the character whose number (code point) in ISO/IEC 10646 is N. The number of leading zeros in the #xN form is insignificant.
    input.scan(to_regexp) || begin
      # Update furthest failure for terminals
      parser.update_furthest_failure(input.pos, input.lineno, expr.last)
      :unmatched
    end
  when :not
    # matches any string that does not match B.
    res = case prod = expr[1]
    when Symbol
      rule = parser.find_rule(prod)
      raise "No rule found for #{prod}" unless rule
      rule.parse(input)
    when String
      input.scan(Regexp.new(Regexp.quote(prod))) || :unmatched
    end
    if res != :unmatched
      # Update furthest failure for terminals
      parser.update_furthest_failure(input.pos, input.lineno, sym) if terminal?
      :unmatched
    else
      nil
    end
  when :opt
    # Result is the matched value or nil
    opt = rept(input, 0, 1, expr[1])

    # Update furthest failure for strings and terminals
    parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
    opt.first
  when :plus
    # Result is an array of all expressions while they match,
    # at least one must match
    plus = rept(input, 1, '*', expr[1])

    # Update furthest failure for strings and terminals
    parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
    plus.is_a?(Array) && terminal? ? plus.join("") : plus
  when :range, :istr
    # Matches the specified character range
    input.scan(to_regexp) || begin
      # Update furthest failure for strings and terminals
      parser.update_furthest_failure(input.pos, input.lineno, expr[1])
      :unmatched
    end
  when :rept
    # Result is an array of all expressions while they match,
    # an empty array if none match
    repeated = rept(input, expr[1], expr[2], expr[3])

    # Update furthest failure for strings and terminals
    parser.update_furthest_failure(input.pos, input.lineno, expr[3]) if terminal?
    repeated.is_a?(Array) && terminal? ? repeated.join("") : repeated
  when :seq
    # Evaluate each expression into an array of hashes where each hash contains a key from the associated production and the value is the parsed value of that production. Returns :unmatched if the input does not match the production. Value ordering is ensured by native Hash ordering.
    seq = expr[1..-1].each_with_object([]) do |prod, accumulator|
      eat_whitespace(input) unless accumulator.empty? || terminal?
      res = case prod
      when Symbol
        rule = parser.find_rule(prod)
        raise "No rule found for #{prod}" unless rule
        rule.parse(input)
      when String
        input.scan(Regexp.new(Regexp.quote(prod))) || :unmatched
      end
      if res == :unmatched
        # Update furthest failure for strings and terminals
        parser.update_furthest_failure(input.pos, input.lineno, prod)
        # Breaking with a value makes :unmatched the result of each_with_object
        break :unmatched
      end
      accumulator << {prod.to_sym => res}
    end
    if seq == :unmatched
      :unmatched
    elsif terminal?
      seq.map(&:values).compact.join("") # Concat values for terminal production
    elsif start_options[:as_hash]
      seq.inject {|memo, h| memo.merge(h)}
    else
      seq
    end
  when :star
    # Result is an array of all expressions while they match,
    # an empty array if none match
    star = rept(input, 0, '*', expr[1])

    # Update furthest failure for strings and terminals
    parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
    star.is_a?(Array) && terminal? ? star.join("") : star
  else
    raise "attempt to parse unknown rule type: #{expr.first}"
  end

  # Backtrack: an unmatched rule must not consume any input
  if result == :unmatched
    input.pos, input.lineno = pos, lineno
  end

  # Let the parser post-process the result, then memoize it keyed by the starting offset
  result = parser.onFinish(result)
  (parser.packrat[sym] ||= {})[pos] = {
    pos: input.pos,
    lineno: input.lineno,
    result: result
  }
  return parser.packrat[sym][pos][:result]
end
206
+
207
+ ##
208
+ # Repetition, 0-1, 0-n, 1-n, ...
209
+ #
210
+ # Note, nil results are removed from the result, but count towards min/max calculations
211
+ #
212
+ # @param [Scanner] input
213
+ # @param [Integer] min
214
+ # @param [Integer] max
215
+ # If it is an integer, it stops matching after max entries.
216
+ # @param [Symbol, String] prod
217
+ # @return [:unmatched, Array]
218
def rept(input, min, max, prod)
  # Collects every match, including nil results, so that nils count towards min/max
  result = []

  case prod
  when Symbol
    rule = parser.find_rule(prod)
    raise "No rule found for #{prod}" unless rule
    while (max == '*' || result.length < max) && (res = rule.parse(input)) != :unmatched
      eat_whitespace(input) unless terminal?
      result << res
    end
  when String
    # Build the literal matcher once rather than on every iteration
    literal = Regexp.new(Regexp.quote(prod))
    # Test the upper bound BEFORE scanning, so that no input is consumed
    # (and then discarded) once `max` matches have been collected.
    while (max == '*' || result.length < max) && (res = input.scan(literal))
      eat_whitespace(input) unless terminal?
      result << res
    end
  end

  # Too few matches fails the repetition; otherwise drop nil entries from the result
  result.length < min ? :unmatched : result.compact
end
238
+
239
+ ##
240
+ # Eat whitespace between non-terminal rules
241
def eat_whitespace(input)
  # The parser's whitespace definition is either a Regexp or a Rule;
  # anything else means there is nothing to skip.
  case (ws = parser.whitespace)
  when Regexp
    # Eat whitespace before a non-terminal
    input.skip(ws)
  when Rule
    ws.parse(input) # throw away result
  end
end
249
+ end
250
+ end
data/lib/ebnf/rule.rb CHANGED
@@ -1,15 +1,33 @@
1
+ require 'scanf'
2
+ require 'strscan'
3
+
1
4
  module EBNF
2
5
  # Represent individual parsed rules
3
6
  class Rule
4
- # Operations which are flattened to seprate rules in to_bnf
7
+ # Operations which are flattened to separate rules in to_bnf.
5
8
  BNF_OPS = %w{
6
- alt opt plus seq star
9
+ alt diff not opt plus rept seq star
7
10
  }.map(&:to_sym).freeze
8
11
 
9
12
  TERM_OPS = %w{
10
- diff hex range
13
+ hex istr range
11
14
  }.map(&:to_sym).freeze
12
15
 
16
+ # The number of arguments expected per operator. `nil` for unspecified
17
+ OP_ARGN = {
18
+ alt: nil,
19
+ diff: 2,
20
+ hex: 1,
21
+ istr: 1,
22
+ not: 1,
23
+ opt: 1,
24
+ plus: 1,
25
+ range: 1,
26
+ rept: 3,
27
+ seq: nil,
28
+ star: 1
29
+ }
30
+
13
31
  # Symbol of rule
14
32
  #
15
33
  # @return [Symbol]
@@ -26,7 +44,7 @@ module EBNF
26
44
 
27
45
  # Kind of rule
28
46
  #
29
- # @return [:rule, :terminal, or :pass]
47
+ # @return [:rule, :terminal, :terminals, or :pass]
30
48
  attr_accessor :kind
31
49
 
32
50
  # Rule expression
@@ -57,19 +75,38 @@ module EBNF
57
75
  # Determines preparation and cleanup rules for reconstituting EBNF ? * + from BNF
58
76
  attr_accessor :cleanup
59
77
 
60
- # @param [Integer] id
61
- # @param [Symbol] sym
78
+ # @param [Symbol, nil] sym
79
+ # `nil` is allowed only for @pass or @terminals
80
+ # @param [Integer, nil] id
62
81
  # @param [Array] expr
63
- # @param [Symbol] :kind
64
- # @param [String] :ebnf
65
- # @param [Array] :first
66
- # @param [Array] :follow
67
- # @param [Boolean] :start
68
- # @param [Rule] :top_rule
69
- # @param [Boolean] :cleanup
82
+ # The expression is an internal-representation of an S-Expression with one of the following operators:
83
+ #
84
+ # * `alt` A list of alternative rules, which are attempted in order. It terminates with the first matching rule, or is terminated as unmatched, if no such rule is found.
85
+ # * `diff` matches any string that matches `A` but does not match `B`.
86
+ # * `hex` A single character represented using the hexadecimal notation `#xnn`.
87
+ # * `istr` A string which matches in a case-insensitive manner, so that `(istr "fOo")` will match either of the strings `"foo"`, `"FOO"` or any other combination.
88
+ # * `opt` An optional rule or terminal. It either results in the matching rule or returns `nil`.
89
+ # * `plus` – A sequence of one or more of the matching rule. If there is no such rule, it is terminated as unmatched; otherwise, the result is an array containing all matched input.
90
+ # * `range` – A range of characters, possibly repeated, of the form `(range "a-z")`. May also use hexadecimal notation.
91
+ # * `rept m n` – A sequence of at least `m` and at most `n` of the matching rule. It will always return an array.
92
+ # * `seq` – A sequence of rules or terminals. If any (other than `opt` or `star`) do not parse, the rule is terminated as unmatched.
93
+ # * `star` – A sequence of zero or more of the matching rule. It will always return an array.
94
+ # @param [:rule, :terminal, :terminals, :pass] kind (nil)
95
+ # @param [String] ebnf (nil)
96
+ # When parsing, records the EBNF string used to create the rule.
97
+ # @param [Array] first (nil)
98
+ # Recorded set of terminals that can precede this rule (LL(1))
99
+ # @param [Array] follow (nil)
100
+ # Recorded set of terminals that can follow this rule (LL(1))
101
+ # @param [Boolean] start (nil)
102
+ # Is this the starting rule for the grammar?
103
+ # @param [Rule] top_rule (nil)
104
+ # The top-most rule. All expressed rules are top-rules, derived rules have the original rule as their top-rule.
105
+ # @param [Boolean] cleanup (nil)
106
+ # Records information useful for cleaning up converted :plus, and :star expansions (LL(1)).
70
107
  def initialize(sym, id, expr, kind: nil, ebnf: nil, first: nil, follow: nil, start: nil, top_rule: nil, cleanup: nil)
71
108
  @sym, @id = sym, id
72
- @expr = expr.is_a?(Array) ? expr : [:seq, expr]
109
+ @expr = expr.is_a?(Array) ? expr : [:seq, expr].compact
73
110
  @ebnf, @kind, @first, @follow, @start, @cleanup, @top_rule = ebnf, kind, first, follow, start, cleanup, top_rule
74
111
  @top_rule ||= self
75
112
  @kind ||= case
@@ -77,21 +114,53 @@ module EBNF
77
114
  when !BNF_OPS.include?(@expr.first) then :terminal
78
115
  else :rule
79
116
  end
117
+
118
+ # Allow @pass and @terminals to not be named
119
+ @sym ||= :_pass if @kind == :pass
120
+ @sym ||= :_terminals if @kind == :terminals
121
+
122
+ raise ArgumentError, "Rule sym must be a symbol, was #{@sym.inspect}" unless @sym.is_a?(Symbol)
123
+ raise ArgumentError, "Rule id must be a string or nil, was #{@id.inspect}" unless (@id || "").is_a?(String)
124
+ raise ArgumentError, "Rule kind must be one of :rule, :terminal, :terminals, or :pass, was #{@kind.inspect}" unless
125
+ @kind.is_a?(Symbol) && %w(rule terminal terminals pass).map(&:to_sym).include?(@kind)
126
+
127
+ case @expr.first
128
+ when :alt
129
+ raise ArgumentError, "#{@expr.first} operation must have at least one operand, had #{@expr.length - 1}" unless @expr.length > 1
130
+ when :diff
131
+ raise ArgumentError, "#{@expr.first} operation must have exactly two operands, had #{@expr.length - 1}" unless @expr.length == 3
132
+ when :hex, :istr, :not, :opt, :plus, :range, :star
133
+ raise ArgumentError, "#{@expr.first} operation must have exactly one operand, had #{@expr.length - 1}" unless @expr.length == 2
134
+ when :rept
135
+ raise ArgumentError, "#{@expr.first} operation must have exactly three, had #{@expr.length - 1}" unless @expr.length == 4
136
+ raise ArgumentError, "#{@expr.first} operation must an non-negative integer minimum, was #{@expr[1]}" unless
137
+ @expr[1].is_a?(Integer) && @expr[1] >= 0
138
+ raise ArgumentError, "#{@expr.first} operation must an non-negative integer maximum or '*', was #{@expr[2]}" unless
139
+ @expr[2] == '*' || @expr[2].is_a?(Integer) && @expr[2] >= 0
140
+ when :seq
141
+ # It's legal to have a zero-length sequence
142
+ else
143
+ raise ArgumentError, "Rule expression must be an array using a known operator, was #{@expr.first}"
144
+ end
80
145
  end
81
146
 
82
147
  ##
83
148
  # Return a rule from its SXP representation:
84
149
  #
85
150
  # @example inputs
86
- # (pass (plus (range "#x20\\t\\r\\n")))
151
+ # (pass _pass (plus (range "#x20\\t\\r\\n")))
87
152
  # (rule ebnf "1" (star (alt declaration rule)))
88
- # (terminal O_ENUM "17" (seq "[^" (plus CHAR) "]"))
153
+ # (terminal R_CHAR "19" (diff CHAR (alt "]" "-")))
89
154
  #
90
- # Also may have (first ...), (follow ...), or (start #t)
155
+ # Also may have `(first ...)`, `(follow ...)`, or `(start #t)`.
91
156
  #
92
- # @param [Array] sxp
157
+ # @param [String, Array] sxp
93
158
  # @return [Rule]
94
159
  def self.from_sxp(sxp)
160
+ if sxp.is_a?(String)
161
+ require 'sxp' unless defined?(SXP)
162
+ sxp = SXP.parse(sxp)
163
+ end
95
164
  expr = sxp.detect {|e| e.is_a?(Array) && ![:first, :follow, :start].include?(e.first.to_sym)}
96
165
  first = sxp.detect {|e| e.is_a?(Array) && e.first.to_sym == :first}
97
166
  first = first[1..-1] if first
@@ -102,26 +171,28 @@ module EBNF
102
171
  start = sxp.any? {|e| e.is_a?(Array) && e.first.to_sym == :start}
103
172
  sym = sxp[1] if sxp[1].is_a?(Symbol)
104
173
  id = sxp[2] if sxp[2].is_a?(String)
105
- Rule.new(sym, id, expr, kind: sxp.first, first: first, follow: follow, cleanup: cleanup, start: start)
174
+ self.new(sym, id, expr, kind: sxp.first, first: first, follow: follow, cleanup: cleanup, start: start)
106
175
  end
107
176
 
108
177
  # Build a new rule creating a symbol and numbering from the current rule
109
- # Symbol and number creation is handled by the top-most rule in such a chain
178
+ # Symbol and number creation is handled by the top-most rule in such a chain.
110
179
  #
111
180
  # @param [Array] expr
181
+ # @param [Symbol] kind (nil)
182
+ # @param [Hash{Symbol => Symbol}] cleanup (nil)
112
183
  # @param [Hash{Symbol => Object}] options
113
- # @param [Symbol] :kind
114
184
  def build(expr, kind: nil, cleanup: nil, **options)
115
- new_sym, new_id = (@top_rule ||self).send(:make_sym_id)
116
- Rule.new(new_sym, new_id, expr,
117
- kind: kind,
118
- ebnf: @ebnf,
119
- top_rule: (@top_rule || self),
120
- cleanup: cleanup,
121
- **options)
185
+ new_sym, new_id = @top_rule.send(:make_sym_id)
186
+ self.class.new(new_sym, new_id, expr,
187
+ kind: kind,
188
+ ebnf: @ebnf,
189
+ top_rule: @top_rule,
190
+ cleanup: cleanup,
191
+ **options)
122
192
  end
123
193
 
124
- # Return representation for building S-Expressions
194
+ # Return representation for building S-Expressions.
195
+ #
125
196
  # @return [Array]
126
197
  def for_sxp
127
198
  elements = [kind, sym]
@@ -143,35 +214,45 @@ module EBNF
143
214
 
144
215
  alias_method :to_s, :to_sxp
145
216
 
146
- # Serializes this rule to an Turtle
217
+ # Serializes this rule to Turtle.
218
+ #
147
219
  # @return [String]
148
220
  def to_ttl
149
221
  @ebnf.debug("to_ttl") {inspect} if @ebnf
150
- comment = orig.to_s.strip.
151
- gsub(/"""/, '\"\"\"').
152
- gsub("\\", "\\\\").
153
- sub(/^\"/, '\"').
154
- sub(/\"$/m, '\"')
155
- statements = [
156
- %{:#{id} rdfs:label "#{id}"; rdf:value "#{sym}";},
157
- %{ rdfs:comment #{comment.inspect};},
158
- ]
222
+ statements = [%{:#{sym} rdfs:label "#{sym}";}]
223
+ if orig
224
+ comment = orig.to_s.strip.
225
+ gsub(/"""/, '\"\"\"').
226
+ gsub("\\", "\\\\").
227
+ sub(/^\"/, '\"').
228
+ sub(/\"$/m, '\"')
229
+ statements << %{ rdfs:comment #{comment.inspect};}
230
+ end
231
+ statements << %{ dc:identifier "#{id}";} if id
159
232
 
160
233
  statements += ttl_expr(expr, terminal? ? "re" : "g", 1, false)
161
234
  "\n" + statements.join("\n")
162
235
  end
163
236
 
237
+ # Return a Ruby representation of this rule
238
+ # @return [String]
239
def to_ruby
  # Positional arguments are always emitted; `kind` is only spelled out
  # when it differs from :rule.
  args = [sym, id, expr].map(&:inspect)
  args << "kind: #{kind.inspect}" unless kind == :rule
  "EBNF::Rule.new(#{args.join(', ')})"
end
242
+
164
243
  ##
165
244
  # Transform EBNF rule to BNF rules:
166
245
  #
167
- # * Transform (a [n] rule (op1 (op2))) into two rules:
168
- # (a [n] rule (op1 _a_1))
169
- # (_a_1 [n.1] rule (op2))
170
- # * Transform (a rule (opt b)) into (a rule (alt _empty b))
171
- # * Transform (a rule (star b)) into (a rule (alt _empty (seq b a)))
172
- # * Transform (a rule (plus b)) into (a rule (seq b (star b)
246
+ # * Transform `(rule a "n" (op1 (op2)))` into two rules:
247
+ #
248
+ # (rule a "n" (op1 _a_1))
249
+ # (rule _a_1 "n.1" (op2))
250
+ # * Transform `(rule a (opt b))` into `(rule a (alt _empty b))`
251
+ # * Transform `(rule a (star b))` into `(rule a (alt _empty (seq b a)))`
252
+ # * Transform `(rule a (plus b))` into `(rule a (seq b (star b)))`
253
+ #
254
+ # Transformation includes information used to re-construct non-transformed.
173
255
  #
174
- # Transformation includes information used to re-construct non-transformed
175
256
  # AST representation
176
257
  # @return [Array<Rule>]
177
258
  def to_bnf
@@ -198,19 +279,19 @@ module EBNF
198
279
  new_rules = new_rules.map {|r| r.to_bnf}.flatten
199
280
  elsif expr.first == :opt
200
281
  this = dup
201
- # * Transform (a rule (opt b)) into (a rule (alt _empty b))
282
+ # * Transform (rule a (opt b)) into (rule a (alt _empty b))
202
283
  this.expr = [:alt, :_empty, expr.last]
203
284
  this.cleanup = :opt
204
285
  new_rules = this.to_bnf
205
286
  elsif expr.first == :star
206
- # * Transform (a rule (star b)) into (a rule (alt _empty (seq b a)))
287
+ # * Transform (rule a (star b)) into (rule a (alt _empty (seq b a)))
207
288
  this = dup
208
289
  this.cleanup = :star
209
290
  new_rule = this.build([:seq, expr.last, this.sym], cleanup: :merge)
210
291
  this.expr = [:alt, :_empty, new_rule.sym]
211
292
  new_rules = [this] + new_rule.to_bnf
212
293
  elsif expr.first == :plus
213
- # * Transform (a rule (plus b)) into (a rule (seq b (star b)
294
+ # * Transform (rule a (plus b)) into (rule a (seq b (star b)
214
295
  this = dup
215
296
  this.cleanup = :plus
216
297
  this.expr = [:seq, expr.last, [:star, expr.last]]
@@ -219,7 +300,7 @@ module EBNF
219
300
  # Otherwise, no further transformation necessary
220
301
  new_rules << self
221
302
  elsif [:diff, :hex, :range].include?(expr.first)
222
- # This rules are fine, the just need to be terminals
303
+ # These rules are fine, they just need to be terminals
223
304
  raise "Encountered #{expr.first.inspect}, which is a #{self.kind}, not :terminal" unless self.terminal?
224
305
  new_rules << self
225
306
  else
@@ -230,89 +311,73 @@ module EBNF
230
311
  return new_rules
231
312
  end
232
313
 
233
- # Return the non-terminals for this rule. For seq, this is the first
234
- # non-terminals in the seq. For alt, this is every non-terminal ni the alt
235
- # @param [Array<Rule>] ast
236
- # The set of rules, used to turn symbols into rules
314
+ ##
315
+ # Transform EBNF rule for PEG:
316
+ #
317
+ # * Transform `(rule a "n" (op1 ... (op2 y) ...z))` into two rules:
318
+ #
319
+ # (rule a "n" (op1 ... _a_1 ... z))
320
+ # (rule _a_1 "n.1" (op2 y))
321
+ # * Transform `(rule a "n" (diff op1 op2))` into two rules:
322
+ #
323
+ # (rule a "n" (seq _a_1 op1))
324
+ # (rule _a_1 "n.1" (not op2))
325
+ #
237
326
  # @return [Array<Rule>]
238
- def non_terminals(ast)
239
- @non_terms ||= (alt? ? expr[1..-1] : expr[1,1]).map do |sym|
240
- case sym
241
- when Symbol
242
- r = ast.detect {|r| r.sym == sym}
243
- r if r && r.rule?
244
- else
245
- nil
246
- end
247
- end.compact
248
- end
327
+ def to_peg
328
+ new_rules = []
249
329
 
250
- # Return the terminals for this rule. For seq, this is the first
251
- # terminals or strings in the seq. For alt, this is every non-terminal ni the alt
252
- # @param [Array<Rule>] ast
253
- # The set of rules, used to turn symbols into rules
254
- # @return [Array<Rule>]
255
- def terminals(ast)
256
- @terms ||= (alt? ? expr[1..-1] : expr[1,1]).map do |sym|
257
- case sym
258
- when Symbol
259
- r = ast.detect {|r| r.sym == sym}
260
- r if r && r.terminal?
261
- when String
262
- sym
263
- else
264
- nil
330
+ # Look for rules containing sub-sequences
331
+ if expr.any? {|e| e.is_a?(Array) && e.first.is_a?(Symbol)}
332
+ # duplicate ourselves for rewriting
333
+ this = dup
334
+ new_rules << this
335
+
336
+ expr.each_with_index do |e, index|
337
+ next unless e.is_a?(Array) && e.first.is_a?(Symbol)
338
+ new_rule = build(e)
339
+ this.expr[index] = new_rule.sym
340
+ new_rules << new_rule
265
341
  end
266
- end.compact
267
- end
268
342
 
269
- # Does this rule start with a sym? It does if expr is that sym,
270
- # expr starts with alt and contains that sym, or
271
- # expr starts with seq and the next element is that sym
272
- # @param [Symbol, class] sym
273
- # Symbol matching any start element, or if it is String, any start element which is a String
274
- # @return [Array<Symbol, String>] list of symbol (singular), or strings which are start symbol, or nil if there are none
275
- def starts_with?(sym)
276
- if seq? && sym === (v = expr.fetch(1, nil))
277
- [v]
278
- elsif alt? && expr.any? {|e| sym === e}
279
- expr.select {|e| sym === e}
343
+ # Return new rules after recursively applying #to_peg
344
+ new_rules = new_rules.map {|r| r.to_peg}.flatten
345
+ elsif expr.first == :diff && !terminal?
346
+ this = dup
347
+ new_rule = build([:not, expr[2]])
348
+ this.expr = [:seq, new_rule.sym, expr[1]]
349
+ new_rules << this
350
+ new_rules << new_rule
351
+ elsif [:hex, :istr, :range].include?(expr.first)
352
+ # These rules are fine, they just need to be terminals
353
+ raise "Encountered #{expr.first.inspect}, which is a #{self.kind}, not :terminal" unless self.terminal?
354
+ new_rules << self
280
355
  else
281
- nil
356
+ new_rules << self
282
357
  end
358
+
359
+ return new_rules.map {|r| r.extend(EBNF::PEG::Rule)}
283
360
  end
284
361
 
285
- # Do the firsts of this rule include the empty string?
286
- # @return [Boolean]
287
- def first_includes_eps?
288
- @first && @first.include?(:_eps)
289
- end
290
-
291
- # Add terminal as proceding this rule
292
- # @param [Array<Rule, Symbol, String>] terminals
293
- # @return [Integer] if number of terminals added
294
- def add_first(terminals)
295
- @first ||= []
296
- terminals = terminals.map {|t| t.is_a?(Rule) ? t.sym : t} - @first
297
- @first += terminals
298
- terminals.length
299
- end
300
-
301
- # Add terminal as following this rule. Don't add _eps as a follow
362
+ ##
363
+ # For :hex, :istr, or :range, create a regular expression.
302
364
  #
303
- # @param [Array<Rule, Symbol, String>] terminals
304
- # @return [Integer] if number of terminals added
305
- def add_follow(terminals)
306
- # Remove terminals already in follows, and empty string
307
- terminals = terminals.map {|t| t.is_a?(Rule) ? t.sym : t} - (@follow || []) - [:_eps]
308
- unless terminals.empty?
309
- @follow ||= []
310
- @follow += terminals
365
+ # @return [Regexp]
366
+ def to_regexp
367
+ case expr.first
368
+ when :hex
369
+ Regexp.new(translate_codepoints(expr[1]))
370
+ when :istr
371
+ /#{expr.last}/ui
372
+ when :range
373
+ Regexp.new("[#{translate_codepoints(expr[1])}]")
374
+ else
375
+ raise "Can't turn #{expr.inspect} into a regexp"
311
376
  end
312
- terminals.length
313
377
  end
314
378
 
315
379
  # Is this a terminal?
380
+ #
316
381
  # @return [Boolean]
317
382
  def terminal?
318
383
  kind == :terminal
@@ -340,18 +405,14 @@ module EBNF
340
405
  expr.is_a?(Array) && expr.first == :seq
341
406
  end
342
407
 
343
- # Is this rule of the form (alt ...)?
344
- def alt?
345
- expr.is_a?(Array) && expr.first == :alt
346
- end
347
-
348
408
  def inspect
349
409
  "#<EBNF::Rule:#{object_id} " +
350
410
  {sym: sym, id: id, kind: kind, expr: expr}.inspect +
351
411
  ">"
352
412
  end
353
413
 
354
- # Two rules are equal if they have the same {#sym}, {#kind} and {#expr}
414
+ # Two rules are equal if they have the same {#sym}, {#kind} and {#expr}.
415
+ #
355
416
  # @param [Rule] other
356
417
  # @return [Boolean]
357
418
  def ==(other)
@@ -360,37 +421,259 @@ module EBNF
360
421
  expr == other.expr
361
422
  end
362
423
 
363
- # Two rules are equivalent if they have the same {#expr}
424
+ # Two rules are equivalent if they have the same {#expr}.
425
+ #
364
426
  # @param [Rule] other
365
427
  # @return [Boolean]
366
- def equivalent?(other)
367
- expr == other.expr
428
+ def eql?(other)
429
+ expr == other.expr
368
430
  end
369
431
 
370
- # Rewrite the rule substituting src_rule for dst_rule wherever
371
- # it is used in the production (first level only).
372
- # @param [Rule] src_rule
373
- # @param [Rule] dst_rule
374
- # @return [Rule]
375
- def rewrite(src_rule, dst_rule)
376
- case @expr
377
- when Array
378
- @expr = @expr.map {|e| e == src_rule.sym ? dst_rule.sym : e}
432
+ # Rules compare using their ids
433
+ def <=>(other)
434
+ if id && other.id
435
+ if id == other.id
436
+ id.to_s <=> other.id.to_s
437
+ else
438
+ id.to_f <=> other.id.to_f
439
+ end
379
440
  else
380
- @expr = dst_rule.sym if @expr == src_rule.sym
441
+ sym.to_s <=> other.sym.to_s
381
442
  end
382
- self
383
443
  end
384
444
 
385
- # Rules compare using their ids
386
- def <=>(other)
387
- if id.to_i == other.id.to_i
388
- id.to_s <=> other.id.to_s
445
+ ##
446
+ # Utility function to translate code points of the form '#xN' into ruby unicode characters
447
+ def translate_codepoints(str)
448
+ str.gsub(/#x\h+/) {|c| c[2..-1].scanf("%x").first.chr(Encoding::UTF_8)}
449
+ end
450
+
451
+ # Return the non-terminals for this rule.
452
+ #
453
+ # * `alt` => this is every non-terminal.
454
+ # * `diff` => this is every non-terminal.
455
+ # * `hex` => nil
456
+ # * `istr` => nil
457
+ # * `not` => this is the last expression, if any.
458
+ # * `opt` => this is the last expression, if any.
459
+ # * `plus` => this is the last expression, if any.
460
+ # * `range` => nil
461
+ # * `rept` => this is the last expression, if any.
462
+ # * `seq` => this is the first expression in the sequence, if any.
463
+ # * `star` => this is the last expression, if any.
464
+ #
465
+ # @param [Array<Rule>] ast
466
+ # The set of rules, used to turn symbols into rules
467
+ # @param [Array<Symbol,String,Array>] expr (@expr)
468
+ # The expression to check, defaults to the rule expression.
469
+ # Typically, if the expression is recursive, the embedded expression is called recursively.
470
+ # @return [Array<Rule>]
471
+ # @note this is used for LL(1) transformation, so rule types are limited
472
+ def non_terminals(ast, expr = @expr)
473
+ ([:alt, :diff].include?(expr.first) ? expr[1..-1] : expr[1,1]).map do |sym|
474
+ case sym
475
+ when Symbol
476
+ r = ast.detect {|r| r.sym == sym}
477
+ r if r && r.rule?
478
+ when Array
479
+ non_terminals(ast, sym)
480
+ else
481
+ nil
482
+ end
483
+ end.flatten.compact.uniq
484
+ end
485
+
486
+ # Return the terminals for this rule.
487
+ #
488
+ # * `alt` => this is every terminal.
489
+ # * `diff` => this is every terminal.
490
+ # * `hex` => nil
491
+ # * `istr` => nil
492
+ # * `not` => this is the last expression, if any.
493
+ # * `opt` => this is the last expression, if any.
494
+ # * `plus` => this is the last expression, if any.
495
+ # * `range` => nil
496
+ # * `rept` => this is the last expression, if any.
497
+ # * `seq` => this is the first expression in the sequence, if any.
498
+ # * `star` => this is the last expression, if any.
499
+ #
500
+ # @param [Array<Rule>] ast
501
+ # The set of rules, used to turn symbols into rules
502
+ # @param [Array<Symbol,String,Array>] expr (@expr)
503
+ # The expression to check, defaults to the rule expression.
504
+ # Typically, if the expression is recursive, the embedded expression is called recursively.
505
+ # @return [Array<Rule, String>]
506
+ # @note this is used for LL(1) transformation, so rule types are limited
507
+ def terminals(ast, expr = @expr)
508
+ ([:alt, :diff].include?(expr.first) ? expr[1..-1] : expr[1,1]).map do |sym|
509
+ case sym
510
+ when Symbol
511
+ r = ast.detect {|r| r.sym == sym}
512
+ r if r && r.terminal?
513
+ when String
514
+ sym
515
+ when Array
516
+ terminals(ast, sym)
517
+ end
518
+ end.flatten.compact.uniq
519
+ end
520
+
521
+ # Return the symbols used in the rule.
522
+ #
523
+ # @param [Array<Symbol,String,Array>] expr (@expr)
524
+ # The expression to check, defaults to the rule expression.
525
+ # Typically, if the expression is recursive, the embedded expression is called recursively.
526
+ # @return [Array<Symbol>]
527
+ def symbols(expr = @expr)
528
+ expr[1..-1].map do |sym|
529
+ case sym
530
+ when Symbol
531
+ sym
532
+ when Array
533
+ symbols(sym)
534
+ end
535
+ end.flatten.compact.uniq
536
+ end
537
+
538
+ ##
539
+ # The following are used for LL(1) transformation.
540
+ ##
541
+
542
+ # Does this rule start with `sym`? It does if expr is that sym,
543
+ # expr starts with alt and contains that sym,
544
+ # or expr starts with seq and the next element is that sym.
545
+ #
546
+ # @param [Symbol, class] sym
547
+ # Symbol matching any start element, or if it is String, any start element which is a String
548
+ # @return [Array<Symbol, String>, nil] a single-element list holding the start symbol for a seq, every matching element for an alt, or nil if there are none
549
+ def starts_with?(sym)
550
+ if seq? && sym === (v = expr.fetch(1, nil))
551
+ [v]
552
+ elsif alt? && expr.any? {|e| sym === e}
553
+ expr.select {|e| sym === e}
554
+ else
555
+ nil
556
+ end
557
+ end
558
+
559
+ ##
560
+ # Validate the rule, with respect to an AST.
561
+ #
562
+ # @param [Array<Rule>] ast
563
+ # The set of rules, used to turn symbols into rules
564
+ # @param [Array<Symbol,String,Array>] expr (@expr)
565
+ # The expression to check, defaults to the rule expression.
566
+ # Typically, if the expression is recursive, the embedded expression is called recursively.
567
+ # @raise [SyntaxError]
568
+ def validate!(ast, expr = @expr)
569
+ op = expr.first
570
+ raise SyntaxError, "Unknown operator: #{op}" unless OP_ARGN.key?(op)
571
+ raise SyntaxError, "Argument count missmatch on operator #{op}, had #{expr.length - 1} expected #{OP_ARGN[op]}" if
572
+ OP_ARGN[op] && OP_ARGN[op] != expr.length - 1
573
+
574
+ # rept operator needs min and max
575
+ if op == :alt
576
+ raise SyntaxError, "alt operation must have at least one operand, had #{expr.length - 1}" unless expr.length > 1
577
+ elsif op == :rept
578
+ raise SyntaxError, "rept operation must an non-negative integer minimum, was #{expr[1]}" unless
579
+ expr[1].is_a?(Integer) && expr[1] >= 0
580
+ raise SyntaxError, "rept operation must an non-negative integer maximum or '*', was #{expr[2]}" unless
581
+ expr[2] == '*' || expr[2].is_a?(Integer) && expr[2] >= 0
582
+ end
583
+
584
+ case op
585
+ when :hex
586
+ raise SyntaxError, "Hex operand must be of form '#xN+': #{sym}" unless expr.last.match?(/^#x\h+$/)
587
+ when :range
588
+ str = expr.last.dup
589
+ str = str[1..-1] if str.start_with?('^')
590
+ str = str[0..-2] if str.end_with?('-') # Allowed at end of range
591
+ scanner = StringScanner.new(str)
592
+ hex = rchar = in_range = false
593
+ while !scanner.eos?
594
+ begin
595
+ if scanner.scan(Terminals::HEX)
596
+ raise SyntaxError if in_range && rchar
597
+ rchar = in_range = false
598
+ hex = true
599
+ elsif scanner.scan(Terminals::R_CHAR)
600
+ raise SyntaxError if in_range && hex
601
+ hex = in_range = false
602
+ rchar = true
603
+ else
604
+ raise(SyntaxError, "Range contains illegal components at offset #{scanner.pos}: was #{expr.last}")
605
+ end
606
+
607
+ if scanner.scan(/\-/)
608
+ raise SyntaxError if in_range
609
+ in_range = true
610
+ end
611
+ rescue SyntaxError
612
+ raise(SyntaxError, "Range contains illegal components at offset #{scanner.pos}: was #{expr.last}")
613
+ end
614
+ end
389
615
  else
390
- id.to_i <=> other.id.to_i
616
+ ([:alt, :diff].include?(expr.first) ? expr[1..-1] : expr[1,1]).each do |sym|
617
+ case sym
618
+ when Symbol
619
+ r = ast.detect {|r| r.sym == sym}
620
+ raise SyntaxError, "No rule found for #{sym}" unless r
621
+ when Array
622
+ validate!(ast, sym)
623
+ when String
624
+ raise SyntaxError, "String must be of the form CHAR*" unless sym.match?(/^#{Terminals::CHAR}*$/)
625
+ end
626
+ end
391
627
  end
392
628
  end
393
629
 
630
+ ##
631
+ # Validate the rule, with respect to an AST.
632
+ #
633
+ # Uses `#validate!` and catches `SyntaxError`
634
+ #
635
+ # @param [Array<Rule>] ast
636
+ # The set of rules, used to turn symbols into rules
637
+ # @return [Boolean]
638
+ def valid?(ast)
639
+ validate!(ast)
640
+ true
641
+ rescue SyntaxError
642
+ false
643
+ end
644
+
645
+ # Do the firsts of this rule include the empty string?
646
+ #
647
+ # @return [Boolean]
648
+ def first_includes_eps?
649
+ @first && @first.include?(:_eps)
650
+ end
651
+
652
+ # Add terminal as preceding this rule.
653
+ #
654
+ # @param [Array<Rule, Symbol, String>] terminals
655
+ # @return [Integer] the number of terminals added
656
+ def add_first(terminals)
657
+ @first ||= []
658
+ terminals = terminals.map {|t| t.is_a?(Rule) ? t.sym : t} - @first
659
+ @first += terminals
660
+ terminals.length
661
+ end
662
+
663
+ # Add terminal as following this rule. Don't add _eps as a follow
664
+ #
665
+ # @param [Array<Rule, Symbol, String>] terminals
666
+ # @return [Integer] the number of terminals added
667
+ def add_follow(terminals)
668
+ # Remove terminals already in follows, and empty string
669
+ terminals = terminals.map {|t| t.is_a?(Rule) ? t.sym : t} - (@follow || []) - [:_eps]
670
+ unless terminals.empty?
671
+ @follow ||= []
672
+ @follow += terminals
673
+ end
674
+ terminals.length
675
+ end
676
+
394
677
  private
395
678
  def ttl_expr(expr, pfx, depth, is_obj = true)
396
679
  indent = ' ' * depth
@@ -406,17 +689,28 @@ module EBNF
406
689
 
407
690
  case op
408
691
  when :seq, :alt, :diff
692
+ # Multiple operands
409
693
  statements << %{#{indent}#{bra}#{pfx}:#{op} (}
410
694
  expr.each {|a| statements += ttl_expr(a, pfx, depth + 1)}
411
695
  statements << %{#{indent} )#{ket}}
412
- when :opt, :plus, :star
696
+ when :opt, :plus, :star, :not
697
+ # Single operand
413
698
  statements << %{#{indent}#{bra}#{pfx}:#{op} }
414
699
  statements += ttl_expr(expr.first, pfx, depth + 1)
415
700
  statements << %{#{indent} #{ket}} unless ket.empty?
416
- when :_empty, :_eps, :_empty
701
+ when :rept
702
+ # Three operands (min, max and expr)
703
+ statements << %{ #{indent}#{pfx}:min #{expr[0].inspect};}
704
+ statements << %{ #{indent}#{pfx}:max #{expr[1].inspect};}
705
+ statements << %{#{indent}#{bra}#{pfx}:#{op} }
706
+ statements += ttl_expr(expr.last, pfx, depth + 1)
707
+ statements << %{#{indent} #{ket}} unless ket.empty?
708
+ when :_empty, :_eps
417
709
  statements << %{#{indent}"g:#{op.to_s[1..-1]}"}
418
710
  when :"'"
419
711
  statements << %{#{indent}"#{esc(expr)}"}
712
+ when :istr
713
+ statements << %{#{indent}#{bra} re:matches #{expr.first.inspect} #{ket}}
420
714
  when :range
421
715
  statements << %{#{indent}#{bra} re:matches #{cclass(expr.first).inspect} #{ket}}
422
716
  when :hex
@@ -472,7 +766,7 @@ module EBNF
472
766
  def make_sym_id(variation = nil)
473
767
  @id_seq ||= 0
474
768
  @id_seq += 1
475
- ["_#{@sym}_#{@id_seq}#{variation}".to_sym, "#{@id}.#{@id_seq}#{variation}"]
769
+ ["_#{@sym}_#{@id_seq}#{variation}".to_sym, ("#{@id}.#{@id_seq}#{variation}" if @id)]
476
770
  end
477
771
  end
478
772
  end