ebnf 1.1.3 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +221 -198
  3. data/UNLICENSE +1 -1
  4. data/VERSION +1 -1
  5. data/bin/ebnf +40 -21
  6. data/etc/abnf-core.ebnf +52 -0
  7. data/etc/abnf.abnf +121 -0
  8. data/etc/abnf.ebnf +124 -0
  9. data/etc/abnf.sxp +45 -0
  10. data/etc/doap.ttl +23 -15
  11. data/etc/ebnf.ebnf +21 -33
  12. data/etc/ebnf.html +171 -160
  13. data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
  14. data/etc/ebnf.ll1.sxp +182 -183
  15. data/etc/ebnf.peg.rb +90 -0
  16. data/etc/ebnf.peg.sxp +84 -0
  17. data/etc/ebnf.sxp +40 -41
  18. data/etc/iso-ebnf.ebnf +140 -0
  19. data/etc/iso-ebnf.isoebnf +138 -0
  20. data/etc/iso-ebnf.sxp +65 -0
  21. data/etc/sparql.ebnf +4 -4
  22. data/etc/sparql.html +1603 -1751
  23. data/etc/sparql.ll1.sxp +7372 -7372
  24. data/etc/sparql.peg.rb +532 -0
  25. data/etc/sparql.peg.sxp +597 -0
  26. data/etc/sparql.sxp +363 -362
  27. data/etc/turtle.ebnf +3 -3
  28. data/etc/turtle.html +465 -517
  29. data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
  30. data/etc/turtle.ll1.sxp +425 -425
  31. data/etc/turtle.peg.rb +182 -0
  32. data/etc/turtle.peg.sxp +199 -0
  33. data/etc/turtle.sxp +103 -101
  34. data/lib/ebnf.rb +7 -2
  35. data/lib/ebnf/abnf.rb +301 -0
  36. data/lib/ebnf/abnf/core.rb +23 -0
  37. data/lib/ebnf/abnf/meta.rb +111 -0
  38. data/lib/ebnf/base.rb +113 -69
  39. data/lib/ebnf/bnf.rb +1 -26
  40. data/lib/ebnf/ebnf/meta.rb +90 -0
  41. data/lib/ebnf/isoebnf.rb +229 -0
  42. data/lib/ebnf/isoebnf/meta.rb +75 -0
  43. data/lib/ebnf/ll1.rb +138 -6
  44. data/lib/ebnf/ll1/lexer.rb +37 -32
  45. data/lib/ebnf/ll1/parser.rb +113 -73
  46. data/lib/ebnf/ll1/scanner.rb +83 -51
  47. data/lib/ebnf/native.rb +320 -0
  48. data/lib/ebnf/parser.rb +285 -302
  49. data/lib/ebnf/peg.rb +39 -0
  50. data/lib/ebnf/peg/parser.rb +561 -0
  51. data/lib/ebnf/peg/rule.rb +250 -0
  52. data/lib/ebnf/rule.rb +443 -148
  53. data/lib/ebnf/terminals.rb +21 -0
  54. data/lib/ebnf/writer.rb +565 -83
  55. metadata +107 -29
  56. data/etc/sparql.rb +0 -45773
@@ -0,0 +1,320 @@
1
+ module EBNF
2
+ module Native
3
+ ##
4
+ # Native parser for EBNF; less accurate, but appropriate when changing the EBNF grammar itself.
5
+ #
6
+ # Iterate over rule strings.
7
+ # a line that starts with '\[' or '@' starts a new rule
8
+ #
9
+ # @param [StringScanner] scanner
10
+ # @yield rule_string
11
+ # @yieldparam [String] rule_string
12
+ def eachRule(scanner)
13
+ cur_lineno = 1
14
+ r = ''
15
+ until scanner.eos?
16
+ case
17
+ when s = scanner.scan(%r(\s+)m)
18
+ # Eat whitespace
19
+ cur_lineno += s.count("\n")
20
+ #debug("eachRule(ws)") { "[#{cur_lineno}] #{s.inspect}" }
21
+ when s = scanner.scan(%r(/\*([^\*]|\*[^\/])*\*/)m)
22
+ # Eat comments /* .. */
23
+ cur_lineno += s.count("\n")
24
+ debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
25
+ when s = scanner.scan(%r(\(\*([^\*]|\*[^\)])*\*\))m)
26
+ # Eat comments (* .. *)
27
+ cur_lineno += s.count("\n")
28
+ debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
29
+ when s = scanner.scan(%r((#(?!x)|//).*$))
30
+ # Eat comments // & #
31
+ cur_lineno += s.count("\n")
32
+ debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
33
+ when s = scanner.scan(/\A["']/)
34
+ # Found a quote, scan until end of matching quote
35
+ s += scanner.scan_until(/#{scanner.matched}|$/)
36
+ r += s
37
+ when s = scanner.scan(%r(^@terminals))
38
+ #debug("eachRule(@terminals)") { "[#{cur_lineno}] #{s.inspect}" }
39
+ yield(r) unless r.empty?
40
+ @lineno = cur_lineno
41
+ yield(s)
42
+ r = ''
43
+ when s = scanner.scan(/@pass/)
44
+ # Found rule start, if we've already collected a rule, yield it
45
+ #debug("eachRule(@pass)") { "[#{cur_lineno}] #{s.inspect}" }
46
+ yield r unless r.empty?
47
+ @lineno = cur_lineno
48
+ r = s
49
+ when s = scanner.scan(EBNF::Terminals::LHS)
50
+ # Found rule start, if we've already collected a rule, yield it
51
+ yield r unless r.empty?
52
+ #debug("eachRule(rule)") { "[#{cur_lineno}] #{s.inspect}" }
53
+ @lineno = cur_lineno
54
+ r = s
55
+ else
56
+ # Collect until end of line, or start of comment or quote
57
+ s = scanner.scan_until(%r{(?:[/\(]\*)|#(?!x)|//|["']|$})
58
+ if scanner.matched.length > 0
59
+ # Back up scan head before ending match
60
+ scanner.pos = scanner.pos - scanner.matched.length
61
+
62
+ # Remove matched from end of string
63
+ s = s[0..-(scanner.matched.length+1)]
64
+ end
65
+ cur_lineno += s.count("\n")
66
+ #debug("eachRule(rest)") { "[#{cur_lineno}] #{s.inspect}" }
67
+ r += s
68
+ end
69
+ end
70
+ yield r unless r.empty?
71
+ end
72
+
73
+ ##
74
+ # Parse a rule into an optional rule number, a symbol and an expression
75
+ #
76
+ # @param [String] rule
77
+ # @return [Rule]
78
+ def ruleParts(rule)
79
+ num_sym, expr = rule.split('::=', 2).map(&:strip)
80
+ num, sym = num_sym.split(']', 2).map(&:strip)
81
+ num, sym = "", num if sym.nil?
82
+ num = num[1..-1]
83
+ r = Rule.new(sym && sym.to_sym, num, expression(expr).first, ebnf: self)
84
+ debug("ruleParts") { r.inspect }
85
+ r
86
+ end
87
+
88
+ ##
89
+ # Parse a string into an expression tree and a remaining string
90
+ #
91
+ # @example
92
+ # >>> expression("a b c")
93
+ # ((seq a b c) '')
94
+ #
95
+ # >>> expression("a? b+ c*")
96
+ # ((seq (opt a) (plus b) (star c)) '')
97
+ #
98
+ # >>> expression(" | x xlist")
99
+ # ((alt (seq) (seq x xlist)) '')
100
+ #
101
+ # >>> expression("a | (b - c)")
102
+ # ((alt a (diff b c)) '')
103
+ #
104
+ # >>> expression("a b | c d")
105
+ # ((alt (seq a b) (seq c d)) '')
106
+ #
107
+ # >>> expression("a | b | c")
108
+ # ((alt a b c) '')
109
+ #
110
+ # >>> expression("a) b c")
111
+ # (a ' b c')
112
+ #
113
+ # >>> expression("BaseDecl? PrefixDecl*")
114
+ # ((seq (opt BaseDecl) (star PrefixDecl)) '')
115
+ #
116
+ # >>> expression("NCCHAR1 | diff | [0-9] | #x00B7 | [#x0300-#x036F] | \[#x203F-#x2040\]")
117
+ # ((alt NCCHAR1 diff
118
+ # (range '0-9')
119
+ # (hex '#x00B7')
120
+ # (range '#x0300-#x036F')
121
+ # (range '#x203F-#x2040')) '')
122
+ #
123
+ # @param [String] s
124
+ # @return [Array]
125
+ def expression(s)
126
+ debug("expression") {"(#{s.inspect})"}
127
+ e, s = depth {alt(s)}
128
+ debug {"=> alt returned #{[e, s].inspect}"}
129
+ unless s.to_s.empty?
130
+ t, ss = depth {terminal(s)}
131
+ debug {"=> terminal returned #{[t, ss].inspect}"}
132
+ return [e, ss] if t.is_a?(Array) && t.first == :")"
133
+ end
134
+ [e, s]
135
+ end
136
+
137
+ ##
138
+ # Parse alt
139
+ # >>> alt("a | b | c")
140
+ # ((alt a b c) '')
141
+ # @param [String] s
142
+ # @return [Array]
143
+ def alt(s)
144
+ debug("alt") {"(#{s.inspect})"}
145
+ args = []
146
+ while !s.to_s.empty?
147
+ e, s = depth {seq(s)}
148
+ debug {"=> seq returned #{[e, s].inspect}"}
149
+ if e.to_s.empty?
150
+ break unless args.empty?
151
+ e = [:seq, []] # empty sequence
152
+ end
153
+ args << e
154
+ unless s.to_s.empty?
155
+ t, ss = depth {terminal(s)}
156
+ break unless t[0] == :alt
157
+ s = ss
158
+ end
159
+ end
160
+ args.length > 1 ? [args.unshift(:alt), s] : [e, s]
161
+ end
162
+
163
+ ##
164
+ # parse seq
165
+ #
166
+ # >>> seq("a b c")
167
+ # ((seq a b c) '')
168
+ #
169
+ # >>> seq("a b? c")
170
+ # ((seq a (opt b) c) '')
171
+ def seq(s)
172
+ debug("seq") {"(#{s.inspect})"}
173
+ args = []
174
+ while !s.to_s.empty?
175
+ e, ss = depth {diff(s)}
176
+ debug {"=> diff returned #{[e, ss].inspect}"}
177
+ unless e.to_s.empty?
178
+ args << e
179
+ s = ss
180
+ else
181
+ break;
182
+ end
183
+ end
184
+ if args.length > 1
185
+ [args.unshift(:seq), s]
186
+ elsif args.length == 1
187
+ args + [s]
188
+ else
189
+ ["", s]
190
+ end
191
+ end
192
+
193
+ ##
194
+ # parse diff
195
+ #
196
+ # >>> diff("a - b")
197
+ # ((diff a b) '')
198
+ def diff(s)
199
+ debug("diff") {"(#{s.inspect})"}
200
+ e1, s = depth {postfix(s)}
201
+ debug {"=> postfix returned #{[e1, s].inspect}"}
202
+ unless e1.to_s.empty?
203
+ unless s.to_s.empty?
204
+ t, ss = depth {terminal(s)}
205
+ debug {"diff #{[t, ss].inspect}"}
206
+ if t.is_a?(Array) && t.first == :diff
207
+ s = ss
208
+ e2, s = primary(s)
209
+ unless e2.to_s.empty?
210
+ return [[:diff, e1, e2], s]
211
+ else
212
+ error("diff", "Syntax Error")
213
+ raise SyntaxError, "diff missing second operand"
214
+ end
215
+ end
216
+ end
217
+ end
218
+ [e1, s]
219
+ end
220
+
221
+ ##
222
+ # parse postfix
223
+ #
224
+ # >>> postfix("a b c")
225
+ # (a ' b c')
226
+ #
227
+ # >>> postfix("a? b c")
228
+ # ((opt a) ' b c')
229
+ def postfix(s)
230
+ debug("postfix") {"(#{s.inspect})"}
231
+ e, s = depth {primary(s)}
232
+ debug {"=> primary returned #{[e, s].inspect}"}
233
+ return ["", s] if e.to_s.empty?
234
+ if !s.to_s.empty?
235
+ t, ss = depth {terminal(s)}
236
+ debug {"=> #{[t, ss].inspect}"}
237
+ if t.is_a?(Array) && [:opt, :star, :plus].include?(t.first)
238
+ return [[t.first, e], ss]
239
+ end
240
+ end
241
+ [e, s]
242
+ end
243
+
244
+ ##
245
+ # parse primary
246
+ #
247
+ # >>> primary("a b c")
248
+ # (a ' b c')
249
+ def primary(s)
250
+ debug("primary") {"(#{s.inspect})"}
251
+ t, s = depth {terminal(s)}
252
+ debug {"=> terminal returned #{[t, s].inspect}"}
253
+ if t.is_a?(Symbol) || t.is_a?(String)
254
+ [t, s]
255
+ elsif %w(range hex).map(&:to_sym).include?(t.first)
256
+ [t, s]
257
+ elsif t.first == :"("
258
+ e, s = depth {expression(s)}
259
+ debug {"=> expression returned #{[e, s].inspect}"}
260
+ [e, s]
261
+ else
262
+ ["", s]
263
+ end
264
+ end
265
+
266
+ ##
267
+ # parse one terminal; return the terminal and the remaining string
268
+ #
269
+ # A terminal is represented as a tuple whose 1st item gives the type;
270
+ # some types have additional info in the tuple.
271
+ #
272
+ # @example
273
+ # >>> terminal("'abc' def")
274
+ # ('abc' ' def')
275
+ #
276
+ # >>> terminal("[0-9]")
277
+ # ((range '0-9') '')
278
+ # >>> terminal("#x00B7")
279
+ # ((hex '#x00B7') '')
280
+ # >>> terminal ("\[#x0300-#x036F\]")
281
+ # ((range '#x0300-#x036F') '')
282
+ # >>> terminal("\[^<>'{}|^`\]-\[#x00-#x20\]")
283
+ # ((range "^<>'{}|^`") '-\[#x00-#x20\]')
284
+ def terminal(s)
285
+ s = s.strip
286
+ #STDERR.puts s.inspect
287
+ case m = s[0,1]
288
+ when '"', "'" # STRING1 or STRING2
289
+ l, s = s[1..-1].split(m.rstrip, 2)
290
+ [LL1::Lexer.unescape_string(l), s]
291
+ when '[' # RANGE, O_RANGE
292
+ l, s = s[1..-1].split(/(?<=[^\\])\]/, 2)
293
+ [[:range, LL1::Lexer.unescape_string(l)], s]
294
+ when '#' # HEX
295
+ s.match(/(#x\h+)(.*)$/)
296
+ l, s = $1, $2
297
+ [[:hex, l], s]
298
+ when /[\w\.]/ # SYMBOL
299
+ s.match(/([\w\.]+)(.*)$/)
300
+ l, s = $1, $2
301
+ [l.to_sym, s]
302
+ when '-'
303
+ [[:diff], s[1..-1]]
304
+ when '?'
305
+ [[:opt], s[1..-1]]
306
+ when '|'
307
+ [[:alt], s[1..-1]]
308
+ when '+'
309
+ [[:plus], s[1..-1]]
310
+ when '*'
311
+ [[:star], s[1..-1]]
312
+ when /[\(\)]/ # '(' or ')'
313
+ [[m.to_sym], s[1..-1]]
314
+ else
315
+ error("terminal", "unrecognized terminal: #{s.inspect}")
316
+ raise SyntaxError, "unrecognized terminal: #{s.inspect}"
317
+ end
318
+ end
319
+ end
320
+ end
@@ -1,322 +1,305 @@
1
+ require_relative 'ebnf/meta'
2
+ require 'logger'
3
+
1
4
  module EBNF
2
- module Parser
3
- ##
4
- # Iterate over rule strings.
5
- # a line that starts with '\[' or '@' starts a new rule
6
- #
7
- # @param [StringScanner] scanner
8
- # @yield rule_string
9
- # @yieldparam [String] rule_string
10
- def eachRule(scanner)
11
- cur_lineno = 1
12
- r = ''
13
- until scanner.eos?
14
- case
15
- when s = scanner.scan(%r(\s+)m)
16
- # Eat whitespace
17
- cur_lineno += s.count("\n")
18
- #debug("eachRule(ws)") { "[#{cur_lineno}] #{s.inspect}" }
19
- when s = scanner.scan(%r(/\*([^\*]|\*[^\/])*\*/)m)
20
- # Eat comments /* .. */
21
- cur_lineno += s.count("\n")
22
- debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
23
- when s = scanner.scan(%r(\(\*([^\*]|\*[^\)])*\*\))m)
24
- # Eat comments (* .. *)
25
- cur_lineno += s.count("\n")
26
- debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
27
- when s = scanner.scan(%r((#(?!x)|//).*$))
28
- # Eat comments // & #
29
- cur_lineno += s.count("\n")
30
- debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
31
- when s = scanner.scan(/\A["']/)
32
- # Found a quote, scan until end of matching quote
33
- s += scanner.scan_until(/#{scanner.matched}|$/)
34
- r += s
35
- when s = scanner.scan(%r(^@terminals))
36
- #debug("eachRule(@terminals)") { "[#{cur_lineno}] #{s.inspect}" }
37
- yield(r) unless r.empty?
38
- @lineno = cur_lineno
39
- yield(s)
40
- r = ''
41
- when s = scanner.scan(/@pass/)
42
- # Found rule start, if we've already collected a rule, yield it
43
- #debug("eachRule(@pass)") { "[#{cur_lineno}] #{s.inspect}" }
44
- yield r unless r.empty?
45
- @lineno = cur_lineno
46
- r = s
47
- when s = scanner.scan(/(?:\[[\w\.]+\])\s*[\w\.]+\s*::=/)
48
- # Found rule start, if we've already collected a rule, yield it
49
- yield r unless r.empty?
50
- #debug("eachRule(rule)") { "[#{cur_lineno}] #{s.inspect}" }
51
- @lineno = cur_lineno
52
- r = s
53
- else
54
- # Collect until end of line, or start of comment or quote
55
- s = scanner.scan_until(%r{(?:[/\(]\*)|#(?!x)|//|["']|$})
56
- if scanner.matched.length > 0
57
- # Back up scan head before ending match
58
- scanner.pos = scanner.pos - scanner.matched.length
5
+ class Parser
6
+ include EBNF::PEG::Parser
7
+ include EBNF::Terminals
59
8
 
60
- # Remove matched from end of string
61
- s = s[0..-(scanner.matched.length+1)]
62
- end
63
- cur_lineno += s.count("\n")
64
- #debug("eachRule(rest)") { "[#{cur_lineno}] #{s.inspect}" }
65
- r += s
66
- end
67
- end
68
- yield r unless r.empty?
9
+ # Abstract syntax tree from parse
10
+ #
11
+ # @return [Array<EBNF::Rule>]
12
+ attr_reader :ast
13
+
14
+ # ## Terminals
15
+ # Define rules for Terminals, placing results on the input stack, making them available to upstream non-Terminal rules.
16
+ #
17
+ # Terminals are defined with a symbol matching the associated rule name, and an optional (although strongly encouraged) regular expression used to match the head of the input stream.
18
+ #
19
+ # The result of the terminal block is the semantic value of that terminal, which is often a string, but may be any instance which reflects the semantic interpretation of that terminal.
20
+ #
21
+ # The `value` parameter is the value matched by the regexp, if defined, or by the sub-terminal rules otherwise.
22
+ #
23
+ # The `prod` parameter is the name of the parent rule for which this terminal is matched, which may have a bearing in some circumstances, although not used in this example.
24
+ #
25
+ # If no block is provided, then the value which would have been passed to the block is used as the result directly.
26
+
27
+ # Match the Left hand side of a rule or terminal
28
+ #
29
+ # [11] LHS ::= ('[' SYMBOL+ ']' ' '+)? SYMBOL ' '* '::='
30
+ terminal(:LHS, LHS) do |value, prod|
31
+ value.to_s.scan(/(?:\[([^\]]+)\])?\s*(\w+)\s*::=/).first
32
+ end
33
+
34
+ # Match `SYMBOL` terminal
35
+ #
36
+ # [12] SYMBOL ::= ([a-z] | [A-Z] | [0-9] | '_' | '.')+
37
+ terminal(:SYMBOL, SYMBOL) do |value|
38
+ value.to_sym
69
39
  end
70
-
71
- ##
72
- # Parse a rule into an optional rule number, a symbol and an expression
73
- #
74
- # @param [String] rule
75
- # @return [Rule]
76
- def ruleParts(rule)
77
- num_sym, expr = rule.split('::=', 2).map(&:strip)
78
- num, sym = num_sym.split(']', 2).map(&:strip)
79
- num, sym = "", num if sym.nil?
80
- num = num[1..-1]
81
- r = Rule.new(sym && sym.to_sym, num, expression(expr).first, ebnf: self)
82
- debug("ruleParts") { r.inspect }
83
- r
40
+
41
+ # Match `HEX` terminal
42
+ #
43
+ # [13] HEX ::= '#x' ([a-f] | [A-F] | [0-9])+
44
+ terminal(:HEX, HEX) do |value|
45
+ [:hex, value]
84
46
  end
85
47
 
86
- ##
87
- # Parse a string into an expression tree and a remaining string
88
- #
89
- # @example
90
- # >>> expression("a b c")
91
- # ((seq a b c) '')
92
- #
93
- # >>> expression("a? b+ c*")
94
- # ((seq (opt a) (plus b) (star c)) '')
95
- #
96
- # >>> expression(" | x xlist")
97
- # ((alt (seq) (seq x xlist)) '')
98
- #
99
- # >>> expression("a | (b - c)")
100
- # ((alt a (diff b c)) '')
101
- #
102
- # >>> expression("a b | c d")
103
- # ((alt (seq a b) (seq c d)) '')
104
- #
105
- # >>> expression("a | b | c")
106
- # ((alt a b c) '')
107
- #
108
- # >>> expression("a) b c")
109
- # (a ' b c')
110
- #
111
- # >>> expression("BaseDecl? PrefixDecl*")
112
- # ((seq (opt BaseDecl) (star PrefixDecl)) '')
113
- #
114
- # >>> expression("NCCHAR1 | diff | [0-9] | #x00B7 | [#x0300-#x036F] | \[#x203F-#x2040\]")
115
- # ((alt NCCHAR1 diff
116
- # (range '0-9')
117
- # (hex '#x00B7')
118
- # (range '#x0300-#x036F')
119
- # (range, '#x203F-#x2040')) '')
120
- #
121
- # @param [String] s
122
- # @return [Array]
123
- def expression(s)
124
- debug("expression") {"(#{s.inspect})"}
125
- e, s = depth {alt(s)}
126
- debug {"=> alt returned #{[e, s].inspect}"}
127
- unless s.to_s.empty?
128
- t, ss = depth {terminal(s)}
129
- debug {"=> terminal returned #{[t, ss].inspect}"}
130
- return [e, ss] if t.is_a?(Array) && t.first == :")"
131
- end
132
- [e, s]
48
+ # Terminal for `RANGE` is matched as part of a `primary` rule.
49
+ #
50
+ # [14] RANGE ::= '[' ((R_CHAR '-' R_CHAR) | (HEX '-' HEX) | R_CHAR | HEX)+ '-'? ']' - LHS
51
+ terminal(:RANGE, RANGE) do |value|
52
+ [:range, value[1..-2]]
133
53
  end
134
-
135
- ##
136
- # Parse alt
137
- # >>> alt("a | b | c")
138
- # ((alt a b c) '')
139
- # @param [String] s
140
- # @return [Array]
141
- def alt(s)
142
- debug("alt") {"(#{s.inspect})"}
143
- args = []
144
- while !s.to_s.empty?
145
- e, s = depth {seq(s)}
146
- debug {"=> seq returned #{[e, s].inspect}"}
147
- if e.to_s.empty?
148
- break unless args.empty?
149
- e = [:seq, []] # empty sequence
150
- end
151
- args << e
152
- unless s.to_s.empty?
153
- t, ss = depth {terminal(s)}
154
- break unless t[0] == :alt
155
- s = ss
156
- end
157
- end
158
- args.length > 1 ? [args.unshift(:alt), s] : [e, s]
54
+
55
+ # Terminal for `O_RANGE` is matched as part of a `primary` rule.
56
+ #
57
+ # [15] O_RANGE ::= '[^' ((R_CHAR '-' R_CHAR) | (HEX '-' HEX) | R_CHAR | HEX)+ '-'? ']'
58
+ terminal(:O_RANGE, O_RANGE) do |value|
59
+ [:range, value[1..-2]]
159
60
  end
160
-
161
- ##
162
- # parse seq
163
- #
164
- # >>> seq("a b c")
165
- # ((seq a b c) '')
166
- #
167
- # >>> seq("a b? c")
168
- # ((seq a (opt b) c) '')
169
- def seq(s)
170
- debug("seq") {"(#{s.inspect})"}
171
- args = []
172
- while !s.to_s.empty?
173
- e, ss = depth {diff(s)}
174
- debug {"=> diff returned #{[e, ss].inspect}"}
175
- unless e.to_s.empty?
176
- args << e
177
- s = ss
178
- else
179
- break;
180
- end
181
- end
182
- if args.length > 1
183
- [args.unshift(:seq), s]
184
- elsif args.length == 1
185
- args + [s]
61
+
62
+ # Match double quote string
63
+ #
64
+ # [16] STRING1 ::= '"' (CHAR - '"')* '"'
65
+ terminal(:STRING1, STRING1) do |value|
66
+ value[1..-2]
67
+ end
68
+
69
+ # Match single quote string
70
+ #
71
+ # [17] STRING2 ::= "'" (CHAR - "'")* "'"
72
+ terminal(:STRING2, STRING2) do |value|
73
+ value[1..-2]
74
+ end
75
+
76
+ # The `CHAR` and `R_CHAR` productions are not used explicitly
77
+
78
+ # Match `POSTFIX` terminal
79
+ #
80
+ # [20] POSTFIX ::= [?*+]
81
+ terminal(:POSTFIX, POSTFIX)
82
+
83
+ # The `PASS` production is not used explicitly
84
+
85
+ # ## Non-terminal productions
86
+ # Define productions for non-Terminals. This can include `start_production` as well as `production` to hook into rule start and end. In some cases, we need to use sub-productions as generated when turning EBNF into PEG.
87
+ #
88
+ # Productions are defined with a symbol matching the associated rule name.
89
+ #
90
+ # The result of the productions is typically the abstract syntax tree matched by the rule, so far, but could be a specific semantic value, or could be ignored with the result being returned via the `callback`.
91
+ #
92
+ # The `value` parameter is the result returned from child productions
93
+ #
94
+ # The `data` parameter contains other data which may be returned by child productions placing information onto their input (unused in this example).
95
+ #
96
+ # The `callback` parameter provides access to a callback defined in the call to `parse`.
97
+
98
+ # Production for end of `declaration` non-terminal.
99
+ #
100
+ # Look for `@terminals` to change parser state to parsing terminals.
101
+ #
102
+ # Clears the packrat parser when called.
103
+ #
104
+ # `@pass` is ignored here.
105
+ #
106
+ # [2] declaration ::= '@terminals' | pass
107
+ production(:declaration, clear_packrat: true) do |value, data, callback|
108
+ # value contains a declaration.
109
+ # Invoke callback
110
+ callback.call(:terminals) if value == '@terminals'
111
+ nil
112
+ end
113
+
114
+ # Production for end of `rule` non-terminal.
115
+ #
116
+ # By setting `as_hash: true` in the `start_production`, the `value` parameter will be in the form `{LHS: "v", expression: "v"}`. Otherwise, it would be expressed using an array of hashes of the form `[{LHS: "v"}, {expression: "v"}]`.
117
+ #
118
+ # Clears the packrat parser when called.
119
+ #
120
+ # Create rule from expression value and pass to callback
121
+ #
122
+ # [3] rule ::= LHS expression
123
+ start_production(:rule, as_hash: true)
124
+ production(:rule, clear_packrat: true) do |value, data, callback|
125
+ # value contains an expression.
126
+ # Invoke callback
127
+ id, sym = value[:LHS]
128
+ expression = value[:expression]
129
+ callback.call(:rule, EBNF::Rule.new(sym.to_sym, id, expression))
130
+ nil
131
+ end
132
+
133
+ # Production for end of `expression` non-terminal.
134
+ # Passes through the optimized value of the alt production as follows:
135
+ #
136
+ # The `value` parameter, is of the form `[{alt: "v"}]`.
137
+ #
138
+ # [:alt foo] => foo
139
+ # [:alt foo bar] => [:alt foo bar]
140
+ #
141
+ # [4] expression ::= alt
142
+ production(:expression) do |value|
143
+ value.first[:alt]
144
+ end
145
+
146
+ # Production for end of `alt` non-terminal.
147
+ # Passes through the optimized value of the seq production as follows:
148
+ #
149
+ # The `value` parameter, is of the form `{seq: "v", _alt_1: "v"}`.
150
+ #
151
+ # [:seq foo] => foo
152
+ # [:seq foo bar] => [:seq foo bar]
153
+ #
154
+ # Note that this also may just pass through from `_alt_1`
155
+ #
156
+ # [5] alt ::= seq ('|' seq)*
157
+ start_production(:alt, as_hash: true)
158
+ production(:alt) do |value|
159
+ if value[:_alt_1].length > 0
160
+ [:alt, value[:seq]] + value[:_alt_1]
186
161
  else
187
- ["", s]
162
+ value[:seq]
188
163
  end
189
164
  end
190
-
191
- ##
192
- # parse diff
193
- #
194
- # >>> diff("a - b")
195
- # ((diff a b) '')
196
- def diff(s)
197
- debug("diff") {"(#{s.inspect})"}
198
- e1, s = depth {postfix(s)}
199
- debug {"=> postfix returned #{[e1, s].inspect}"}
200
- unless e1.to_s.empty?
201
- unless s.to_s.empty?
202
- t, ss = depth {terminal(s)}
203
- debug {"diff #{[t, ss].inspect}"}
204
- if t.is_a?(Array) && t.first == :diff
205
- s = ss
206
- e2, s = primary(s)
207
- unless e2.to_s.empty?
208
- return [[:diff, e1, e2], s]
209
- else
210
- error("diff", "Syntax Error")
211
- raise "Syntax Error"
212
- end
213
- end
214
- end
215
- end
216
- [e1, s]
165
+
166
+ # Production for end of `_alt_1` non-terminal.
167
+ # Used to collect the `('|' seq)*` portion of the `alt` non-terminal:
168
+ #
169
+ # The `value` parameter, is of the form `[{seq: ["v"]}]`.
170
+ #
171
+ # [5] _alt_1 ::= ('|' seq)*
172
+ production(:_alt_1) do |value|
173
+ value.map {|a1| a1.last[:seq]}.compact # Get rid of '|'
217
174
  end
218
-
219
- ##
220
- # parse postfix
221
- #
222
- # >>> postfix("a b c")
223
- # (a ' b c')
224
- #
225
- # >>> postfix("a? b c")
226
- # ((opt, a) ' b c')
227
- def postfix(s)
228
- debug("postfix") {"(#{s.inspect})"}
229
- e, s = depth {primary(s)}
230
- debug {"=> primary returned #{[e, s].inspect}"}
231
- return ["", s] if e.to_s.empty?
232
- if !s.to_s.empty?
233
- t, ss = depth {terminal(s)}
234
- debug {"=> #{[t, ss].inspect}"}
235
- if t.is_a?(Array) && [:opt, :star, :plus].include?(t.first)
236
- return [[t.first, e], ss]
237
- end
238
- end
239
- [e, s]
175
+
176
+ # Production for end of `seq` non-terminal.
177
+ # Passes through the optimized value of the `diff` production as follows:
178
+ #
179
+ # The `value` parameter, is an array of values, which cannot be empty.
180
+ #
181
+ # [:diff foo] => foo
182
+ # [:diff foo bar] => [:diff foo bar]
183
+ #
184
+ # Note that this also may just pass through from `_seq_1`
185
+ #
186
+ # [6] seq ::= diff+
187
+ production(:seq) do |value|
188
+ value.length == 1 ? value.first : ([:seq] + value)
240
189
  end
241
190
 
242
- ##
243
- # parse primary
244
- #
245
- # >>> primary("a b c")
246
- # (a ' b c')
247
- def primary(s)
248
- debug("primary") {"(#{s.inspect})"}
249
- t, s = depth {terminal(s)}
250
- debug {"=> terminal returned #{[t, s].inspect}"}
251
- if t.is_a?(Symbol) || t.is_a?(String)
252
- [t, s]
253
- elsif %w(range hex).map(&:to_sym).include?(t.first)
254
- [t, s]
255
- elsif t.first == :"("
256
- e, s = depth {expression(s)}
257
- debug {"=> expression returned #{[e, s].inspect}"}
258
- [e, s]
191
+ # `Diff` production returns concatenated postfix values
192
+ #
193
+ # The `value` parameter, is of the form `{postfix: "v", _diff_1: "v"}`.
194
+ #
195
+ # [7] diff ::= postfix ('-' postfix)?
196
+ start_production(:diff, as_hash: true)
197
+ production(:diff) do |value|
198
+ if value[:_diff_1]
199
+ [:diff, value[:postfix], value[:_diff_1]]
259
200
  else
260
- ["", s]
201
+ value[:postfix]
261
202
  end
262
203
  end
263
-
264
- ##
265
- # parse one terminal; return the terminal and the remaining string
266
- #
267
- # A terminal is represented as a tuple whose 1st item gives the type;
268
- # some types have additional info in the tuple.
269
- #
270
- # @example
271
- # >>> terminal("'abc' def")
272
- # ('abc' ' def')
273
- #
274
- # >>> terminal("[0-9]")
275
- # ((range '0-9') '')
276
- # >>> terminal("#x00B7")
277
- # ((hex '#x00B7') '')
278
- # >>> terminal ("\[#x0300-#x036F\]")
279
- # ((range '#x0300-#x036F') '')
280
- # >>> terminal("\[^<>'{}|^`\]-\[#x00-#x20\]")
281
- # ((range "^<>'{}|^`") '-\[#x00-#x20\]')
282
- def terminal(s)
283
- s = s.strip
284
- #STDERR.puts s.inspect
285
- case m = s[0,1]
286
- when '"', "'" # STRING1 or STRING2
287
- l, s = s[1..-1].split(m.rstrip, 2)
288
- [LL1::Lexer.unescape_string(l), s]
289
- when '[' # RANGE, O_RANGE
290
- l, s = s[1..-1].split(/(?<=[^\\])\]/, 2)
291
- [[:range, LL1::Lexer.unescape_string(l)], s]
292
- when '#' # HEX
293
- s.match(/(#x\h+)(.*)$/)
294
- l, s = $1, $2
295
- [[:hex, l], s]
296
- when /[\w\.]/ # SYMBOL
297
- s.match(/([\w\.]+)(.*)$/)
298
- l, s = $1, $2
299
- [l.to_sym, s]
300
- when '@' # @pass or @terminals
301
- s.match(/@(#\w+)(.*)$/)
302
- l, s = $1, $2
303
- [[:"@", l], s]
304
- when '-'
305
- [[:diff], s[1..-1]]
306
- when '?'
307
- [[:opt], s[1..-1]]
308
- when '|'
309
- [[:alt], s[1..-1]]
310
- when '+'
311
- [[:plus], s[1..-1]]
312
- when '*'
313
- [[:star], s[1..-1]]
314
- when /[\(\)]/ # '(' or ')'
315
- [[m.to_sym], s[1..-1]]
316
- else
317
- error("terminal", "unrecognized terminal: #{s.inspect}")
318
- raise "Syntax Error, unrecognized terminal: #{s.inspect}"
204
+
205
+ production(:_diff_1) do |value|
206
+ value.last[:postfix] if value
207
+ end
208
+
209
+ # Production for end of `postfix` non-terminal.
210
+ # Either returns the `primary` production value, or as modified by the `postfix`.
211
+ #
212
+ # The `value` parameter, is of the form `{primary: "v", _postfix_1: "v"}`.
213
+ #
214
+ # [:primary] => [:primary]
215
+ # [:primary, '*'] => [:star, :primary]
216
+ # [:primary, '+'] => [:plus, :primary]
217
+ # [:primary, '?'] => [:opt, :primary]
218
+ #
219
+ # [8] postfix ::= primary POSTFIX?
220
+ start_production(:postfix, as_hash: true)
221
+ production(:postfix) do |value|
222
+ # Push result onto input stack, as the `diff` production can have some number of `postfix` values that are applied recursively
223
+ case value[:_postfix_1]
224
+ when "*" then [:star, value[:primary]]
225
+ when "+" then [:plus, value[:primary]]
226
+ when "?" then [:opt, value[:primary]]
227
+ else value[:primary]
228
+ end
229
+ end
230
+
231
+ # Production for end of `primary` non-terminal.
232
+ # Places `:primary` on the stack
233
+ #
234
+ # The `value` parameter is either a string (for a terminal) or an array of the form `[{'(': '('}, {expression: "v"}, {')': ')'}]`.
235
+ #
236
+ # This may either be a terminal, or the result of an `expression`.
237
+ #
238
+ # [9] primary ::= HEX
239
+ # | SYMBOL
240
+ # | RANGE
241
+ # | O_RANGE
242
+ # | STRING1
243
+ # | STRING2
244
+ # | '(' expression ')'
245
+ production(:primary) do |value|
246
+ Array(value).length > 2 ? value[1][:expression] : value
247
+ end
248
+
249
+ # Production for end of pass non-terminal.
250
+ #
251
+ # [10] pass ::= '@pass' expression
252
+ production(:pass) do |value, data, callback|
253
+ # Invoke callback
254
+ callback.call(:pass, value.last[:expression])
255
+ end
256
+
257
+ # ## Parser invocation.
258
+ # On start, yield ourselves if a block is given, otherwise, return this parser instance
259
+ #
260
+ # @param [#read, #to_s] input
261
+ # @param [Hash{Symbol => Object}] options
262
+ # @option options [Integer] :level
263
+ # Trace level. 0(debug), 1(info), 2(warn), 3(error).
264
+ # @return [EBNF::Parser]
265
+ def initialize(input, **options, &block)
266
+ # If the `level` option is set, instantiate a logger for collecting trace information.
267
+ if options.has_key?(:level)
268
+ options[:logger] = Logger.new(STDERR)
269
+ options[:logger].level = options[:level]
270
+ options[:logger].formatter = lambda {|severity, datetime, progname, msg| "#{severity} #{msg}\n"}
271
+ end
272
+
273
+ # Read input, if necessary, which will be used in a Scanner.
274
+ @input = input.respond_to?(:read) ? input.read : input.to_s
275
+
276
+ parsing_terminals = false
277
+ @ast = []
278
+ parse(@input, :ebnf, EBNFMeta::RULES,
279
+ # Use an optimized Regexp for whitespace
280
+ whitespace: EBNF::Terminals::PASS,
281
+ **options
282
+ ) do |context, *data|
283
+ rule = case context
284
+ when :terminals
285
+ # After parsing `@terminals`
286
+ # This changes the state of the parser to treat subsequent rules as terminals.
287
+ parsing_terminals = true
288
+ rule = EBNF::Rule.new(nil, nil, data.first, kind: :terminals)
289
+ when :pass
290
+ # After parsing `@pass`
291
+ # This defines a specific rule for whitespace.
292
+ rule = EBNF::Rule.new(nil, nil, data.first, kind: :pass)
293
+ when :rule
294
+ # A rule which has already been turned into a `Rule` object.
295
+ rule = data.first
296
+ rule.kind = :terminal if parsing_terminals
297
+ rule
298
+ end
299
+ @ast << rule if rule
319
300
  end
301
+ rescue EBNF::PEG::Parser::Error => e
302
+ raise SyntaxError, e.message
320
303
  end
321
304
  end
322
305
  end