ebnf 1.1.1 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +218 -196
  3. data/UNLICENSE +1 -1
  4. data/VERSION +1 -1
  5. data/bin/ebnf +40 -21
  6. data/etc/abnf-core.ebnf +52 -0
  7. data/etc/abnf.abnf +121 -0
  8. data/etc/abnf.ebnf +124 -0
  9. data/etc/abnf.sxp +45 -0
  10. data/etc/doap.ttl +13 -12
  11. data/etc/ebnf.ebnf +21 -33
  12. data/etc/ebnf.html +171 -160
  13. data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
  14. data/etc/ebnf.ll1.sxp +182 -183
  15. data/etc/ebnf.peg.rb +90 -0
  16. data/etc/ebnf.peg.sxp +84 -0
  17. data/etc/ebnf.sxp +40 -41
  18. data/etc/iso-ebnf.ebnf +140 -0
  19. data/etc/iso-ebnf.isoebnf +138 -0
  20. data/etc/iso-ebnf.sxp +65 -0
  21. data/etc/sparql.ebnf +4 -4
  22. data/etc/sparql.html +1603 -1751
  23. data/etc/sparql.ll1.sxp +7372 -7372
  24. data/etc/sparql.peg.rb +532 -0
  25. data/etc/sparql.peg.sxp +597 -0
  26. data/etc/sparql.sxp +363 -362
  27. data/etc/turtle.ebnf +3 -3
  28. data/etc/turtle.html +465 -517
  29. data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
  30. data/etc/turtle.ll1.sxp +425 -425
  31. data/etc/turtle.peg.rb +182 -0
  32. data/etc/turtle.peg.sxp +199 -0
  33. data/etc/turtle.sxp +103 -101
  34. data/lib/ebnf.rb +7 -2
  35. data/lib/ebnf/abnf.rb +301 -0
  36. data/lib/ebnf/abnf/core.rb +23 -0
  37. data/lib/ebnf/abnf/meta.rb +111 -0
  38. data/lib/ebnf/base.rb +128 -87
  39. data/lib/ebnf/bnf.rb +1 -26
  40. data/lib/ebnf/ebnf/meta.rb +90 -0
  41. data/lib/ebnf/isoebnf.rb +229 -0
  42. data/lib/ebnf/isoebnf/meta.rb +75 -0
  43. data/lib/ebnf/ll1.rb +140 -8
  44. data/lib/ebnf/ll1/lexer.rb +37 -32
  45. data/lib/ebnf/ll1/parser.rb +113 -73
  46. data/lib/ebnf/ll1/scanner.rb +84 -51
  47. data/lib/ebnf/native.rb +320 -0
  48. data/lib/ebnf/parser.rb +285 -302
  49. data/lib/ebnf/peg.rb +39 -0
  50. data/lib/ebnf/peg/parser.rb +554 -0
  51. data/lib/ebnf/peg/rule.rb +241 -0
  52. data/lib/ebnf/rule.rb +453 -163
  53. data/lib/ebnf/terminals.rb +21 -0
  54. data/lib/ebnf/writer.rb +554 -85
  55. metadata +98 -20
  56. data/etc/sparql.rb +0 -45773
@@ -3,7 +3,7 @@ require 'strscan' unless defined?(StringScanner)
3
3
 
4
4
  module EBNF::LL1
5
5
  ##
6
- # Overload StringScanner with file operations
6
+ # Overload StringScanner with file operations and line counting
7
7
  #
8
8
  # * Reloads scanner as required until EOF.
9
9
  # * Loads to a high-water and reloads when remaining size reaches a low-water.
@@ -14,25 +14,14 @@ module EBNF::LL1
14
14
  LOW_WATER = 4 * 1024
15
15
 
16
16
  ##
17
- # @return [IO, StringIO]
17
+ # @return [String, IO, StringIO]
18
18
  attr_reader :input
19
19
 
20
20
  ##
21
- # If we don't have an IO input, simply use StringScanner directly
22
- # @private
23
- def self.new(input, options = {})
24
- input ||= ""
25
- if input.respond_to?(:read)
26
- scanner = self.allocate
27
- scanner.send(:initialize, input, options)
28
- else
29
- if input.encoding != Encoding::UTF_8
30
- input = input.dup if input.frozen?
31
- input.force_encoding(Encoding::UTF_8)
32
- end
33
- StringScanner.new(input)
34
- end
35
- end
21
+ # The current line number (one-based).
22
+ #
23
+ # @return [Integer]
24
+ attr_accessor :lineno
36
25
 
37
26
  ##
38
27
  # Create a scanner, from an IO
@@ -42,35 +31,26 @@ module EBNF::LL1
42
31
  # @option options[Integer] :high_water (HIGH_WATER)
43
32
  # @option options[Integer] :low_water (LOW_WATER)
44
33
  # @return [Scanner]
45
- def initialize(input, options = {})
34
+ def initialize(input, **options)
46
35
  @options = options.merge(high_water: HIGH_WATER, low_water: LOW_WATER)
47
36
 
48
- @input = input
49
- super("")
37
+ @previous_lineno = @lineno = 1
38
+ @input = input.is_a?(String) ? encode_utf8(input) : input
39
+ super(input.is_a?(String) ? @input : "")
50
40
  feed_me
51
41
  self
52
42
  end
53
43
 
54
44
  ##
55
- # Returns the "rest" of the line, or the next line if at EOL (i.e. everything after the scan pointer).
56
- # If there is no more data (eos? = true), it returns "".
57
- #
58
- # @return [String]
59
- def rest
60
- feed_me
61
- encode_utf8 super
62
- end
63
-
64
- ##
65
- # Attempts to skip over the given `pattern` beginning with the scan pointer.
66
- # If it matches, the scan pointer is advanced to the end of the match,
67
- # and the length of the match is returned. Otherwise, `nil` is returned.
68
- #
69
- # similar to `scan`, but without returning the matched string.
70
- # @param [Regexp] pattern
71
- def skip(pattern)
72
- feed_me
73
- super
45
+ # Ensures that the input buffer is full to the high water mark, or end of file. Useful when matching tokens that may be longer than the low water mark
46
+ def ensure_buffer_full
47
+ # Read up to high-water mark ensuring we're at an end of line
48
+ if @input.respond_to?(:eof?) && !@input.eof?
49
+ diff = @options[:high_water] - rest_size
50
+ string = encode_utf8(@input.read(diff))
51
+ string << encode_utf8(@input.gets) unless @input.eof?
52
+ self << string if string
53
+ end
74
54
  end
75
55
 
76
56
  ##
@@ -83,10 +63,14 @@ module EBNF::LL1
83
63
  end
84
64
 
85
65
  ##
86
- # Set the scan pointer to the end of the string and clear matching data
87
- def terminate
66
+ # Returns the "rest" of the line, or the next line if at EOL (i.e. everything after the scan pointer).
67
+ # If there is no more data (eos? = true), it returns "".
68
+ #
69
+ # @return [String]
70
+ def rest
88
71
  feed_me
89
- super
72
+ @lineno += 1 if eos?
73
+ encode_utf8 super
90
74
  end
91
75
 
92
76
  ##
@@ -108,19 +92,68 @@ module EBNF::LL1
108
92
  # @return [String]
109
93
  def scan(pattern)
110
94
  feed_me
111
- encode_utf8 super
95
+ @previous_lineno = @lineno
96
+ if matched = encode_utf8(super)
97
+ @lineno += matched.count("\n")
98
+ end
99
+ matched
112
100
  end
113
101
 
114
102
  ##
115
- # Ensures that the input buffer is full to the high water mark, or end of file. Useful when matching tokens that may be longer than the low water mark
116
- def ensure_buffer_full
117
- # Read up to high-water mark ensuring we're at an end of line
118
- if @input && !@input.eof?
119
- diff = @options[:high_water] - rest_size
120
- string = encode_utf8(@input.read(diff))
121
- string << encode_utf8(@input.gets) unless @input.eof?
122
- self << string if string
103
+ # Scans the string until the pattern is matched. Returns the substring up to and including the end of the match, advancing the scan pointer to that location. If there is no match, nil is returned.
104
+ #
105
+ # @example
106
+ # s = StringScanner.new("Fri Dec 12 1975 14:39")
107
+ # s.scan_until(/1/) # -> "Fri Dec 1"
108
+ # s.pre_match # -> "Fri Dec "
109
+ # s.scan_until(/XYZ/) # -> nil
110
+ #
111
+ # @param [Regexp] pattern
112
+ # @return [String]
113
+ def scan_until(pattern)
114
+ feed_me
115
+ @previous_lineno = @lineno
116
+ if matched = encode_utf8(super)
117
+ @lineno += matched.count("\n")
123
118
  end
119
+ matched
120
+ end
121
+
122
+ ##
123
+ # Attempts to skip over the given `pattern` beginning with the scan pointer.
124
+ # If it matches, the scan pointer is advanced to the end of the match,
125
+ # and line counting is updated. Note that this override always returns `nil`, whether or not the pattern matched.
126
+ #
127
+ # similar to `scan`, but without returning the matched string.
128
+ # @param [Regexp] pattern
129
+ def skip(pattern)
130
+ scan(pattern)
131
+ nil
132
+ end
133
+
134
+ ##
135
+ # Advances the scan pointer until pattern is matched and consumed. Returns the number of characters advanced, or nil if no match was found.
136
+ #
137
+ # Look ahead to match pattern, and advance the scan pointer to the end of the match. Return the number of characters advanced, or nil if the match was unsuccessful.
138
+ #
139
+ # It’s similar to scan_until, but without returning the intervening string.
140
+ # @param [Regexp] pattern
141
+ def skip_until(pattern)
142
+ (matched = scan_until(pattern)) && matched.length
143
+ end
144
+
145
+ ##
146
+ # Sets the scan pointer to the previous position. Only one previous position is remembered, and it changes with each scanning operation.
147
+ def unscan
148
+ @lineno = @previous_lineno
149
+ super
150
+ end
151
+
152
+ ##
153
+ # Set the scan pointer to the end of the string and clear matching data
154
+ def terminate
155
+ feed_me
156
+ super
124
157
  end
125
158
 
126
159
  private
@@ -0,0 +1,320 @@
1
+ module EBNF
2
+ module Native
3
+ ##
4
+ # Native parser for EBNF; less accurate, but appropriate when changing the EBNF grammar itself.
5
+ #
6
+ # Iterate over rule strings.
7
+ # A line that starts with '\[' or '@' starts a new rule.
8
+ #
9
+ # @param [StringScanner] scanner
10
+ # @yield rule_string
11
+ # @yieldparam [String] rule_string
12
+ def eachRule(scanner)
13
+ cur_lineno = 1
14
+ r = ''
15
+ until scanner.eos?
16
+ case
17
+ when s = scanner.scan(%r(\s+)m)
18
+ # Eat whitespace
19
+ cur_lineno += s.count("\n")
20
+ #debug("eachRule(ws)") { "[#{cur_lineno}] #{s.inspect}" }
21
+ when s = scanner.scan(%r(/\*([^\*]|\*[^\/])*\*/)m)
22
+ # Eat comments /* .. */
23
+ cur_lineno += s.count("\n")
24
+ debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
25
+ when s = scanner.scan(%r(\(\*([^\*]|\*[^\)])*\*\))m)
26
+ # Eat comments (* .. *)
27
+ cur_lineno += s.count("\n")
28
+ debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
29
+ when s = scanner.scan(%r((#(?!x)|//).*$))
30
+ # Eat comments // & #
31
+ cur_lineno += s.count("\n")
32
+ debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
33
+ when s = scanner.scan(/\A["']/)
34
+ # Found a quote, scan until end of matching quote
35
+ s += scanner.scan_until(/#{scanner.matched}|$/)
36
+ r += s
37
+ when s = scanner.scan(%r(^@terminals))
38
+ #debug("eachRule(@terminals)") { "[#{cur_lineno}] #{s.inspect}" }
39
+ yield(r) unless r.empty?
40
+ @lineno = cur_lineno
41
+ yield(s)
42
+ r = ''
43
+ when s = scanner.scan(/@pass/)
44
+ # Found an @pass directive; if we've already collected a rule, yield it
45
+ #debug("eachRule(@pass)") { "[#{cur_lineno}] #{s.inspect}" }
46
+ yield r unless r.empty?
47
+ @lineno = cur_lineno
48
+ r = s
49
+ when s = scanner.scan(EBNF::Terminals::LHS)
50
+ # Found rule start, if we've already collected a rule, yield it
51
+ yield r unless r.empty?
52
+ #debug("eachRule(rule)") { "[#{cur_lineno}] #{s.inspect}" }
53
+ @lineno = cur_lineno
54
+ r = s
55
+ else
56
+ # Collect until end of line, or start of comment or quote
57
+ s = scanner.scan_until(%r{(?:[/\(]\*)|#(?!x)|//|["']|$})
58
+ if scanner.matched.length > 0
59
+ # Back up scan head before ending match
60
+ scanner.pos = scanner.pos - scanner.matched.length
61
+
62
+ # Remove matched from end of string
63
+ s = s[0..-(scanner.matched.length+1)]
64
+ end
65
+ cur_lineno += s.count("\n")
66
+ #debug("eachRule(rest)") { "[#{cur_lineno}] #{s.inspect}" }
67
+ r += s
68
+ end
69
+ end
70
+ yield r unless r.empty?
71
+ end
72
+
73
+ ##
74
+ # Parse a rule into an optional rule number, a symbol and an expression
75
+ #
76
+ # @param [String] rule
77
+ # @return [Rule]
78
+ def ruleParts(rule)
79
+ num_sym, expr = rule.split('::=', 2).map(&:strip)
80
+ num, sym = num_sym.split(']', 2).map(&:strip)
81
+ num, sym = "", num if sym.nil?
82
+ num = num[1..-1]
83
+ r = Rule.new(sym && sym.to_sym, num, expression(expr).first, ebnf: self)
84
+ debug("ruleParts") { r.inspect }
85
+ r
86
+ end
87
+
88
+ ##
89
+ # Parse a string into an expression tree and a remaining string
90
+ #
91
+ # @example
92
+ # >>> expression("a b c")
93
+ # ((seq a b c) '')
94
+ #
95
+ # >>> expression("a? b+ c*")
96
+ # ((seq (opt a) (plus b) (star c)) '')
97
+ #
98
+ # >>> expression(" | x xlist")
99
+ # ((alt (seq) (seq x xlist)) '')
100
+ #
101
+ # >>> expression("a | (b - c)")
102
+ # ((alt a (diff b c)) '')
103
+ #
104
+ # >>> expression("a b | c d")
105
+ # ((alt (seq a b) (seq c d)) '')
106
+ #
107
+ # >>> expression("a | b | c")
108
+ # ((alt a b c) '')
109
+ #
110
+ # >>> expression("a) b c")
111
+ # (a ' b c')
112
+ #
113
+ # >>> expression("BaseDecl? PrefixDecl*")
114
+ # ((seq (opt BaseDecl) (star PrefixDecl)) '')
115
+ #
116
+ # >>> expression("NCCHAR1 | diff | [0-9] | #x00B7 | [#x0300-#x036F] | \[#x203F-#x2040\]")
117
+ # ((alt NCCHAR1 diff
118
+ # (range '0-9')
119
+ # (hex '#x00B7')
120
+ # (range '#x0300-#x036F')
121
+ # (range '#x203F-#x2040')) '')
122
+ #
123
+ # @param [String] s
124
+ # @return [Array]
125
+ def expression(s)
126
+ debug("expression") {"(#{s.inspect})"}
127
+ e, s = depth {alt(s)}
128
+ debug {"=> alt returned #{[e, s].inspect}"}
129
+ unless s.to_s.empty?
130
+ t, ss = depth {terminal(s)}
131
+ debug {"=> terminal returned #{[t, ss].inspect}"}
132
+ return [e, ss] if t.is_a?(Array) && t.first == :")"
133
+ end
134
+ [e, s]
135
+ end
136
+
137
+ ##
138
+ # Parse alt
139
+ # >>> alt("a | b | c")
140
+ # ((alt a b c) '')
141
+ # @param [String] s
142
+ # @return [Array]
143
+ def alt(s)
144
+ debug("alt") {"(#{s.inspect})"}
145
+ args = []
146
+ while !s.to_s.empty?
147
+ e, s = depth {seq(s)}
148
+ debug {"=> seq returned #{[e, s].inspect}"}
149
+ if e.to_s.empty?
150
+ break unless args.empty?
151
+ e = [:seq, []] # empty sequence
152
+ end
153
+ args << e
154
+ unless s.to_s.empty?
155
+ t, ss = depth {terminal(s)}
156
+ break unless t[0] == :alt
157
+ s = ss
158
+ end
159
+ end
160
+ args.length > 1 ? [args.unshift(:alt), s] : [e, s]
161
+ end
162
+
163
+ ##
164
+ # parse seq
165
+ #
166
+ # >>> seq("a b c")
167
+ # ((seq a b c) '')
168
+ #
169
+ # >>> seq("a b? c")
170
+ # ((seq a (opt b) c) '')
171
+ def seq(s)
172
+ debug("seq") {"(#{s.inspect})"}
173
+ args = []
174
+ while !s.to_s.empty?
175
+ e, ss = depth {diff(s)}
176
+ debug {"=> diff returned #{[e, ss].inspect}"}
177
+ unless e.to_s.empty?
178
+ args << e
179
+ s = ss
180
+ else
181
+ break;
182
+ end
183
+ end
184
+ if args.length > 1
185
+ [args.unshift(:seq), s]
186
+ elsif args.length == 1
187
+ args + [s]
188
+ else
189
+ ["", s]
190
+ end
191
+ end
192
+
193
+ ##
194
+ # parse diff
195
+ #
196
+ # >>> diff("a - b")
197
+ # ((diff a b) '')
198
+ def diff(s)
199
+ debug("diff") {"(#{s.inspect})"}
200
+ e1, s = depth {postfix(s)}
201
+ debug {"=> postfix returned #{[e1, s].inspect}"}
202
+ unless e1.to_s.empty?
203
+ unless s.to_s.empty?
204
+ t, ss = depth {terminal(s)}
205
+ debug {"diff #{[t, ss].inspect}"}
206
+ if t.is_a?(Array) && t.first == :diff
207
+ s = ss
208
+ e2, s = primary(s)
209
+ unless e2.to_s.empty?
210
+ return [[:diff, e1, e2], s]
211
+ else
212
+ error("diff", "Syntax Error")
213
+ raise SyntaxError, "diff missing second operand"
214
+ end
215
+ end
216
+ end
217
+ end
218
+ [e1, s]
219
+ end
220
+
221
+ ##
222
+ # parse postfix
223
+ #
224
+ # >>> postfix("a b c")
225
+ # (a ' b c')
226
+ #
227
+ # >>> postfix("a? b c")
228
+ # ((opt a) ' b c')
229
+ def postfix(s)
230
+ debug("postfix") {"(#{s.inspect})"}
231
+ e, s = depth {primary(s)}
232
+ debug {"=> primary returned #{[e, s].inspect}"}
233
+ return ["", s] if e.to_s.empty?
234
+ if !s.to_s.empty?
235
+ t, ss = depth {terminal(s)}
236
+ debug {"=> #{[t, ss].inspect}"}
237
+ if t.is_a?(Array) && [:opt, :star, :plus].include?(t.first)
238
+ return [[t.first, e], ss]
239
+ end
240
+ end
241
+ [e, s]
242
+ end
243
+
244
+ ##
245
+ # parse primary
246
+ #
247
+ # >>> primary("a b c")
248
+ # (a ' b c')
249
+ def primary(s)
250
+ debug("primary") {"(#{s.inspect})"}
251
+ t, s = depth {terminal(s)}
252
+ debug {"=> terminal returned #{[t, s].inspect}"}
253
+ if t.is_a?(Symbol) || t.is_a?(String)
254
+ [t, s]
255
+ elsif %w(range hex).map(&:to_sym).include?(t.first)
256
+ [t, s]
257
+ elsif t.first == :"("
258
+ e, s = depth {expression(s)}
259
+ debug {"=> expression returned #{[e, s].inspect}"}
260
+ [e, s]
261
+ else
262
+ ["", s]
263
+ end
264
+ end
265
+
266
+ ##
267
+ # parse one terminal; return the terminal and the remaining string
268
+ #
269
+ # A terminal is represented as a tuple whose 1st item gives the type;
270
+ # some types have additional info in the tuple.
271
+ #
272
+ # @example
273
+ # >>> terminal("'abc' def")
274
+ # ('abc' ' def')
275
+ #
276
+ # >>> terminal("[0-9]")
277
+ # ((range '0-9') '')
278
+ # >>> terminal("#x00B7")
279
+ # ((hex '#x00B7') '')
280
+ # >>> terminal("\[#x0300-#x036F\]")
281
+ # ((range '#x0300-#x036F') '')
282
+ # >>> terminal("\[^<>'{}|^`\]-\[#x00-#x20\]")
283
+ # ((range "^<>'{}|^`") '-\[#x00-#x20\]')
284
+ def terminal(s)
285
+ s = s.strip
286
+ #STDERR.puts s.inspect
287
+ case m = s[0,1]
288
+ when '"', "'" # STRING1 or STRING2
289
+ l, s = s[1..-1].split(m.rstrip, 2)
290
+ [LL1::Lexer.unescape_string(l), s]
291
+ when '[' # RANGE, O_RANGE
292
+ l, s = s[1..-1].split(/(?<=[^\\])\]/, 2)
293
+ [[:range, LL1::Lexer.unescape_string(l)], s]
294
+ when '#' # HEX
295
+ s.match(/(#x\h+)(.*)$/)
296
+ l, s = $1, $2
297
+ [[:hex, l], s]
298
+ when /[\w\.]/ # SYMBOL
299
+ s.match(/([\w\.]+)(.*)$/)
300
+ l, s = $1, $2
301
+ [l.to_sym, s]
302
+ when '-'
303
+ [[:diff], s[1..-1]]
304
+ when '?'
305
+ [[:opt], s[1..-1]]
306
+ when '|'
307
+ [[:alt], s[1..-1]]
308
+ when '+'
309
+ [[:plus], s[1..-1]]
310
+ when '*'
311
+ [[:star], s[1..-1]]
312
+ when /[\(\)]/ # '(' or ')'
313
+ [[m.to_sym], s[1..-1]]
314
+ else
315
+ error("terminal", "unrecognized terminal: #{s.inspect}")
316
+ raise SyntaxError, "unrecognized terminal: #{s.inspect}"
317
+ end
318
+ end
319
+ end
320
+ end