ebnf 1.2.0 → 2.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +223 -199
  3. data/UNLICENSE +1 -1
  4. data/VERSION +1 -1
  5. data/bin/ebnf +38 -19
  6. data/etc/abnf-core.ebnf +52 -0
  7. data/etc/abnf.abnf +121 -0
  8. data/etc/abnf.ebnf +124 -0
  9. data/etc/abnf.sxp +45 -0
  10. data/etc/doap.ttl +23 -18
  11. data/etc/ebnf.ebnf +21 -33
  12. data/etc/ebnf.html +76 -160
  13. data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
  14. data/etc/ebnf.ll1.sxp +182 -183
  15. data/etc/ebnf.peg.rb +90 -0
  16. data/etc/ebnf.peg.sxp +84 -0
  17. data/etc/ebnf.sxp +40 -41
  18. data/etc/iso-ebnf.ebnf +140 -0
  19. data/etc/iso-ebnf.isoebnf +138 -0
  20. data/etc/iso-ebnf.sxp +65 -0
  21. data/etc/sparql.ebnf +4 -4
  22. data/etc/sparql.html +1603 -1751
  23. data/etc/sparql.ll1.sxp +7372 -7372
  24. data/etc/sparql.peg.rb +532 -0
  25. data/etc/sparql.peg.sxp +597 -0
  26. data/etc/sparql.sxp +363 -362
  27. data/etc/turtle.ebnf +3 -3
  28. data/etc/turtle.html +465 -517
  29. data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
  30. data/etc/turtle.ll1.sxp +425 -425
  31. data/etc/turtle.peg.rb +182 -0
  32. data/etc/turtle.peg.sxp +199 -0
  33. data/etc/turtle.sxp +103 -101
  34. data/lib/ebnf.rb +6 -1
  35. data/lib/ebnf/abnf.rb +301 -0
  36. data/lib/ebnf/abnf/core.rb +23 -0
  37. data/lib/ebnf/abnf/meta.rb +111 -0
  38. data/lib/ebnf/base.rb +114 -69
  39. data/lib/ebnf/bnf.rb +1 -26
  40. data/lib/ebnf/ebnf/meta.rb +90 -0
  41. data/lib/ebnf/isoebnf.rb +229 -0
  42. data/lib/ebnf/isoebnf/meta.rb +75 -0
  43. data/lib/ebnf/ll1.rb +131 -3
  44. data/lib/ebnf/ll1/lexer.rb +20 -22
  45. data/lib/ebnf/ll1/parser.rb +97 -64
  46. data/lib/ebnf/ll1/scanner.rb +82 -50
  47. data/lib/ebnf/native.rb +320 -0
  48. data/lib/ebnf/parser.rb +285 -302
  49. data/lib/ebnf/peg.rb +39 -0
  50. data/lib/ebnf/peg/parser.rb +561 -0
  51. data/lib/ebnf/peg/rule.rb +250 -0
  52. data/lib/ebnf/rule.rb +442 -148
  53. data/lib/ebnf/terminals.rb +21 -0
  54. data/lib/ebnf/writer.rb +587 -82
  55. metadata +125 -18
  56. data/etc/sparql.rb +0 -45773
@@ -3,7 +3,7 @@ require 'strscan' unless defined?(StringScanner)
3
3
 
4
4
  module EBNF::LL1
5
5
  ##
6
- # Overload StringScanner with file operations
6
+ # Overload StringScanner with file operations and line counting
7
7
  #
8
8
  # * Reloads scanner as required until EOF.
9
9
  # * Loads to a high-water and reloads when remaining size reaches a low-water.
@@ -14,25 +14,14 @@ module EBNF::LL1
14
14
  LOW_WATER = 4 * 1024
15
15
 
16
16
  ##
17
- # @return [IO, StringIO]
17
+ # @return [String, IO, StringIO]
18
18
  attr_reader :input
19
19
 
20
20
  ##
21
- # If we don't have an IO input, simply use StringScanner directly
22
- # @private
23
- def self.new(input, **options)
24
- input ||= ""
25
- if input.respond_to?(:read)
26
- scanner = self.allocate
27
- scanner.send(:initialize, input, **options)
28
- else
29
- if input.encoding != Encoding::UTF_8
30
- input = input.dup if input.frozen?
31
- input.force_encoding(Encoding::UTF_8)
32
- end
33
- StringScanner.new(input)
34
- end
35
- end
21
+ # The current line number (one-based).
22
+ #
23
+ # @return [Integer]
24
+ attr_accessor :lineno
36
25
 
37
26
  ##
38
27
  # Create a scanner from a String, IO or StringIO
@@ -45,32 +34,23 @@ module EBNF::LL1
45
34
  def initialize(input, **options)
46
35
  @options = options.merge(high_water: HIGH_WATER, low_water: LOW_WATER)
47
36
 
48
- @input = input
49
- super("")
37
+ @previous_lineno = @lineno = 1
38
+ @input = input.is_a?(String) ? encode_utf8(input) : input
39
+ super(input.is_a?(String) ? @input : "")
50
40
  feed_me
51
41
  self
52
42
  end
53
43
 
54
44
  ##
55
- # Returns the "rest" of the line, or the next line if at EOL (i.e. everything after the scan pointer).
56
- # If there is no more data (eos? = true), it returns "".
57
- #
58
- # @return [String]
59
- def rest
60
- feed_me
61
- encode_utf8 super
62
- end
63
-
64
- ##
65
- # Attempts to skip over the given `pattern` beginning with the scan pointer.
66
- # If it matches, the scan pointer is advanced to the end of the match,
67
- # and the length of the match is returned. Otherwise, `nil` is returned.
68
- #
69
- # similar to `scan`, but without returning the matched string.
70
- # @param [Regexp] pattern
71
- def skip(pattern)
72
- feed_me
73
- super
45
+ # Ensures that the input buffer is full to the high water mark, or end of file. Useful when matching tokens that may be longer than the low water mark
46
+ def ensure_buffer_full
47
+ # Read up to high-water mark ensuring we're at an end of line
48
+ if @input.respond_to?(:eof?) && !@input.eof?
49
+ diff = @options[:high_water] - rest_size
50
+ string = encode_utf8(@input.read(diff))
51
+ string << encode_utf8(@input.gets) unless @input.eof?
52
+ self << string if string
53
+ end
74
54
  end
75
55
 
76
56
  ##
@@ -83,10 +63,13 @@ module EBNF::LL1
83
63
  end
84
64
 
85
65
  ##
86
- # Set the scan pointer to the end of the string and clear matching data
87
- def terminate
66
+ # Returns the "rest" of the line, or the next line if at EOL (i.e. everything after the scan pointer).
67
+ # If there is no more data (eos? = true), it returns "".
68
+ #
69
+ # @return [String]
70
+ def rest
88
71
  feed_me
89
- super
72
+ encode_utf8 super
90
73
  end
91
74
 
92
75
  ##
@@ -108,19 +91,68 @@ module EBNF::LL1
108
91
  # @return [String]
109
92
  def scan(pattern)
110
93
  feed_me
111
- encode_utf8 super
94
+ @previous_lineno = @lineno
95
+ if matched = encode_utf8(super)
96
+ @lineno += matched.count("\n")
97
+ end
98
+ matched
112
99
  end
113
100
 
114
101
  ##
115
- # Ensures that the input buffer is full to the high water mark, or end of file. Useful when matching tokens that may be longer than the low water mark
116
- def ensure_buffer_full
117
- # Read up to high-water mark ensuring we're at an end of line
118
- if @input && !@input.eof?
119
- diff = @options[:high_water] - rest_size
120
- string = encode_utf8(@input.read(diff))
121
- string << encode_utf8(@input.gets) unless @input.eof?
122
- self << string if string
102
+ # Scans the string until the pattern is matched. Returns the substring up to and including the end of the match, advancing the scan pointer to that location. If there is no match, nil is returned.
103
+ #
104
+ # @example
105
+ # s = StringScanner.new("Fri Dec 12 1975 14:39")
106
+ # s.scan_until(/1/) # -> "Fri Dec 1"
107
+ # s.pre_match # -> "Fri Dec "
108
+ # s.scan_until(/XYZ/) # -> nil
109
+ #
110
+ # @param [Regexp] pattern
111
+ # @return [String]
112
+ def scan_until(pattern)
113
+ feed_me
114
+ @previous_lineno = @lineno
115
+ if matched = encode_utf8(super)
116
+ @lineno += matched.count("\n")
123
117
  end
118
+ matched
119
+ end
120
+
121
+ ##
122
+ # Attempts to skip over the given `pattern` beginning with the scan pointer.
123
+ # If it matches, the scan pointer is advanced to the end of the match,
124
+ # but note that this override calls `scan` and then always returns `nil`, rather than the length of the match as `StringScanner#skip` does.
125
+ #
126
+ # similar to `scan`, but without returning the matched string.
127
+ # @param [Regexp] pattern
128
+ def skip(pattern)
129
+ scan(pattern)
130
+ nil
131
+ end
132
+
133
+ ##
134
+ # Advances the scan pointer until pattern is matched and consumed. Returns the number of characters advanced (the length of the matched string), or nil if no match was found.
135
+ #
136
+ # Look ahead to match pattern, and advance the scan pointer to the end of the match. Return the number of characters advanced, or nil if the match was unsuccessful.
137
+ #
138
+ # It’s similar to scan_until, but without returning the intervening string.
139
+ # @param [Regexp] pattern
140
+ def skip_until(pattern)
141
+ (matched = scan_until(pattern)) && matched.length
142
+ end
143
+
144
+ ##
145
+ # Sets the scan pointer to the previous position. Only one previous position is remembered, and it changes with each scanning operation.
146
+ def unscan
147
+ @lineno = @previous_lineno
148
+ super
149
+ end
150
+
151
+ ##
152
+ # Set the scan pointer to the end of the string and clear matching data
153
+ def terminate
154
+ feed_me
155
+ super
124
156
  end
125
157
 
126
158
  private
@@ -0,0 +1,320 @@
1
module EBNF
  ##
  # Hand-written ("native") parser for EBNF; less accurate than a generated
  # parser, but appropriate when changing the EBNF grammar itself.
  #
  # The methods below call `debug`, `depth` and `error`, which are not defined
  # in this module and must be provided by whatever object includes it.
  module Native
    ##
    # Iterate over rule strings.
    #
    # A new rule starts at `@terminals`, at `@pass`, or at anything matching
    # `EBNF::Terminals::LHS`. Whitespace and comments found between pieces of
    # a rule are skipped. The line on which the current rule started is
    # recorded in `@lineno`.
    #
    # @param [StringScanner] scanner
    # @yield rule_string
    # @yieldparam [String] rule_string
    # @raise [SyntaxError] if the input cannot be advanced, which happens for
    #   a `/*` or `(*` comment that is never closed
    def eachRule(scanner)
      cur_lineno = 1
      r = ''
      until scanner.eos?
        case
        when s = scanner.scan(%r(\s+)m)
          # Eat whitespace
          cur_lineno += s.count("\n")
        when s = scanner.scan(%r(/\*.*?\*/)m)
          # Eat comments /* .. */ (non-greedy, so the first */ closes the
          # comment and runs of stars such as **/ are accepted)
          cur_lineno += s.count("\n")
          debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
        when s = scanner.scan(%r(\(\*.*?\*\))m)
          # Eat comments (* .. *)
          cur_lineno += s.count("\n")
          debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
        when s = scanner.scan(%r((#(?!x)|//).*$))
          # Eat comments // & # (but not #x.., which is a hex terminal)
          cur_lineno += s.count("\n")
          debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
        when s = scanner.scan(/\A["']/)
          # Found a quote, scan until end of matching quote (or end of line)
          s += scanner.scan_until(/#{scanner.matched}|$/)
          r += s
        when s = scanner.scan(%r(^@terminals))
          # Flush any rule collected so far, then yield the marker by itself
          yield(r) unless r.empty?
          @lineno = cur_lineno
          yield(s)
          r = ''
        when s = scanner.scan(/@pass/)
          # Found rule start, if we've already collected a rule, yield it
          yield r unless r.empty?
          @lineno = cur_lineno
          r = s
        when s = scanner.scan(EBNF::Terminals::LHS)
          # Found rule start, if we've already collected a rule, yield it
          yield r unless r.empty?
          @lineno = cur_lineno
          r = s
        else
          # Collect until end of line, or start of comment or quote
          s = scanner.scan_until(%r{(?:[/\(]\*)|#(?!x)|//|["']|$})
          delimiter = scanner.matched
          unless delimiter.empty?
            # Back up scan head before the delimiter so that the next pass
            # handles it, and remove it from the collected text
            scanner.pos -= delimiter.length
            s = s[0...-delimiter.length]
          end
          if s.empty?
            # Nothing was consumed: every later pass would see the same input
            # and loop forever. This is reached for an unclosed /* or (*.
            error("eachRule", "unterminated comment or unrecognized input at line #{cur_lineno}")
            raise SyntaxError, "unterminated comment or unrecognized input at line #{cur_lineno}"
          end
          cur_lineno += s.count("\n")
          r += s
        end
      end
      yield r unless r.empty?
    end

    ##
    # Parse a rule into an optional rule number, a symbol and an expression
    #
    # @param [String] rule text of the form `[num] sym ::= expr` (the
    #   `[num]` part is optional)
    # @return [Rule]
    def ruleParts(rule)
      num_sym, expr = rule.split('::=', 2).map(&:strip)
      num, sym = num_sym.split(']', 2).map(&:strip)
      # No ']' present: the whole left-hand side is the symbol
      num, sym = "", num if sym.nil?
      # Drop the leading '[' (yields nil when there was no number)
      num = num[1..-1]
      r = Rule.new(sym && sym.to_sym, num, expression(expr).first, ebnf: self)
      debug("ruleParts") { r.inspect }
      r
    end

    ##
    # Parse a string into an expression tree and a remaining string
    #
    # @example
    #     >>> expression("a b c")
    #     ((seq a b c) '')
    #
    #     >>> expression("a? b+ c*")
    #     ((seq (opt a) (plus b) (star c)) '')
    #
    #     >>> expression("a | (b - c)")
    #     ((alt a (diff b c)) '')
    #
    #     >>> expression("a b | c d")
    #     ((alt (seq a b) (seq c d)) '')
    #
    #     >>> expression("a | b | c")
    #     ((alt a b c) '')
    #
    #     >>> expression("a) b c")
    #     (a ' b c')
    #
    #     >>> expression("BaseDecl? PrefixDecl*")
    #     ((seq (opt BaseDecl) (star PrefixDecl)) '')
    #
    # @param [String] s
    # @return [Array] two elements: the expression and the unparsed remainder
    def expression(s)
      debug("expression") {"(#{s.inspect})"}
      e, s = depth {alt(s)}
      debug {"=> alt returned #{[e, s].inspect}"}
      unless s.to_s.empty?
        t, ss = depth {terminal(s)}
        debug {"=> terminal returned #{[t, ss].inspect}"}
        # A closing paren ends a parenthesized sub-expression; consume it
        return [e, ss] if t.is_a?(Array) && t.first == :")"
      end
      [e, s]
    end

    ##
    # Parse alt
    #     >>> alt("a | b | c")
    #     ((alt a b c) '')
    # @param [String] s
    # @return [Array] two elements: the expression and the unparsed remainder
    def alt(s)
      debug("alt") {"(#{s.inspect})"}
      args = []
      until s.to_s.empty?
        e, s = depth {seq(s)}
        debug {"=> seq returned #{[e, s].inspect}"}
        if e.to_s.empty?
          break unless args.empty?
          e = [:seq, []] # empty sequence
        end
        args << e
        next if s.to_s.empty?
        t, ss = depth {terminal(s)}
        break unless t.is_a?(Array) && t.first == :alt
        s = ss
      end
      # With a single alternative return it unwrapped. Use args.first rather
      # than the last seq result: after a trailing '|' the last result is
      # empty and would discard the alternative already collected.
      args.length > 1 ? [args.unshift(:alt), s] : [args.first, s]
    end

    ##
    # parse seq
    #
    #     >>> seq("a b c")
    #     ((seq a b c) '')
    #
    #     >>> seq("a b? c")
    #     ((seq a (opt b) c) '')
    #
    # @param [String] s
    # @return [Array] two elements: the expression (`""` when nothing was
    #   parsed) and the unparsed remainder
    def seq(s)
      debug("seq") {"(#{s.inspect})"}
      args = []
      until s.to_s.empty?
        e, ss = depth {diff(s)}
        debug {"=> diff returned #{[e, ss].inspect}"}
        # Leave s untouched when nothing was parsed, so the caller sees the
        # token that stopped the sequence
        break if e.to_s.empty?
        args << e
        s = ss
      end
      case args.length
      when 0 then ["", s]
      when 1 then [args.first, s]
      else [args.unshift(:seq), s]
      end
    end

    ##
    # parse diff
    #
    #     >>> diff("a - b")
    #     ((diff a b) '')
    #
    # @param [String] s
    # @return [Array] two elements: the expression and the unparsed remainder
    # @raise [SyntaxError] if `-` is not followed by a second operand
    def diff(s)
      debug("diff") {"(#{s.inspect})"}
      e1, s = depth {postfix(s)}
      debug {"=> postfix returned #{[e1, s].inspect}"}
      return [e1, s] if e1.to_s.empty? || s.to_s.empty?

      t, ss = depth {terminal(s)}
      debug {"diff #{[t, ss].inspect}"}
      return [e1, s] unless t.is_a?(Array) && t.first == :diff

      e2, s = primary(ss)
      if e2.to_s.empty?
        error("diff", "Syntax Error")
        raise SyntaxError, "diff missing second operand"
      end
      [[:diff, e1, e2], s]
    end

    ##
    # parse postfix
    #
    #     >>> postfix("a b c")
    #     (a ' b c')
    #
    #     >>> postfix("a? b c")
    #     ((opt a) ' b c')
    #
    # @param [String] s
    # @return [Array] two elements: the expression and the unparsed remainder
    def postfix(s)
      debug("postfix") {"(#{s.inspect})"}
      e, s = depth {primary(s)}
      debug {"=> primary returned #{[e, s].inspect}"}
      return ["", s] if e.to_s.empty?
      unless s.to_s.empty?
        t, ss = depth {terminal(s)}
        debug {"=> #{[t, ss].inspect}"}
        if t.is_a?(Array) && %i[opt star plus].include?(t.first)
          return [[t.first, e], ss]
        end
      end
      [e, s]
    end

    ##
    # parse primary
    #
    #     >>> primary("a b c")
    #     (a ' b c')
    #
    # @param [String] s
    # @return [Array] two elements: the expression (`""` when the next token
    #   cannot start a primary) and the unparsed remainder
    def primary(s)
      debug("primary") {"(#{s.inspect})"}
      t, s = depth {terminal(s)}
      debug {"=> terminal returned #{[t, s].inspect}"}
      if t.is_a?(Symbol) || t.is_a?(String)
        [t, s]
      elsif %i[range hex].include?(t.first)
        [t, s]
      elsif t.first == :"("
        # Parenthesized sub-expression; expression consumes the closing paren
        e, s = depth {expression(s)}
        debug {"=> expression returned #{[e, s].inspect}"}
        [e, s]
      else
        ["", s]
      end
    end

    ##
    # parse one terminal; return the terminal and the remaining string
    #
    # A terminal is represented as a tuple whose 1st item gives the type;
    # some types have additional info in the tuple.
    #
    # @example
    #     >>> terminal("'abc' def")
    #     ('abc' ' def')
    #
    #     >>> terminal("[0-9]")
    #     ((range '0-9') '')
    #     >>> terminal("#x00B7")
    #     ((hex '#x00B7') '')
    #     >>> terminal ("\[#x0300-#x036F\]")
    #     ((range '#x0300-#x036F') '')
    #     >>> terminal("\[^<>'{}|^`\]-\[#x00-#x20\]")
    #     ((range "^<>'{}|^`") '-\[#x00-#x20\]')
    #
    # @param [String] s
    # @return [Array] two elements: the terminal and the unparsed remainder
    # @raise [SyntaxError] if the text does not start with a known terminal
    def terminal(s)
      s = s.strip
      case m = s[0,1]
      when '"', "'" # STRING1 or STRING2
        l, s = s[1..-1].split(m, 2)
        [LL1::Lexer.unescape_string(l), s]
      when '[' # RANGE, O_RANGE
        # Split at the first ']' that is not preceded by a backslash
        l, s = s[1..-1].split(/(?<=[^\\])\]/, 2)
        [[:range, LL1::Lexer.unescape_string(l)], s]
      when '#' # HEX
        # Anchored, and multiline so text after a newline is kept in the
        # remainder rather than dropped
        md = /\A(#x\h+)(.*)\z/m.match(s)
        unless md
          error("terminal", "unrecognized terminal: #{s.inspect}")
          raise SyntaxError, "unrecognized terminal: #{s.inspect}"
        end
        [[:hex, md[1]], md[2]]
      when /[\w\.]/ # SYMBOL
        md = /\A([\w\.]+)(.*)\z/m.match(s)
        [md[1].to_sym, md[2]]
      when '-'
        [[:diff], s[1..-1]]
      when '?'
        [[:opt], s[1..-1]]
      when '|'
        [[:alt], s[1..-1]]
      when '+'
        [[:plus], s[1..-1]]
      when '*'
        [[:star], s[1..-1]]
      when /[\(\)]/ # '(' or ')'
        [[m.to_sym], s[1..-1]]
      else
        error("terminal", "unrecognized terminal: #{s.inspect}")
        raise SyntaxError, "unrecognized terminal: #{s.inspect}"
      end
    end
  end
end