ebnf 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
- module RDF::LL1
2
- require 'rdf/ll1/scanner' unless defined?(Scanner)
1
+ module EBNF::LL1
2
+ require 'ebnf/ll1/scanner' unless defined?(Scanner)
3
3
 
4
4
  ##
5
5
  # A lexical analyzer
@@ -10,13 +10,13 @@ module RDF::LL1
10
10
  # ...
11
11
  # ]
12
12
  # ttl = "@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ."
13
- # lexer = RDF::LL1::Lexer.tokenize(ttl, terminals)
13
+ # lexer = EBNF::LL1::Lexer.tokenize(ttl, terminals)
14
14
  # lexer.each_token do |token|
15
15
  # puts token.inspect
16
16
  # end
17
17
  #
18
18
  # @example Tokenizing and returning a token stream
19
- # lexer = RDF::LL1::Lexer.tokenize(...)
19
+ # lexer = EBNF::LL1::Lexer.tokenize(...)
20
20
  # while :some-condition
21
21
  # token = lexer.first # Get the current token
22
22
  # token = lexer.shift # Get the current token and shift to the next
@@ -24,8 +24,8 @@ module RDF::LL1
24
24
  #
25
25
  # @example Handling error conditions
26
26
  # begin
27
- # RDF::Turtle::Lexer.tokenize(query)
28
- # rescue RDF::Turtle::Lexer::Error => error
27
+ # EBNF::LL1::Lexer.tokenize(query)
28
+ # rescue EBNF::LL1::Lexer::Error => error
29
29
  # warn error.inspect
30
30
  # end
31
31
  #
@@ -307,7 +307,7 @@ module RDF::LL1
307
307
  # Represents a lexer token.
308
308
  #
309
309
  # @example Creating a new token
310
- # token = RDF::LL1::Lexer::Token.new(:LANGTAG, "en")
310
+ # token = EBNF::LL1::Lexer::Token.new(:LANGTAG, "en")
311
311
  # token.type #=> :LANGTAG
312
312
  # token.value #=> "en"
313
313
  #
@@ -369,10 +369,10 @@ module RDF::LL1
369
369
  # of this token.
370
370
  #
371
371
  # @example Matching using the symbolic type
372
- # RDF::LL1::Lexer::Token.new(:NIL) === :NIL #=> true
372
+ # EBNF::LL1::Lexer::Token.new(:NIL) === :NIL #=> true
373
373
  #
374
374
  # @example Matching using the string value
375
- # RDF::LL1::Lexer::Token.new(nil, "{") === "{" #=> true
375
+ # EBNF::LL1::Lexer::Token.new(nil, "{") === "{" #=> true
376
376
  #
377
377
  # @param [Symbol, String] value
378
378
  # @return [Boolean]
@@ -425,7 +425,7 @@ module RDF::LL1
425
425
  # Raised for errors during lexical analysis.
426
426
  #
427
427
  # @example Raising a lexer error
428
- # raise RDF::LL1::Lexer::Error.new(
428
+ # raise EBNF::LL1::Lexer::Error.new(
429
429
  # "invalid token '%' on line 10",
430
430
  # :input => query, :token => '%', :lineno => 9)
431
431
  #
@@ -472,4 +472,4 @@ module RDF::LL1
472
472
  end
473
473
  end
474
474
  end # class Lexer
475
- end # module RDF::Turtle
475
+ end # module EBNF::LL1
@@ -1,7 +1,6 @@
1
- require 'rdf'
2
- require 'rdf/ll1/lexer'
1
+ require 'ebnf/ll1/lexer'
3
2
 
4
- module RDF::LL1
3
+ module EBNF::LL1
5
4
  ##
6
5
  # A Generic LL1 parser using a lexer and branch tables defined using the SWAP tool chain (modified).
7
6
  module Parser
@@ -33,9 +32,9 @@ module RDF::LL1
33
32
  #
34
33
  # @param [Symbol] term
35
34
  # Term which is a key in the branch table
36
- # @yield [reader, phase, input, current]
37
- # @yieldparam [RDF::Reader] reader
38
- # Reader instance
35
+ # @yield [parser, phase, input, current]
36
+ # @yieldparam [Object] parser
37
+ # Parser instance
39
38
  # @yieldparam [Symbol] phase
40
39
  # Phase of parsing, one of :start, or :finish
41
40
  # @yieldparam [Hash] input
@@ -45,7 +44,7 @@ module RDF::LL1
45
44
  # may be initialized with data to pass to further productions,
46
45
  # during :finish, it contains data placed by earlier productions
47
46
  # @yieldparam [Prod] block
48
- # Block passed to initialization for yielding to calling reader.
47
+ # Block passed to initialization for yielding to calling parser.
49
48
  # Should conform to the yield specs for #initialize
50
49
  # Yield to generate a triple
51
50
  def production(term, &block)
@@ -66,9 +65,9 @@ module RDF::LL1
66
65
  # @param [Hash] options
67
66
  # @option options [Boolean] :unescape
68
67
  # Cause strings and codepoints to be unescaped.
69
- # @yield [reader, term, token, input]
70
- # @yieldparam [RDF::Reader] reader
71
- # Reader instance
68
+ # @yield [parser, term, token, input]
69
+ # @yieldparam [Object] parser
70
+ # Parser instance
72
71
  # @yieldparam [Symbol] term
73
72
  # A symbol indicating the production which referenced this terminal
74
73
  # @yieldparam [String] token
@@ -76,7 +75,7 @@ module RDF::LL1
76
75
  # @yieldparam [Hash] input
77
76
  # A Hash containing input from the parent production
78
77
  # @yieldparam [Prod] block
79
- # Block passed to initialization for yielding to calling reader.
78
+ # Block passed to initialization for yielding to calling parser.
80
79
  # Should conform to the yield specs for #initialize
81
80
  def terminal(term, regexp, options = {}, &block)
82
81
  @@patterns ||= []
@@ -96,10 +95,10 @@ module RDF::LL1
96
95
  # @example
97
96
  # require 'ebnf/ll1/parser'
98
97
  #
99
- # class Reader << RDF::Reader
100
- # include RDF::LL1::Parser
98
+ # class MyParser
99
+ # include EBNF::LL1::Parser
101
100
  #
102
- # branch RDF::Turtle::Reader::BRANCH
101
+ # branch MyParser::BRANCH
103
102
  #
104
103
  # ##
105
104
  # # Defines a production called during different phases of parsing
@@ -107,14 +106,14 @@ module RDF::LL1
107
106
  # # current production
108
107
  # #
109
108
  # # Yield to generate a triple
110
- # production :object do |reader, phase, input, current|
109
+ # production :object do |parser, phase, input, current|
111
110
  # object = current[:resource]
112
111
  # yield :statement, RDF::Statement.new(input[:subject], input[:predicate], object)
113
112
  # end
114
113
  #
115
114
  # ##
116
115
  # # Defines the pattern for a terminal node
117
- # terminal :BLANK_NODE_LABEL, %r(_:(#{PN_LOCAL})) do |reader, production, token, input|
116
+ # terminal :BLANK_NODE_LABEL, %r(_:(#{PN_LOCAL})) do |parser, production, token, input|
118
117
  # input[:BLANK_NODE_LABEL] = RDF::Node.new(token)
119
118
  # end
120
119
  #
@@ -138,29 +137,26 @@ module RDF::LL1
138
137
  # end
139
138
  #
140
139
  # @param [String, #to_s] input
141
- # @param [Symbol, #to_s] prod The starting production for the parser.
142
- # It may be a URI from the grammar, or a symbol representing the local_name portion of the grammar URI.
140
+ # @param [Symbol, #to_s] prod The starting production for the parser. It may be a URI from the grammar, or a symbol representing the local_name portion of the grammar URI.
143
141
  # @param [Hash{Symbol => Object}] options
144
- # @option options [Hash{Symbol,String => Hash{Symbol,String => Array<Symbol,String>}}] :branch
145
- # LL1 branch table.
142
+ # @option options [Hash{Symbol,String => Hash{Symbol,String => Array<Symbol,String>}}] :branch LL1 branch table.
146
143
  # @option options [Hash{Symbol,String => Array<Symbol,String>}] :first ({})
147
144
  # Lists valid terminals that can precede each production (for error recovery).
148
- # @option options [HHash{Symbol,String => Array<Symbol,String>}] :follow ({})
145
+ # @option options [Hash{Symbol,String => Array<Symbol,String>}] :follow ({})
149
146
  # Lists valid terminals that can follow each production (for error recovery).
150
147
  # @option options [Boolean] :validate (false)
151
- # whether to validate the parsed statements and values. If not validating,
152
- # the parser will attempt to recover from errors.
148
+ # whether to validate the parsed statements and values. If not validating, the parser will attempt to recover from errors.
153
149
  # @option options [Boolean] :progress
154
150
  # Show progress of parser productions
155
151
  # @option options [Boolean] :debug
156
152
  # Detailed debug output
157
153
  # @yield [context, *data]
158
- # Yields for to return data to reader
154
+ # Yields to return data to the parser
159
155
  # @yieldparam [:statement, :trace] context
160
156
  # Context for block
161
157
  # @yieldparam [Symbol] *data
162
158
  # Data specific to the call
163
- # @return [RDF::LL1::Parser]
159
+ # @return [EBNF::LL1::Parser]
164
160
  # @see http://cs.adelaide.edu.au/~charles/lt/Lectures/07-ErrorRecovery.pdf
165
161
  def parse(input = nil, prod = nil, options = {}, &block)
166
162
  @options = options.dup
@@ -179,7 +175,7 @@ module RDF::LL1
179
175
  raise Error, "Starting production not defined" unless prod
180
176
 
181
177
  @prod_data = [{}]
182
- prod = RDF::URI(prod).fragment.to_sym unless prod.is_a?(Symbol)
178
+ prod = prod.split('#').last.to_sym unless prod.is_a?(Symbol)
183
179
  todo_stack = [{:prod => prod, :terms => nil}]
184
180
 
185
181
  while !todo_stack.empty?
@@ -216,8 +212,8 @@ module RDF::LL1
216
212
  end
217
213
 
218
214
  if sequence.nil?
219
- if prod_branch.has_key?(:"ebnf:empty")
220
- debug("parse(production)", :level => 2) {"empty sequence for ebnf:empty"}
215
+ if prod_branch.has_key?(:_empty)
216
+ debug("parse(production)", :level => 2) {"empty sequence for _empty"}
221
217
  else
222
218
  # If there is no sequence for this production, we're
223
219
  # in error recovery, and _token_ has been advanced to
@@ -357,7 +353,7 @@ module RDF::LL1
357
353
  # If this token can be used by the top production, return it
358
354
  # Otherwise, if the branch table allows empty, also return the token
359
355
  return token if !@recovering && (
360
- (@branch[cur_prod] && @branch[cur_prod].has_key?(:"ebnf:empty")) ||
356
+ (@branch[cur_prod] && @branch[cur_prod].has_key?(:_empty)) ||
361
357
  first.any? {|t| token === t})
362
358
 
363
359
  # Otherwise, it's an error condition, and skip either until
@@ -417,7 +413,7 @@ module RDF::LL1
417
413
  def get_token
418
414
  token = begin
419
415
  @lexer.first
420
- rescue RDF::LL1::Lexer::Error => e
416
+ rescue EBNF::LL1::Lexer::Error => e
421
417
  # Recover from lexer error
422
418
  @lineno = e.lineno
423
419
  error("get_token", "With input '#{e.input}': #{e.message}",
@@ -537,5 +533,5 @@ module RDF::LL1
537
533
  super(message.to_s)
538
534
  end
539
535
  end # class Error
540
- end # class Reader
541
- end # module RDF::Turtle
536
+ end # module Parser
537
+ end # module EBNF::LL1
@@ -1,6 +1,6 @@
1
1
  require 'strscan' unless defined?(StringScanner)
2
2
 
3
- module RDF::LL1
3
+ module EBNF::LL1
4
4
  ##
5
5
  # Overload StringScanner with file operations
6
6
  #
@@ -0,0 +1,297 @@
1
+ module EBNF
2
+ module Parser
3
+ ##
4
+ # Iterate over rule strings.
5
+ # a line that starts with '\[' or '@' starts a new rule
6
+ #
7
+ # @param [StringScanner] scanner
8
+ # @yield rule_string
9
+ # @yieldparam [String] rule_string
10
+ def eachRule(scanner)
11
+ cur_lineno = 1
12
+ r = ''
13
+ until scanner.eos?
14
+ case
15
+ when s = scanner.scan(%r(\s+)m)
16
+ # Eat whitespace
17
+ cur_lineno += s.count("\n")
18
+ #debug("eachRule(ws)") { "[#{cur_lineno}] #{s.inspect}" }
19
+ when s = scanner.scan(%r(/\*([^\*]|\*[^\/])*\*/)m)
20
+ # Eat comments
21
+ cur_lineno += s.count("\n")
22
+ debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
23
+ when s = scanner.scan(%r(^@terminals))
24
+ #debug("eachRule(@terminals)") { "[#{cur_lineno}] #{s.inspect}" }
25
+ yield(r) unless r.empty?
26
+ @lineno = cur_lineno
27
+ yield(s)
28
+ r = ''
29
+ when s = scanner.scan(/@pass/)
30
+ # Found rule start, if we've already collected a rule, yield it
31
+ #debug("eachRule(@pass)") { "[#{cur_lineno}] #{s.inspect}" }
32
+ yield r unless r.empty?
33
+ @lineno = cur_lineno
34
+ r = s
35
+ when s = scanner.scan(/\[(?=\w+\])/)
36
+ # Found rule start, if we've already collected a rule, yield it
37
+ yield r unless r.empty?
38
+ #debug("eachRule(rule)") { "[#{cur_lineno}] #{s.inspect}" }
39
+ @lineno = cur_lineno
40
+ r = s
41
+ else
42
+ # Collect until end of line, or start of comment
43
+ s = scanner.scan_until(%r((?:/\*)|$)m)
44
+ cur_lineno += s.count("\n")
45
+ #debug("eachRule(rest)") { "[#{cur_lineno}] #{s.inspect}" }
46
+ r += s
47
+ end
48
+ end
49
+ yield r unless r.empty?
50
+ end
51
+
52
+ ##
53
+ # Parse a rule into a rule number, a symbol and an expression
54
+ #
55
+ # @param [String] rule
56
+ # @return [Rule]
57
+ def ruleParts(rule)
58
+ num_sym, expr = rule.split('::=', 2).map(&:strip)
59
+ num, sym = num_sym.split(']', 2).map(&:strip)
60
+ num = num[1..-1]
61
+ r = Rule.new(sym && sym.to_sym, num, ebnf(expr).first, :ebnf => self)
62
+ debug("ruleParts") { r.inspect }
63
+ r
64
+ end
65
+
66
+ ##
67
+ # Parse a string into an expression tree and a remaining string
68
+ #
69
+ # @example
70
+ # >>> ebnf("a b c")
71
+ # ((seq, \[('id', 'a'), ('id', 'b'), ('id', 'c')\]), '')
72
+ #
73
+ # >>> ebnf("a? b+ c*")
74
+ # ((seq, \[(opt, ('id', 'a')), (plus, ('id', 'b')), ('*', ('id', 'c'))\]), '')
75
+ #
76
+ # >>> ebnf(" | x xlist")
77
+ # ((alt, \[(seq, \[\]), (seq, \[('id', 'x'), ('id', 'xlist')\])\]), '')
78
+ #
79
+ # >>> ebnf("a | (b - c)")
80
+ # ((alt, \[('id', 'a'), (diff, \[('id', 'b'), ('id', 'c')\])\]), '')
81
+ #
82
+ # >>> ebnf("a b | c d")
83
+ # ((alt, \[(seq, \[('id', 'a'), ('id', 'b')\]), (seq, \[('id', 'c'), ('id', 'd')\])\]), '')
84
+ #
85
+ # >>> ebnf("a | b | c")
86
+ # ((alt, \[('id', 'a'), ('id', 'b'), ('id', 'c')\]), '')
87
+ #
88
+ # >>> ebnf("a) b c")
89
+ # (('id', 'a'), ' b c')
90
+ #
91
+ # >>> ebnf("BaseDecl? PrefixDecl*")
92
+ # ((seq, \[(opt, ('id', 'BaseDecl')), ('*', ('id', 'PrefixDecl'))\]), '')
93
+ #
94
+ # >>> ebnf("NCCHAR1 | diff | [0-9] | #x00B7 | [#x0300-#x036F] | \[#x203F-#x2040\]")
95
+ # ((alt, \[('id', 'NCCHAR1'), ("'", diff), (range, '0-9'), (hex, '#x00B7'), (range, '#x0300-#x036F'), (range, '#x203F-#x2040')\]), '')
96
+ #
97
+ # @param [String] s
98
+ # @return [Array]
99
+ def ebnf(s)
100
+ debug("ebnf") {"(#{s.inspect})"}
101
+ e, s = depth {alt(s)}
102
+ debug {"=> alt returned #{[e, s].inspect}"}
103
+ unless s.empty?
104
+ t, ss = depth {terminal(s)}
105
+ debug {"=> terminal returned #{[t, ss].inspect}"}
106
+ return [e, ss] if t.is_a?(Array) && t.first == :")"
107
+ end
108
+ [e, s]
109
+ end
110
+
111
+ ##
112
+ # Parse alt
113
+ # >>> alt("a | b | c")
114
+ # ((alt, \[('id', 'a'), ('id', 'b'), ('id', 'c')\]), '')
115
+ # @param [String] s
116
+ # @return [Array]
117
+ def alt(s)
118
+ debug("alt") {"(#{s.inspect})"}
119
+ args = []
120
+ while !s.empty?
121
+ e, s = depth {seq(s)}
122
+ debug {"=> seq returned #{[e, s].inspect}"}
123
+ if e.to_s.empty?
124
+ break unless args.empty?
125
+ e = [:seq, []] # empty sequence
126
+ end
127
+ args << e
128
+ unless s.empty?
129
+ t, ss = depth {terminal(s)}
130
+ break unless t[0] == :alt
131
+ s = ss
132
+ end
133
+ end
134
+ args.length > 1 ? [args.unshift(:alt), s] : [e, s]
135
+ end
136
+
137
+ ##
138
+ # parse seq
139
+ #
140
+ # >>> seq("a b c")
141
+ # ((seq, \[('id', 'a'), ('id', 'b'), ('id', 'c')\]), '')
142
+ #
143
+ # >>> seq("a b? c")
144
+ # ((seq, \[('id', 'a'), (opt, ('id', 'b')), ('id', 'c')\]), '')
145
+ def seq(s)
146
+ debug("seq") {"(#{s.inspect})"}
147
+ args = []
148
+ while !s.empty?
149
+ e, ss = depth {diff(s)}
150
+ debug {"=> diff returned #{[e, ss].inspect}"}
151
+ unless e.to_s.empty?
152
+ args << e
153
+ s = ss
154
+ else
155
+ break;
156
+ end
157
+ end
158
+ if args.length > 1
159
+ [args.unshift(:seq), s]
160
+ elsif args.length == 1
161
+ args + [s]
162
+ else
163
+ ["", s]
164
+ end
165
+ end
166
+
167
+ ##
168
+ # parse diff
169
+ #
170
+ # >>> diff("a - b")
171
+ # ((diff, \[('id', 'a'), ('id', 'b')\]), '')
172
+ def diff(s)
173
+ debug("diff") {"(#{s.inspect})"}
174
+ e1, s = depth {postfix(s)}
175
+ debug {"=> postfix returned #{[e1, s].inspect}"}
176
+ unless e1.to_s.empty?
177
+ unless s.empty?
178
+ t, ss = depth {terminal(s)}
179
+ debug {"diff #{[t, ss].inspect}"}
180
+ if t.is_a?(Array) && t.first == :diff
181
+ s = ss
182
+ e2, s = primary(s)
183
+ unless e2.to_s.empty?
184
+ return [[:diff, e1, e2], s]
185
+ else
186
+ error("diff", "Syntax Error")
187
+ raise "Syntax Error"
188
+ end
189
+ end
190
+ end
191
+ end
192
+ [e1, s]
193
+ end
194
+
195
+ ##
196
+ # parse postfix
197
+ #
198
+ # >>> postfix("a b c")
199
+ # (('id', 'a'), ' b c')
200
+ #
201
+ # >>> postfix("a? b c")
202
+ # ((opt, ('id', 'a')), ' b c')
203
+ def postfix(s)
204
+ debug("postfix") {"(#{s.inspect})"}
205
+ e, s = depth {primary(s)}
206
+ debug {"=> primary returned #{[e, s].inspect}"}
207
+ return ["", s] if e.to_s.empty?
208
+ if !s.empty?
209
+ t, ss = depth {terminal(s)}
210
+ debug {"=> #{[t, ss].inspect}"}
211
+ if t.is_a?(Array) && [:opt, :star, :plus].include?(t.first)
212
+ return [[t.first, e], ss]
213
+ end
214
+ end
215
+ [e, s]
216
+ end
217
+
218
+ ##
219
+ # parse primary
220
+ #
221
+ # >>> primary("a b c")
222
+ # (('id', 'a'), ' b c')
223
+ def primary(s)
224
+ debug("primary") {"(#{s.inspect})"}
225
+ t, s = depth {terminal(s)}
226
+ debug {"=> terminal returned #{[t, s].inspect}"}
227
+ if t.is_a?(Symbol) || t.is_a?(String)
228
+ [t, s]
229
+ elsif %w(range hex).map(&:to_sym).include?(t.first)
230
+ [t, s]
231
+ elsif t.first == :"("
232
+ e, s = depth {ebnf(s)}
233
+ debug {"=> ebnf returned #{[e, s].inspect}"}
234
+ [e, s]
235
+ else
236
+ ["", s]
237
+ end
238
+ end
239
+
240
+ ##
241
+ # parse one terminal; return the terminal and the remaining string
242
+ #
243
+ # A terminal is represented as a tuple whose 1st item gives the type;
244
+ # some types have additional info in the tuple.
245
+ #
246
+ # @example
247
+ # >>> terminal("'abc' def")
248
+ # (("'", 'abc'), ' def')
249
+ #
250
+ # >>> terminal("[0-9]")
251
+ # ((range, '0-9'), '')
252
+ # >>> terminal("#x00B7")
253
+ # ((hex, '#x00B7'), '')
254
+ # >>> terminal ("\[#x0300-#x036F\]")
255
+ # ((range, '#x0300-#x036F'), '')
256
+ # >>> terminal("\[^<>'{}|^`\]-\[#x00-#x20\]")
257
+ # ((range, "^<>'{}|^`"), '-\[#x00-#x20\]')
258
+ def terminal(s)
259
+ s = s.strip
260
+ case m = s[0,1]
261
+ when '"', "'"
262
+ l, s = s[1..-1].split(m, 2)
263
+ [l, s]
264
+ when '['
265
+ l, s = s[1..-1].split(']', 2)
266
+ [[:range, l], s]
267
+ when '#'
268
+ s.match(/(#\w+)(.*)$/)
269
+ l, s = $1, $2
270
+ [[:hex, l], s]
271
+ when /[[:alpha:]]/
272
+ s.match(/(\w+)(.*)$/)
273
+ l, s = $1, $2
274
+ [l.to_sym, s]
275
+ when '@'
276
+ s.match(/@(#\w+)(.*)$/)
277
+ l, s = $1, $2
278
+ [[:"@", l], s]
279
+ when '-'
280
+ [[:diff], s[1..-1]]
281
+ when '?'
282
+ [[:opt], s[1..-1]]
283
+ when '|'
284
+ [[:alt], s[1..-1]]
285
+ when '+'
286
+ [[:plus], s[1..-1]]
287
+ when '*'
288
+ [[:star], s[1..-1]]
289
+ when /[\(\)]/
290
+ [[m.to_sym], s[1..-1]]
291
+ else
292
+ error("terminal", "unrecognized terminal: #{s.inspect}")
293
+ raise "Syntax Error"
294
+ end
295
+ end
296
+ end
297
+ end