sparql 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (84) hide show
  1. data/AUTHORS +3 -0
  2. data/CREDITS +0 -0
  3. data/README.markdown +103 -53
  4. data/UNLICENSE +24 -0
  5. data/VERSION +1 -0
  6. data/bin/sparql +87 -0
  7. data/lib/sparql.rb +105 -22
  8. data/lib/sparql/algebra.rb +369 -0
  9. data/lib/sparql/algebra/evaluatable.rb +37 -0
  10. data/lib/sparql/algebra/expression.rb +284 -0
  11. data/lib/sparql/algebra/extensions.rb +159 -0
  12. data/lib/sparql/algebra/operator.rb +492 -0
  13. data/lib/sparql/algebra/operator/add.rb +34 -0
  14. data/lib/sparql/algebra/operator/and.rb +65 -0
  15. data/lib/sparql/algebra/operator/asc.rb +29 -0
  16. data/lib/sparql/algebra/operator/ask.rb +46 -0
  17. data/lib/sparql/algebra/operator/base.rb +46 -0
  18. data/lib/sparql/algebra/operator/bgp.rb +26 -0
  19. data/lib/sparql/algebra/operator/bound.rb +48 -0
  20. data/lib/sparql/algebra/operator/compare.rb +84 -0
  21. data/lib/sparql/algebra/operator/construct.rb +85 -0
  22. data/lib/sparql/algebra/operator/dataset.rb +77 -0
  23. data/lib/sparql/algebra/operator/datatype.rb +42 -0
  24. data/lib/sparql/algebra/operator/desc.rb +17 -0
  25. data/lib/sparql/algebra/operator/describe.rb +71 -0
  26. data/lib/sparql/algebra/operator/distinct.rb +50 -0
  27. data/lib/sparql/algebra/operator/divide.rb +43 -0
  28. data/lib/sparql/algebra/operator/equal.rb +32 -0
  29. data/lib/sparql/algebra/operator/exprlist.rb +52 -0
  30. data/lib/sparql/algebra/operator/filter.rb +71 -0
  31. data/lib/sparql/algebra/operator/graph.rb +28 -0
  32. data/lib/sparql/algebra/operator/greater_than.rb +32 -0
  33. data/lib/sparql/algebra/operator/greater_than_or_equal.rb +33 -0
  34. data/lib/sparql/algebra/operator/is_blank.rb +35 -0
  35. data/lib/sparql/algebra/operator/is_iri.rb +37 -0
  36. data/lib/sparql/algebra/operator/is_literal.rb +36 -0
  37. data/lib/sparql/algebra/operator/join.rb +67 -0
  38. data/lib/sparql/algebra/operator/lang.rb +29 -0
  39. data/lib/sparql/algebra/operator/lang_matches.rb +53 -0
  40. data/lib/sparql/algebra/operator/left_join.rb +95 -0
  41. data/lib/sparql/algebra/operator/less_than.rb +32 -0
  42. data/lib/sparql/algebra/operator/less_than_or_equal.rb +32 -0
  43. data/lib/sparql/algebra/operator/minus.rb +31 -0
  44. data/lib/sparql/algebra/operator/multiply.rb +34 -0
  45. data/lib/sparql/algebra/operator/not.rb +35 -0
  46. data/lib/sparql/algebra/operator/not_equal.rb +26 -0
  47. data/lib/sparql/algebra/operator/or.rb +65 -0
  48. data/lib/sparql/algebra/operator/order.rb +69 -0
  49. data/lib/sparql/algebra/operator/plus.rb +31 -0
  50. data/lib/sparql/algebra/operator/prefix.rb +45 -0
  51. data/lib/sparql/algebra/operator/project.rb +46 -0
  52. data/lib/sparql/algebra/operator/reduced.rb +47 -0
  53. data/lib/sparql/algebra/operator/regex.rb +70 -0
  54. data/lib/sparql/algebra/operator/same_term.rb +46 -0
  55. data/lib/sparql/algebra/operator/slice.rb +60 -0
  56. data/lib/sparql/algebra/operator/str.rb +35 -0
  57. data/lib/sparql/algebra/operator/subtract.rb +32 -0
  58. data/lib/sparql/algebra/operator/union.rb +55 -0
  59. data/lib/sparql/algebra/query.rb +99 -0
  60. data/lib/sparql/algebra/sxp_extensions.rb +35 -0
  61. data/lib/sparql/algebra/version.rb +20 -0
  62. data/lib/sparql/extensions.rb +102 -0
  63. data/lib/sparql/grammar.rb +298 -0
  64. data/lib/sparql/grammar/lexer.rb +609 -0
  65. data/lib/sparql/grammar/parser.rb +1383 -0
  66. data/lib/sparql/grammar/parser/meta.rb +1801 -0
  67. data/lib/sparql/results.rb +220 -0
  68. data/lib/sparql/version.rb +20 -0
  69. metadata +232 -62
  70. data/Rakefile +0 -22
  71. data/coverage/index.html +0 -252
  72. data/coverage/lib-sparql-execute_sparql_rb.html +0 -621
  73. data/coverage/lib-sparql_rb.html +0 -622
  74. data/lib/sparql/execute_sparql.rb +0 -27
  75. data/lib/sparql/sparql.treetop +0 -159
  76. data/sparql.gemspec +0 -16
  77. data/spec/spec.opts +0 -2
  78. data/spec/spec_helper.rb +0 -24
  79. data/spec/unit/graph_parsing_spec.rb +0 -76
  80. data/spec/unit/iri_parsing_spec.rb +0 -46
  81. data/spec/unit/prefixed_names_parsing_spec.rb +0 -40
  82. data/spec/unit/primitives_parsing_spec.rb +0 -26
  83. data/spec/unit/sparql_parsing_spec.rb +0 -72
  84. data/spec/unit/variables_parsing_spec.rb +0 -36
@@ -0,0 +1,609 @@
1
+ require 'bigdecimal' unless defined?(BigDecimal)
+ require 'stringio'   unless defined?(StringIO) # referenced by Lexer#input= (case on IO, StringIO)
+ require 'strscan'    unless defined?(StringScanner)
3
+
4
+ module SPARQL; module Grammar
5
+ ##
6
+ # A lexical analyzer for the SPARQL 1.0 grammar.
7
+ #
8
+ # Note that productions [80]-[85] have been incorporated directly into
9
+ # [77], [78], [79].
10
+ #
11
+ # @example Tokenizing a SPARQL query string
12
+ # query = "SELECT * WHERE { ?s ?p ?o }"
13
+ # lexer = SPARQL::Grammar::Lexer.tokenize(query)
14
+ # lexer.each_token do |token|
15
+ # puts token.inspect
16
+ # end
17
+ #
18
+ # @example Handling error conditions
19
+ # begin
20
+ # SPARQL::Grammar::Lexer.tokenize(query)
21
+ # rescue SPARQL::Grammar::Lexer::Error => error
22
+ # warn error.inspect
23
+ # end
24
+ #
25
+ # @see http://www.w3.org/TR/rdf-sparql-query/#grammar
26
+ # @see http://en.wikipedia.org/wiki/Lexical_analysis
27
+ class Lexer
28
+ include Enumerable
29
+
30
+ ESCAPE_CHARS = {
31
+ '\t' => "\t", # \u0009 (tab)
32
+ '\n' => "\n", # \u000A (line feed)
33
+ '\r' => "\r", # \u000D (carriage return)
34
+ '\b' => "\b", # \u0008 (backspace)
35
+ '\f' => "\f", # \u000C (form feed)
36
+ '\\"' => '"', # \u0022 (quotation mark, double quote mark)
37
+ '\\\'' => '\'', # \u0027 (apostrophe-quote, single quote mark)
38
+ '\\\\' => '\\' # \u005C (backslash)
39
+ }
40
+ ESCAPE_CHAR4 = /\\u([0-9A-Fa-f]{4,4})/ # \uXXXX
41
+ ESCAPE_CHAR8 = /\\U([0-9A-Fa-f]{8,8})/ # \UXXXXXXXX
42
+ ESCAPE_CHAR = /#{ESCAPE_CHAR4}|#{ESCAPE_CHAR8}/
43
+
44
+ ##
45
+ # Unicode regular expressions for Ruby 1.9+ with the Oniguruma engine.
46
+ module Unicode
47
+ if RUBY_VERSION >= '1.9'
48
+ U_CHARS1 = Regexp.compile(<<-EOS.gsub(/\s+/, ''))
49
+ [\\u00C0-\\u00D6]|[\\u00D8-\\u00F6]|[\\u00F8-\\u02FF]|
50
+ [\\u0370-\\u037D]|[\\u037F-\\u1FFF]|[\\u200C-\\u200D]|
51
+ [\\u2070-\\u218F]|[\\u2C00-\\u2FEF]|[\\u3001-\\uD7FF]|
52
+ [\\uF900-\\uFDCF]|[\\uFDF0-\\uFFFD]|[\\u{10000}-\\u{EFFFF}]
53
+ EOS
54
+ U_CHARS2 = Regexp.compile("\\u00B7|[\\u0300-\\u036F]|[\\u203F-\\u2040]")
55
+ end
56
+ end
57
+
58
+ ##
59
+ # UTF-8 regular expressions for Ruby 1.8.x.
60
+ module UTF_8
61
+ if RUBY_VERSION < '1.9'
62
+ U_CHARS1 = Regexp.compile(<<-EOS.gsub(/\s+/, ''))
63
+ \\xC3[\\x80-\\x96]| (?# [\\u00C0-\\u00D6]|)
64
+ \\xC3[\\x98-\\xB6]| (?# [\\u00D8-\\u00F6]|)
65
+ \\xC3[\\xB8-\\xBF]|[\\xC4-\\xCB][\\x80-\\xBF]| (?# [\\u00F8-\\u02FF]|)
66
+ \\xCD[\\xB0-\\xBD]| (?# [\\u0370-\\u037D]|)
67
+ \\xCD\\xBF|[\\xCE-\\xDF][\\x80-\\xBF]| (?# [\\u037F-\\u1FFF]|)
68
+ \\xE0[\\xA0-\\xBF][\\x80-\\xBF]| (?# ...)
69
+ \\xE1[\\x80-\\xBF][\\x80-\\xBF]| (?# ...)
70
+ \\xE2\\x80[\\x8C-\\x8D]| (?# [\\u200C-\\u200D]|)
71
+ \\xE2\\x81[\\xB0-\\xBF]| (?# [\\u2070-\\u218F]|)
72
+ \\xE2[\\x82-\\x85][\\x80-\\xBF]| (?# ...)
73
+ \\xE2\\x86[\\x80-\\x8F]| (?# ...)
74
+ \\xE2[\\xB0-\\xBE][\\x80-\\xBF]| (?# [\\u2C00-\\u2FEF]|)
75
+ \\xE2\\xBF[\\x80-\\xAF]| (?# ...)
76
+ \\xE3\\x80[\\x81-\\xBF]| (?# [\\u3001-\\uD7FF]|)
77
+ \\xE3[\\x81-\\xBF][\\x80-\\xBF]| (?# ...)
78
+ [\\xE4-\\xEC][\\x80-\\xBF][\\x80-\\xBF]| (?# ...)
79
+ \\xED[\\x80-\\x9F][\\x80-\\xBF]| (?# ...)
80
+ \\xEF[\\xA4-\\xB6][\\x80-\\xBF]| (?# [\\uF900-\\uFDCF]|)
81
+ \\xEF\\xB7[\\x80-\\x8F]| (?# ...)
82
+ \\xEF\\xB7[\\xB0-\\xBF]| (?# [\\uFDF0-\\uFFFD]|)
83
+ \\xEF[\\xB8-\\xBE][\\x80-\\xBF]| (?# ...)
84
+ \\xEF\\xBF[\\x80-\\xBD]| (?# ...)
85
+ \\xF0[\\x90-\\xBF][\\x80-\\xBF][\\x80-\\xBF]| (?# [\\u{10000}-\\u{EFFFF}])
86
+ [\\xF1-\\xF2][\\x80-\\xBF][\\x80-\\xBF][\\x80-\\xBF]|
87
+ \\xF3[\\x80-\\xAF][\\x80-\\xBF][\\x80-\\xBF] (?# ...)
88
+ EOS
89
+ U_CHARS2 = Regexp.compile(<<-EOS.gsub(/\s+/, ''))
90
+ \\xC2\\xB7| (?# \\u00B7|)
91
+ \\xCC[\\x80-\\xBF]|\\xCD[\\x80-\\xAF]| (?# [\\u0300-\\u036F]|)
92
+ \\xE2\\x80\\xBF|\\xE2\\x81\\x80 (?# [\\u203F-\\u2040])
93
+ EOS
94
+ end
95
+ end
96
+
97
+ include RUBY_VERSION >= '1.9' ? Unicode : UTF_8
98
+
99
+ KEYWORD = /#{KEYWORDS.join('|')}|#{FUNCTIONS.join('|')}/i
100
+ DELIMITER = /\^\^|[{}()\[\],;\.]/
101
+ OPERATOR = /a|\|\||&&|!=|<=|>=|[!=<>+\-*\/]/
102
+ COMMENT = /#.*/
103
+
104
+ PN_CHARS_BASE = /[A-Z]|[a-z]|#{U_CHARS1}/ # [95]
105
+ PN_CHARS_U = /_|#{PN_CHARS_BASE}/ # [96]
106
+ VARNAME = /(?:[0-9]|#{PN_CHARS_U})
107
+ (?:[0-9]|#{PN_CHARS_U}|#{U_CHARS2})*/x # [97]
108
+ PN_CHARS = /-|[0-9]|#{PN_CHARS_U}|#{U_CHARS2}/ # [98]
109
+ PN_CHARS_BODY = /(?:(?:\.|#{PN_CHARS})*#{PN_CHARS})?/
110
+ PN_PREFIX = /#{PN_CHARS_BASE}#{PN_CHARS_BODY}/ # [99]
111
+ PN_LOCAL = /(?:[0-9]|#{PN_CHARS_U})#{PN_CHARS_BODY}/ # [100]
112
+
113
+ IRI_REF = /<([^<>"{}|^`\\\x00-\x20]*)>/ # [70]
114
+ PNAME_NS = /(#{PN_PREFIX}?):/ # [71]
115
+ PNAME_LN = /#{PNAME_NS}(#{PN_LOCAL})/ # [72]
116
+ BLANK_NODE_LABEL = /_:(#{PN_LOCAL})/ # [73]
117
+ VAR1 = /\?(#{VARNAME})/ # [74]
118
+ VAR2 = /\$(#{VARNAME})/ # [75]
119
+ LANGTAG = /@([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)/ # [76]
120
+ INTEGER = /[0-9]+/ # [77]
121
+ DECIMAL = /(?:[0-9]+\.[0-9]*|\.[0-9]+)/ # [78]
122
+ EXPONENT = /[eE][+-]?[0-9]+/ # [86]
123
+ DOUBLE = /(?:[0-9]+\.[0-9]*|\.[0-9]+|[0-9]+)#{EXPONENT}/ # [79]
124
+ ECHAR = /\\[tbnrf\\"']/ # [91]
125
+ STRING_LITERAL1 = /'((?:[^\x27\x5C\x0A\x0D]|#{ECHAR})*)'/ # [87]
126
+ STRING_LITERAL2 = /"((?:[^\x22\x5C\x0A\x0D]|#{ECHAR})*)"/ # [88]
127
+ STRING_LITERAL_LONG1 = /'''((?:(?:'|'')?(?:[^'\\]|#{ECHAR})+)*)'''/m # [89]
128
+ STRING_LITERAL_LONG2 = /"""((?:(?:"|"")?(?:[^"\\]|#{ECHAR})+)*)"""/m # [90]
129
+ WS = /\x20|\x09|\x0D|\x0A/ # [93]
130
+ NIL = /\(#{WS}*\)/ # [92]
131
+ ANON = /\[#{WS}*\]/ # [94]
132
+
133
+ BooleanLiteral = /true|false/ # [65]
134
+ String = /#{STRING_LITERAL_LONG1}|#{STRING_LITERAL_LONG2}|
135
+ #{STRING_LITERAL1}|#{STRING_LITERAL2}/x # [66]
136
+
137
+ # Make all defined regular expression constants immutable:
138
+ constants.each { |name| const_get(name).freeze }
139
+
140
##
# Returns a copy of the given `input` string with all `\uXXXX` and
# `\UXXXXXXXX` Unicode codepoint escape sequences replaced with their
# unescaped UTF-8 character counterparts.
#
# @param  [String] input
# @return [String]
# @see    http://www.w3.org/TR/rdf-sparql-query/#codepointEscape
def self.unescape_codepoints(input)
  buffer = input.dup
  # Work on raw bytes while substituting (Ruby 1.9+ only):
  buffer.force_encoding(Encoding::ASCII_8BIT) if buffer.respond_to?(:force_encoding)

  # Decode \uXXXX and \UXXXXXXXX code points into UTF-8 characters:
  buffer.gsub!(ESCAPE_CHAR) do
    decoded = [($1 || $2).hex].pack('U*')
    decoded.respond_to?(:force_encoding) ? decoded.force_encoding(Encoding::ASCII_8BIT) : decoded
  end

  # Restore the UTF-8 encoding tag on the finished string (Ruby 1.9+):
  buffer.force_encoding(Encoding::UTF_8) if buffer.respond_to?(:force_encoding)
  buffer
end
161
+
162
##
# Returns a copy of the given `input` string with all string escape
# sequences (e.g. `\n` and `\t`) replaced with their unescaped UTF-8
# character counterparts.
#
# @param  [String] input
# @return [String]
# @see    http://www.w3.org/TR/rdf-sparql-query/#grammarEscapes
def self.unescape_string(input)
  # Each ECHAR match is a two-character sequence keyed in ESCAPE_CHARS.
  input.gsub(ECHAR) { |sequence| ESCAPE_CHARS[sequence] }
end
173
+
174
##
# Tokenizes the given `input` string or stream.
#
# When a block is given, the new lexer is yielded to it and the block's
# result returned; otherwise the lexer itself is returned.
#
# @param  [String, #to_s]          input
# @param  [Hash{Symbol => Object}] options
# @yield  [lexer]
# @yieldparam [Lexer] lexer
# @return [Lexer]
# @raise  [Lexer::Error] on invalid input
def self.tokenize(input, options = {}, &block)
  lexer = new(input, options)
  block ? block.call(lexer) : lexer
end
187
+
188
##
# Initializes a new lexer instance.
#
# @param [String, #to_s]          input
# @param [Hash{Symbol => Object}] options
def initialize(input = nil, options = {})
  @options = options.dup
  # Delegate normalization (read/dup/unescape) to the #input= writer.
  self.input = input if input
end
197
+
198
+ ##
199
+ # Any additional options for the lexer.
200
+ #
201
+ # @return [Hash]
202
+ attr_reader :options
203
+
204
+ ##
205
+ # The current input string being processed.
206
+ #
207
+ # @return [String]
208
+ attr_accessor :input
209
+
210
+ ##
211
+ # The current line number (zero-based).
212
+ #
213
+ # @return [Integer]
214
+ attr_reader :lineno
215
+
216
##
# Sets the input, coercing strings, IO-like streams (read in full) and
# arbitrary objects (via #to_s), decoding codepoint escapes, and
# resetting the line counter.
#
# @param  [String, #to_s] input
# @return [void]
def input=(input)
  source =
    if input.is_a?(::String)
      input
    elsif input.is_a?(IO) || input.is_a?(StringIO)
      input.read
    else
      input.to_s
    end
  @input = source.dup
  @input.force_encoding(Encoding::UTF_8) if @input.respond_to?(:force_encoding) # Ruby 1.9+
  @input = self.class.unescape_codepoints(@input)
  @lineno = 0
end
230
+
231
##
# Returns `true` if the input string is lexically valid.
#
# To be considered valid, the input string must contain more than zero
# tokens, and must not contain any invalid tokens (a lexing Error is
# treated as invalid rather than propagated).
#
# @return [Boolean]
def valid?
  count > 0
rescue Error
  false
end
245
+
246
+ ##
247
+ # Enumerates each token in the input string.
248
+ #
249
+ # @yield [token]
250
+ # @yieldparam [Token] token
251
+ # @return [Enumerator]
252
+ def each_token(&block)
253
+ if block_given?
254
+ @lineno = 0
255
+ @scanner = StringScanner.new(@input)
256
+ until scanner.eos?
257
+ case
258
+ when skip_whitespace
259
+ when skip_comment
260
+ when token = match_token
261
+ yield token
262
+ else
263
+ lexeme = (@scanner.rest.split(/#{WS}|#{COMMENT}/).first rescue nil) || @scanner.rest
264
+ raise Error.new("invalid token #{lexeme.inspect} on line #{lineno + 1}",
265
+ :input => input, :token => lexeme, :lineno => lineno)
266
+ end
267
+ end
268
+ @scanner = nil
269
+ end
270
+ enum_for(:each_token)
271
+ end
272
+ alias_method :each, :each_token
273
+
274
+ protected
275
+
276
+ # @return [StringScanner]
277
+ attr_reader :scanner
278
+
279
# Consumes whitespace at the scan position, keeping the line counter in
# step; returns the matched text, or nil when none was present.
# @see http://www.w3.org/TR/rdf-sparql-query/#whitespace
def skip_whitespace
  ws = scanner.scan(WS)
  return nil unless ws
  @lineno += ws.count("\n")
  ws
end
287
+
288
# Consumes a comment (the remainder of the current line) at the scan
# position; returns the number of characters skipped, or nil when the
# position is not at a comment.
# @see http://www.w3.org/TR/rdf-sparql-query/#grammarComments
def skip_comment
  # The original assigned this to an unused local `skipped`; the value
  # of scanner.skip is the method's result either way.
  scanner.skip(COMMENT)
end
293
+
294
# Attempts every terminal matcher in precedence order and returns the
# first Token produced, or nil when nothing matches at the scan
# position. The order is significant: longer/more specific forms must
# be tried before their prefixes (e.g. long strings before short,
# DOUBLE before DECIMAL before INTEGER, PNAME_LN before PNAME_NS).
def match_token
  match_var1 || match_var2 ||
    match_iri_ref ||
    match_pname_ln || match_pname_ns ||
    match_string_long_1 || match_string_long_2 ||
    match_string_1 || match_string_2 ||
    match_langtag ||
    match_double || match_decimal || match_integer ||
    match_boolean_literal ||
    match_blank_node_label ||
    match_nil || match_anon ||
    match_keyword || match_delimiter || match_operator
end
316
+
317
# Matches a `?var` variable; the token value is the bare name ($1).
def match_var1
  token(:VAR1, scanner[1].to_s) if scanner.scan(VAR1)
end

# Matches a `$var` variable; the token value is the bare name ($1).
def match_var2
  token(:VAR2, scanner[1].to_s) if scanner.scan(VAR2)
end

# Matches an `<iri>` reference; the token value excludes the brackets.
def match_iri_ref
  token(:IRI_REF, scanner[1].to_s) if scanner.scan(IRI_REF)
end

# Matches a prefixed name `prefix:local`; value is [prefix, local],
# with a nil prefix for the default namespace (`:local`).
def match_pname_ln
  if scanner.scan(PNAME_LN)
    token(:PNAME_LN, [scanner[1].empty? ? nil : scanner[1].to_s, scanner[2].to_s])
  end
end

# Matches a bare namespace `prefix:`; nil value for the default (`:`).
def match_pname_ns
  if scanner.scan(PNAME_NS)
    token(:PNAME_NS, scanner[1].empty? ? nil : scanner[1].to_s)
  end
end
346
+
347
# Matches a `'''...'''` long string; the value is the unescaped body.
def match_string_long_1
  if scanner.scan(STRING_LITERAL_LONG1)
    token(:STRING_LITERAL_LONG1, self.class.unescape_string(scanner[1]))
  end
end

# Matches a `"""..."""` long string; the value is the unescaped body.
def match_string_long_2
  if scanner.scan(STRING_LITERAL_LONG2)
    token(:STRING_LITERAL_LONG2, self.class.unescape_string(scanner[1]))
  end
end

# Matches a `'...'` string; the value is the unescaped body.
def match_string_1
  if scanner.scan(STRING_LITERAL1)
    token(:STRING_LITERAL1, self.class.unescape_string(scanner[1]))
  end
end

# Matches a `"..."` string; the value is the unescaped body.
def match_string_2
  if scanner.scan(STRING_LITERAL2)
    token(:STRING_LITERAL2, self.class.unescape_string(scanner[1]))
  end
end

# Matches an `@lang` tag; the value is the tag without the `@`.
def match_langtag
  token(:LANGTAG, scanner[1].to_s) if scanner.scan(LANGTAG)
end
376
+
377
# Matches a DOUBLE literal (mantissa plus mandatory exponent); the
# token carries the matched text verbatim.
def match_double
  lexeme = scanner.scan(DOUBLE)
  token(:DOUBLE, lexeme) if lexeme
end

# Matches a DECIMAL literal (digits with a decimal point, no exponent).
def match_decimal
  lexeme = scanner.scan(DECIMAL)
  token(:DECIMAL, lexeme) if lexeme
end

# Matches an INTEGER literal (bare digits).
def match_integer
  lexeme = scanner.scan(INTEGER)
  token(:INTEGER, lexeme) if lexeme
end

# Matches `true` or `false`; the token carries the matched text.
def match_boolean_literal
  lexeme = scanner.scan(BooleanLiteral)
  token(:BooleanLiteral, lexeme) if lexeme
end
400
+
401
# Matches a `_:label` blank node; the value is the bare label ($1).
def match_blank_node_label
  token(:BLANK_NODE_LABEL, scanner[1].to_s) if scanner.scan(BLANK_NODE_LABEL)
end

# Matches the empty-list terminal `( )` (any interior whitespace).
def match_nil
  token(:NIL) if scanner.scan(NIL)
end

# Matches the anonymous-node terminal `[ ]` (any interior whitespace).
def match_anon
  token(:ANON) if scanner.scan(ANON)
end
418
+
419
# Matches a keyword or built-in function name (case-insensitively);
# keyword tokens have a nil type and an upcased string value.
def match_keyword
  lexeme = scanner.scan(KEYWORD)
  token(nil, lexeme.upcase.to_s) if lexeme
end

# Matches a delimiter (`^^`, braces, brackets, comma, semicolon, dot);
# delimiter tokens have a nil type and the delimiter as value.
def match_delimiter
  lexeme = scanner.scan(DELIMITER)
  token(nil, lexeme.to_s) if lexeme
end

# Matches an operator (`a`, `||`, `&&`, comparisons, arithmetic);
# operator tokens have a nil type and the operator as value.
def match_operator
  lexeme = scanner.scan(OPERATOR)
  token(nil, lexeme.to_s) if lexeme
end
436
+
437
+ protected
438
+
439
+ ##
440
+ # Constructs a new token object annotated with the current line number.
441
+ #
442
+ # The parser relies on the type being a symbolized URI and the value being
443
+ # a string, if there is no type. If there is a type, then the value takes
444
+ # on the native representation appropriate for that type.
445
+ #
446
+ # @param [Symbol] type
447
+ # @param [Object] value
448
+ # @return [Token]
449
+ def token(type, value = nil)
450
+ Token.new(type, value, :lineno => lineno)
451
+ end
452
+
453
##
# Represents a lexer token.
#
# @example Creating a new token
#   token = SPARQL::Grammar::Lexer::Token.new(:LANGTAG, :en)
#   token.type   #=> :LANGTAG
#   token.value  #=> "en"
#
# @see http://en.wikipedia.org/wiki/Lexical_analysis#Token
class Token
  # The token's symbol type (nil for keyword/delimiter/operator tokens).
  # @return [Symbol]
  attr_reader :type

  # The token's value.
  # @return [Object]
  attr_reader :value

  # The line number where the token was encountered (zero-based).
  # @return [Integer]
  attr_reader :lineno

  # Any additional options for the token.
  # @return [Hash]
  attr_reader :options

  ##
  # Initializes a new token instance.
  #
  # @param  [Symbol] type
  # @param  [Object] value
  # @param  [Hash{Symbol => Object}] options
  # @option options [Integer] :lineno (nil)
  def initialize(type, value = nil, options = {})
    @type  = type ? type.to_s.to_sym : nil
    @value = value
    @options = options.dup
    # :lineno is promoted to its own attribute, not kept in @options.
    @lineno  = @options.delete(:lineno)
  end

  ##
  # Returns the attribute named by `key`. Accepts positional indices
  # (0 == type, 1 == value) as well as symbol or string names; any
  # other key yields nil.
  #
  # @param  [Symbol, String, Integer] key
  # @return [Object]
  def [](key)
    key = key.to_s.to_sym unless key.is_a?(Integer) || key.is_a?(Symbol)
    if key == 0 || key == :type
      @type
    elsif key == 1 || key == :value
      @value
    else
      nil
    end
  end

  ##
  # Returns `true` if the given `value` matches either the type or value
  # of this token.
  #
  # @example Matching using the symbolic type
  #   SPARQL::Grammar::Lexer::Token.new(:NIL) === :NIL     #=> true
  #
  # @example Matching using the string value
  #   SPARQL::Grammar::Lexer::Token.new(nil, "{") === "{"  #=> true
  #
  # @param  [Symbol, String] value
  # @return [Boolean]
  def ===(value)
    if value.is_a?(Symbol)
      value == @type
    elsif value.is_a?(::String)
      value.to_s == @value.to_s
    else
      value == @value
    end
  end

  ##
  # Returns a hash table representation of this token.
  #
  # @return [Hash]
  def to_hash
    {:type => @type, :value => @value}
  end

  ##
  # Returns the type when present, otherwise the value.
  def representation
    @type || @value
  end

  ##
  # Returns an array representation of this token.
  #
  # @return [Array]
  def to_a
    [@type, @value]
  end

  ##
  # Returns a developer-friendly representation of this token.
  #
  # @return [String]
  def inspect
    to_hash.inspect
  end
end # class Token
564
+
565
##
# Raised for errors during lexical analysis.
#
# @example Raising a lexer error
#   raise SPARQL::Grammar::Lexer::Error.new(
#     "invalid token '%' on line 10",
#     :input => query, :token => '%', :lineno => 9)
#
# @see http://ruby-doc.org/core/classes/StandardError.html
class Error < StandardError
  # The input string associated with the error.
  # @return [String]
  attr_reader :input

  # The invalid token which triggered the error.
  # @return [String]
  attr_reader :token

  # The line number where the error occurred (zero-based).
  # @return [Integer]
  attr_reader :lineno

  ##
  # Initializes a new lexer error instance.
  #
  # @param  [String, #to_s]          message
  # @param  [Hash{Symbol => Object}] options
  # @option options [String]  :input  (nil)
  # @option options [String]  :token  (nil)
  # @option options [Integer] :lineno (nil)
  def initialize(message, options = {})
    # Absent keys come back as nil, matching reader defaults.
    @input, @token, @lineno = options.values_at(:input, :token, :lineno)
    super(message.to_s)
  end
end # class Error
608
+ end # class Lexer
609
+ end; end # module SPARQL::Grammar