sparql 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. data/AUTHORS +3 -0
  2. data/CREDITS +0 -0
  3. data/README.markdown +103 -53
  4. data/UNLICENSE +24 -0
  5. data/VERSION +1 -0
  6. data/bin/sparql +87 -0
  7. data/lib/sparql.rb +105 -22
  8. data/lib/sparql/algebra.rb +369 -0
  9. data/lib/sparql/algebra/evaluatable.rb +37 -0
  10. data/lib/sparql/algebra/expression.rb +284 -0
  11. data/lib/sparql/algebra/extensions.rb +159 -0
  12. data/lib/sparql/algebra/operator.rb +492 -0
  13. data/lib/sparql/algebra/operator/add.rb +34 -0
  14. data/lib/sparql/algebra/operator/and.rb +65 -0
  15. data/lib/sparql/algebra/operator/asc.rb +29 -0
  16. data/lib/sparql/algebra/operator/ask.rb +46 -0
  17. data/lib/sparql/algebra/operator/base.rb +46 -0
  18. data/lib/sparql/algebra/operator/bgp.rb +26 -0
  19. data/lib/sparql/algebra/operator/bound.rb +48 -0
  20. data/lib/sparql/algebra/operator/compare.rb +84 -0
  21. data/lib/sparql/algebra/operator/construct.rb +85 -0
  22. data/lib/sparql/algebra/operator/dataset.rb +77 -0
  23. data/lib/sparql/algebra/operator/datatype.rb +42 -0
  24. data/lib/sparql/algebra/operator/desc.rb +17 -0
  25. data/lib/sparql/algebra/operator/describe.rb +71 -0
  26. data/lib/sparql/algebra/operator/distinct.rb +50 -0
  27. data/lib/sparql/algebra/operator/divide.rb +43 -0
  28. data/lib/sparql/algebra/operator/equal.rb +32 -0
  29. data/lib/sparql/algebra/operator/exprlist.rb +52 -0
  30. data/lib/sparql/algebra/operator/filter.rb +71 -0
  31. data/lib/sparql/algebra/operator/graph.rb +28 -0
  32. data/lib/sparql/algebra/operator/greater_than.rb +32 -0
  33. data/lib/sparql/algebra/operator/greater_than_or_equal.rb +33 -0
  34. data/lib/sparql/algebra/operator/is_blank.rb +35 -0
  35. data/lib/sparql/algebra/operator/is_iri.rb +37 -0
  36. data/lib/sparql/algebra/operator/is_literal.rb +36 -0
  37. data/lib/sparql/algebra/operator/join.rb +67 -0
  38. data/lib/sparql/algebra/operator/lang.rb +29 -0
  39. data/lib/sparql/algebra/operator/lang_matches.rb +53 -0
  40. data/lib/sparql/algebra/operator/left_join.rb +95 -0
  41. data/lib/sparql/algebra/operator/less_than.rb +32 -0
  42. data/lib/sparql/algebra/operator/less_than_or_equal.rb +32 -0
  43. data/lib/sparql/algebra/operator/minus.rb +31 -0
  44. data/lib/sparql/algebra/operator/multiply.rb +34 -0
  45. data/lib/sparql/algebra/operator/not.rb +35 -0
  46. data/lib/sparql/algebra/operator/not_equal.rb +26 -0
  47. data/lib/sparql/algebra/operator/or.rb +65 -0
  48. data/lib/sparql/algebra/operator/order.rb +69 -0
  49. data/lib/sparql/algebra/operator/plus.rb +31 -0
  50. data/lib/sparql/algebra/operator/prefix.rb +45 -0
  51. data/lib/sparql/algebra/operator/project.rb +46 -0
  52. data/lib/sparql/algebra/operator/reduced.rb +47 -0
  53. data/lib/sparql/algebra/operator/regex.rb +70 -0
  54. data/lib/sparql/algebra/operator/same_term.rb +46 -0
  55. data/lib/sparql/algebra/operator/slice.rb +60 -0
  56. data/lib/sparql/algebra/operator/str.rb +35 -0
  57. data/lib/sparql/algebra/operator/subtract.rb +32 -0
  58. data/lib/sparql/algebra/operator/union.rb +55 -0
  59. data/lib/sparql/algebra/query.rb +99 -0
  60. data/lib/sparql/algebra/sxp_extensions.rb +35 -0
  61. data/lib/sparql/algebra/version.rb +20 -0
  62. data/lib/sparql/extensions.rb +102 -0
  63. data/lib/sparql/grammar.rb +298 -0
  64. data/lib/sparql/grammar/lexer.rb +609 -0
  65. data/lib/sparql/grammar/parser.rb +1383 -0
  66. data/lib/sparql/grammar/parser/meta.rb +1801 -0
  67. data/lib/sparql/results.rb +220 -0
  68. data/lib/sparql/version.rb +20 -0
  69. metadata +232 -62
  70. data/Rakefile +0 -22
  71. data/coverage/index.html +0 -252
  72. data/coverage/lib-sparql-execute_sparql_rb.html +0 -621
  73. data/coverage/lib-sparql_rb.html +0 -622
  74. data/lib/sparql/execute_sparql.rb +0 -27
  75. data/lib/sparql/sparql.treetop +0 -159
  76. data/sparql.gemspec +0 -16
  77. data/spec/spec.opts +0 -2
  78. data/spec/spec_helper.rb +0 -24
  79. data/spec/unit/graph_parsing_spec.rb +0 -76
  80. data/spec/unit/iri_parsing_spec.rb +0 -46
  81. data/spec/unit/prefixed_names_parsing_spec.rb +0 -40
  82. data/spec/unit/primitives_parsing_spec.rb +0 -26
  83. data/spec/unit/sparql_parsing_spec.rb +0 -72
  84. data/spec/unit/variables_parsing_spec.rb +0 -36
@@ -0,0 +1,609 @@
1
+ require 'strscan' unless defined?(StringScanner)
2
+ require 'bigdecimal' unless defined?(BigDecimal)
3
+
4
+ module SPARQL; module Grammar
5
+ ##
6
+ # A lexical analyzer for the SPARQL 1.0 grammar.
7
+ #
8
+ # Note that productions [80]-[85] have been incorporated directly into
9
+ # [77], [78], [79].
10
+ #
11
+ # @example Tokenizing a SPARQL query string
12
+ # query = "SELECT * WHERE { ?s ?p ?o }"
13
+ # lexer = SPARQL::Grammar::Lexer.tokenize(query)
14
+ # lexer.each_token do |token|
15
+ # puts token.inspect
16
+ # end
17
+ #
18
+ # @example Handling error conditions
19
+ # begin
20
+ # SPARQL::Grammar::Lexer.tokenize(query)
21
+ # rescue SPARQL::Grammar::Lexer::Error => error
22
+ # warn error.inspect
23
+ # end
24
+ #
25
+ # @see http://www.w3.org/TR/rdf-sparql-query/#grammar
26
+ # @see http://en.wikipedia.org/wiki/Lexical_analysis
27
+ class Lexer
28
+ include Enumerable
29
+
# Lookup table from each two-character backslash escape, exactly as it is
# written in query text, to the single character it stands for.
ESCAPE_CHARS = {
  "\\t"  => "\t",   # \u0009 (tab)
  "\\n"  => "\n",   # \u000A (line feed)
  "\\r"  => "\r",   # \u000D (carriage return)
  "\\b"  => "\b",   # \u0008 (backspace)
  "\\f"  => "\f",   # \u000C (form feed)
  "\\\"" => "\"",   # \u0022 (quotation mark, double quote mark)
  "\\'"  => "'",    # \u0027 (apostrophe-quote, single quote mark)
  "\\\\" => "\\"    # \u005C (backslash)
}

# Codepoint escapes. In the combined pattern, capture group 1 holds the four
# hex digits of a \uXXXX escape and group 2 the eight digits of \UXXXXXXXX.
ESCAPE_CHAR4 = /\\u([0-9A-Fa-f]{4,4})/            # \uXXXX
ESCAPE_CHAR8 = /\\U([0-9A-Fa-f]{8,8})/            # \UXXXXXXXX
ESCAPE_CHAR  = /#{ESCAPE_CHAR4}|#{ESCAPE_CHAR8}/
43
+
##
# Unicode regular expressions for Ruby 1.9+ with the Oniguruma engine.
#
# The constants are only defined when running on Ruby 1.9 or later; on older
# interpreters this module stays empty.
module Unicode
  if RUBY_VERSION >= '1.9'
    # Code point ranges, OR-ed together into a single alternation.
    U_CHARS1 = Regexp.compile([
      "[\\u00C0-\\u00D6]", "[\\u00D8-\\u00F6]", "[\\u00F8-\\u02FF]",
      "[\\u0370-\\u037D]", "[\\u037F-\\u1FFF]", "[\\u200C-\\u200D]",
      "[\\u2070-\\u218F]", "[\\u2C00-\\u2FEF]", "[\\u3001-\\uD7FF]",
      "[\\uF900-\\uFDCF]", "[\\uFDF0-\\uFFFD]", "[\\u{10000}-\\u{EFFFF}]"
    ].join('|'))

    # Additional code points: U+00B7, U+0300-U+036F and U+203F-U+2040.
    U_CHARS2 = Regexp.compile("\\u00B7|[\\u0300-\\u036F]|[\\u203F-\\u2040]")
  end
end
57
+
##
# UTF-8 regular expressions for Ruby 1.8.x.
#
# Byte-level equivalents of the patterns in the Unicode module. They are only
# defined when running on an interpreter older than Ruby 1.9; otherwise this
# module stays empty. Each heredoc line carries a `(?# ...)` regex comment
# naming the code point range its byte sequence covers; all whitespace is
# stripped from the heredoc before it is compiled.
module UTF_8
  if RUBY_VERSION < '1.9'
    U_CHARS1 = Regexp.compile(<<-EOS.gsub(/\s+/, ''))
      \\xC3[\\x80-\\x96]|                                (?# [\\u00C0-\\u00D6]|)
      \\xC3[\\x98-\\xB6]|                                (?# [\\u00D8-\\u00F6]|)
      \\xC3[\\xB8-\\xBF]|[\\xC4-\\xCB][\\x80-\\xBF]|       (?# [\\u00F8-\\u02FF]|)
      \\xCD[\\xB0-\\xBD]|                                (?# [\\u0370-\\u037D]|)
      \\xCD\\xBF|[\\xCE-\\xDF][\\x80-\\xBF]|               (?# [\\u037F-\\u1FFF]|)
      \\xE0[\\xA0-\\xBF][\\x80-\\xBF]|                    (?# ...)
      \\xE1[\\x80-\\xBF][\\x80-\\xBF]|                    (?# ...)
      \\xE2\\x80[\\x8C-\\x8D]|                            (?# [\\u200C-\\u200D]|)
      \\xE2\\x81[\\xB0-\\xBF]|                            (?# [\\u2070-\\u218F]|)
      \\xE2[\\x82-\\x85][\\x80-\\xBF]|                    (?# ...)
      \\xE2\\x86[\\x80-\\x8F]|                            (?# ...)
      \\xE2[\\xB0-\\xBE][\\x80-\\xBF]|                    (?# [\\u2C00-\\u2FEF]|)
      \\xE2\\xBF[\\x80-\\xAF]|                            (?# ...)
      \\xE3\\x80[\\x81-\\xBF]|                            (?# [\\u3001-\\uD7FF]|)
      \\xE3[\\x81-\\xBF][\\x80-\\xBF]|                    (?# ...)
      [\\xE4-\\xEC][\\x80-\\xBF][\\x80-\\xBF]|             (?# ...)
      \\xED[\\x80-\\x9F][\\x80-\\xBF]|                    (?# ...)
      \\xEF[\\xA4-\\xB6][\\x80-\\xBF]|                    (?# [\\uF900-\\uFDCF]|)
      \\xEF\\xB7[\\x80-\\x8F]|                            (?# ...)
      \\xEF\\xB7[\\xB0-\\xBF]|                            (?# [\\uFDF0-\\uFFFD]|)
      \\xEF[\\xB8-\\xBE][\\x80-\\xBF]|                    (?# ...)
      \\xEF\\xBF[\\x80-\\xBD]|                            (?# ...)
      \\xF0[\\x90-\\xBF][\\x80-\\xBF][\\x80-\\xBF]|        (?# [\\u{10000}-\\u{EFFFF}])
      [\\xF1-\\xF2][\\x80-\\xBF][\\x80-\\xBF][\\x80-\\xBF]|
      \\xF3[\\x80-\\xAF][\\x80-\\xBF][\\x80-\\xBF]         (?# ...)
    EOS
    U_CHARS2 = Regexp.compile(<<-EOS.gsub(/\s+/, ''))
      \\xC2\\xB7|                                        (?# \\u00B7|)
      \\xCC[\\x80-\\xBF]|\\xCD[\\x80-\\xAF]|               (?# [\\u0300-\\u036F]|)
      \\xE2\\x80\\xBF|\\xE2\\x81\\x80                      (?# [\\u203F-\\u2040])
    EOS
  end
end
96
+
# Pull in U_CHARS1 / U_CHARS2 from whichever module matches the interpreter.
include(RUBY_VERSION >= '1.9' ? Unicode : UTF_8)

# Terminal patterns. None of them is anchored; the matcher methods apply them
# with StringScanner#scan, which only matches at the current scan position.
# KEYWORDS and FUNCTIONS are not defined in this file.
KEYWORD              = /#{KEYWORDS.join('|')}|#{FUNCTIONS.join('|')}/i
DELIMITER            = /\^\^|[{}()\[\],;\.]/
OPERATOR             = /a|\|\||&&|!=|<=|>=|[!=<>+\-*\/]/
COMMENT              = /#.*/

# Name building blocks; bracketed numbers are grammar production numbers.
PN_CHARS_BASE        = /[A-Z]|[a-z]|#{U_CHARS1}/                          # [95]
PN_CHARS_U           = /_|#{PN_CHARS_BASE}/                               # [96]
VARNAME              = /(?:[0-9]|#{PN_CHARS_U})
                        (?:[0-9]|#{PN_CHARS_U}|#{U_CHARS2})*/x            # [97]
PN_CHARS             = /-|[0-9]|#{PN_CHARS_U}|#{U_CHARS2}/                # [98]
PN_CHARS_BODY        = /(?:(?:\.|#{PN_CHARS})*#{PN_CHARS})?/
PN_PREFIX            = /#{PN_CHARS_BASE}#{PN_CHARS_BODY}/                 # [99]
PN_LOCAL             = /(?:[0-9]|#{PN_CHARS_U})#{PN_CHARS_BODY}/          # [100]

# Token patterns. Where a pattern has capture groups, the matcher methods
# read the token value from them via scanner[1] / scanner[2].
IRI_REF              = /<([^<>"{}|^`\\\x00-\x20]*)>/                      # [70]
PNAME_NS             = /(#{PN_PREFIX}?):/                                 # [71]
PNAME_LN             = /#{PNAME_NS}(#{PN_LOCAL})/                         # [72]
BLANK_NODE_LABEL     = /_:(#{PN_LOCAL})/                                  # [73]
VAR1                 = /\?(#{VARNAME})/                                   # [74]
VAR2                 = /\$(#{VARNAME})/                                   # [75]
LANGTAG              = /@([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)/                   # [76]
INTEGER              = /[0-9]+/                                           # [77]
DECIMAL              = /(?:[0-9]+\.[0-9]*|\.[0-9]+)/                      # [78]
EXPONENT             = /[eE][+-]?[0-9]+/                                  # [86]
DOUBLE               = /(?:[0-9]+\.[0-9]*|\.[0-9]+|[0-9]+)#{EXPONENT}/    # [79]
ECHAR                = /\\[tbnrf\\"']/                                    # [91]
STRING_LITERAL1      = /'((?:[^\x27\x5C\x0A\x0D]|#{ECHAR})*)'/            # [87]
STRING_LITERAL2      = /"((?:[^\x22\x5C\x0A\x0D]|#{ECHAR})*)"/            # [88]
STRING_LITERAL_LONG1 = /'''((?:(?:'|'')?(?:[^'\\]|#{ECHAR})+)*)'''/m      # [89]
STRING_LITERAL_LONG2 = /"""((?:(?:"|"")?(?:[^"\\]|#{ECHAR})+)*)"""/m      # [90]
WS                   = /\x20|\x09|\x0D|\x0A/                              # [93]
NIL                  = /\(#{WS}*\)/                                       # [92]
ANON                 = /\[#{WS}*\]/                                       # [94]

BooleanLiteral       = /true|false/                                       # [65]
# NOTE: this constant shadows the core String class inside this namespace,
# which is why other code in this class spells the core class as `::String`.
String               = /#{STRING_LITERAL_LONG1}|#{STRING_LITERAL_LONG2}|
                        #{STRING_LITERAL1}|#{STRING_LITERAL2}/x           # [66]

# Make all defined regular expression constants immutable:
constants.each do |name|
  const_get(name).freeze
end
139
+
##
# Expands Unicode codepoint escapes in a copy of `input`.
#
# Every `\uXXXX` and `\UXXXXXXXX` sequence is replaced by the UTF-8 encoding
# of the codepoint it names. The argument itself is never modified.
#
# @param  [String] input
# @return [String] a new string; tagged as UTF-8 on Ruby 1.9+
# @see    http://www.w3.org/TR/rdf-sparql-query/#codepointEscape
def self.unescape_codepoints(input)
  result = input.dup
  encoding_aware = result.respond_to?(:force_encoding) # Ruby 1.9+

  # Work on the copy as raw bytes while substituting (presumably so that the
  # packed UTF-8 bytes can be spliced in without encoding-compatibility
  # errors), then relabel the result as UTF-8 at the end.
  result.force_encoding(Encoding::ASCII_8BIT) if encoding_aware

  result.gsub!(ESCAPE_CHAR) do
    hex_digits = $1 || $2 # group 1: \uXXXX, group 2: \UXXXXXXXX
    utf8_bytes = [hex_digits.hex].pack('U*')
    if utf8_bytes.respond_to?(:force_encoding)
      utf8_bytes.force_encoding(Encoding::ASCII_8BIT)
    else
      utf8_bytes
    end
  end

  result.force_encoding(Encoding::UTF_8) if encoding_aware
  result
end
161
+
##
# Expands string escape sequences in a copy of `input`.
#
# Each sequence matched by ECHAR (such as `\n` or `\t`) is replaced by the
# character that ESCAPE_CHARS maps it to.
#
# @param  [String] input
# @return [String] a new string with the escapes expanded
# @see    http://www.w3.org/TR/rdf-sparql-query/#grammarEscapes
def self.unescape_string(input)
  input.gsub(ECHAR) do |sequence|
    ESCAPE_CHARS[sequence]
  end
end
173
+
##
# Builds a lexer for the given `input` string or stream.
#
# Without a block the new lexer is returned. With a block, the lexer is
# passed to the block and the block's result is returned instead.
#
# @param  [String, #to_s] input
# @param  [Hash{Symbol => Object}] options
# @yield  [lexer]
# @yieldparam [Lexer] lexer
# @return [Lexer, Object] the lexer, or the block's return value
# @raise  [Lexer::Error] on invalid input
def self.tokenize(input, options = {}, &block)
  instance = new(input, options)
  return instance unless block_given?
  block.call(instance)
end
187
+
##
# Initializes a new lexer instance.
#
# The options hash is copied, so later changes to the caller's hash do not
# affect the lexer. When `input` is nil (or false) no input is assigned.
#
# @param [String, #to_s] input
# @param [Hash{Symbol => Object}] options
def initialize(input = nil, options = {})
  @options = options.dup
  return unless input
  self.input = input
end
197
+
##
# Any additional options for the lexer.
#
# @return [Hash]
attr_reader :options

##
# The current input string being processed.
#
# Only the reader is generated here. The writer is the hand-written
# `input=` method of this class; generating one with `attr_accessor` as well
# would only create a method that is immediately redefined (and trigger a
# "method redefined" warning under `ruby -w`).
#
# @return [String]
attr_reader :input

##
# The current line number (zero-based).
#
# @return [Integer]
attr_reader :lineno
215
+
##
# Assigns the text to be tokenized and resets the line counter.
#
# Strings are used as given, anything that responds to `read` (an IO, a
# StringIO, ...) is read, and every other object is converted with `to_s`.
# The stream check is duck-typed rather than written as `IO, StringIO`
# because this file never requires 'stringio'; naming that constant would
# raise NameError for non-String input unless something else had loaded it.
#
# The text is copied, tagged as UTF-8 (Ruby 1.9+), and has its `\uXXXX` /
# `\UXXXXXXXX` escapes expanded before being stored.
#
# @param  [String, #read, #to_s] input
# @return [void]
def input=(input)
  text =
    if input.is_a?(::String)
      input
    elsif input.respond_to?(:read)
      input.read
    else
      input.to_s
    end

  text = text.dup # never mutate the caller's object
  text.force_encoding(Encoding::UTF_8) if text.respond_to?(:force_encoding) # Ruby 1.9+
  @input  = self.class.unescape_codepoints(text)
  @lineno = 0
end
230
+
##
# Returns `true` if the input string is lexically valid.
#
# Valid means that tokenizing the whole input yields at least one token and
# does not raise a lexer Error. Counting the tokens runs the lexer over the
# complete input.
#
# @return [Boolean]
def valid?
  count > 0
rescue Error
  false
end
245
+
##
# Enumerates each token in the input string.
#
# With a block, the input is scanned from the start: whitespace and comments
# are skipped, every recognized token is yielded, and the first unrecognized
# lexeme raises an Error carrying the input, the lexeme and the line number.
# The internal scanner is always released afterwards, including when an
# error is raised or the caller breaks out of the block early.
#
# An enumerator over the tokens is returned in every case, with or without
# a block.
#
# @yield  [token]
# @yieldparam [Token] token
# @return [Enumerator]
# @raise  [Error] when the input contains an invalid token
def each_token(&block)
  if block_given?
    @lineno = 0
    @scanner = StringScanner.new(@input)
    begin
      until scanner.eos?
        next if skip_whitespace || skip_comment

        if (matched = match_token)
          yield matched
        else
          # Report only the offending lexeme: the rest of the input up to the
          # next whitespace or comment. If that cannot be isolated (the split
          # raised or returned nothing), report everything that is left.
          lexeme = begin
            @scanner.rest.split(/#{WS}|#{COMMENT}/).first
          rescue StandardError
            nil
          end
          lexeme ||= @scanner.rest
          raise Error.new("invalid token #{lexeme.inspect} on line #{lineno + 1}",
            :input => input, :token => lexeme, :lineno => lineno)
        end
      end
    ensure
      @scanner = nil
    end
  end
  enum_for(:each_token)
end
272
+ alias_method :each, :each_token
273
+
274
+ protected
275
+
276
+ # @return [StringScanner]
277
+ attr_reader :scanner
278
+
# Consumes one whitespace match at the current scan position, bumping the
# line counter for every line feed in the consumed text.
#
# @return [String, nil] the consumed whitespace, or nil if there was none
# @see http://www.w3.org/TR/rdf-sparql-query/#whitespace
def skip_whitespace
  blank = scanner.scan(WS)
  return unless blank

  @lineno += blank.count("\n")
  blank
end
287
+
# Skips a comment starting at the current scan position. The comment runs to
# the end of the current line; the line terminator itself is not consumed.
#
# @return [Integer, nil] the number of characters skipped, or nil when the
#   scan position is not at a comment
# @see http://www.w3.org/TR/rdf-sparql-query/#grammarComments
def skip_comment
  scanner.skip(COMMENT)
end
293
+
# Tries every token matcher in priority order at the current scan position
# and returns the first token produced, or nil when none of them matches.
# The order matters: earlier matchers win over later ones (for example,
# long string literals are tried before short ones, DOUBLE before DECIMAL
# before INTEGER, and NIL/ANON before the plain delimiters).
#
# @return [Token, nil]
def match_token
  match_var1 || match_var2 ||
    match_iri_ref ||
    match_pname_ln || match_pname_ns ||
    match_string_long_1 || match_string_long_2 ||
    match_string_1 || match_string_2 ||
    match_langtag ||
    match_double || match_decimal || match_integer ||
    match_boolean_literal ||
    match_blank_node_label ||
    match_nil || match_anon ||
    match_keyword || match_delimiter || match_operator
end

# Each matcher below returns a Token on success and nil otherwise.

# `?name` variable; the value is the name without the sigil.
def match_var1
  token(:VAR1, scanner[1].to_s) if scanner.scan(VAR1)
end

# `$name` variable; the value is the name without the sigil.
def match_var2
  token(:VAR2, scanner[1].to_s) if scanner.scan(VAR2)
end

# `<...>` IRI reference; the value is the text between the angle brackets.
def match_iri_ref
  token(:IRI_REF, scanner[1].to_s) if scanner.scan(IRI_REF)
end

# `prefix:local` name; the value is `[prefix, local]`, with a nil prefix
# when the prefix part is empty.
def match_pname_ln
  return unless scanner.scan(PNAME_LN)

  prefix = scanner[1].empty? ? nil : scanner[1].to_s
  token(:PNAME_LN, [prefix, scanner[2].to_s])
end

# `prefix:` namespace; the value is the prefix, or nil when it is empty.
def match_pname_ns
  return unless scanner.scan(PNAME_NS)

  token(:PNAME_NS, scanner[1].empty? ? nil : scanner[1].to_s)
end

# `'''...'''` literal; the value is the unescaped content.
def match_string_long_1
  return unless scanner.scan(STRING_LITERAL_LONG1)

  token(:STRING_LITERAL_LONG1, self.class.unescape_string(scanner[1]))
end

# `"""..."""` literal; the value is the unescaped content.
def match_string_long_2
  return unless scanner.scan(STRING_LITERAL_LONG2)

  token(:STRING_LITERAL_LONG2, self.class.unescape_string(scanner[1]))
end

# `'...'` literal; the value is the unescaped content.
def match_string_1
  return unless scanner.scan(STRING_LITERAL1)

  token(:STRING_LITERAL1, self.class.unescape_string(scanner[1]))
end

# `"..."` literal; the value is the unescaped content.
def match_string_2
  return unless scanner.scan(STRING_LITERAL2)

  token(:STRING_LITERAL2, self.class.unescape_string(scanner[1]))
end

# `@lang` tag; the value is the tag without the `@`.
def match_langtag
  token(:LANGTAG, scanner[1].to_s) if scanner.scan(LANGTAG)
end

# Double literal; the value is the matched text.
def match_double
  lexeme = scanner.scan(DOUBLE)
  token(:DOUBLE, lexeme) if lexeme
end

# Decimal literal; the value is the matched text.
def match_decimal
  lexeme = scanner.scan(DECIMAL)
  token(:DECIMAL, lexeme) if lexeme
end

# Integer literal; the value is the matched text.
def match_integer
  lexeme = scanner.scan(INTEGER)
  token(:INTEGER, lexeme) if lexeme
end

# `true` / `false`; the value is the matched text.
def match_boolean_literal
  lexeme = scanner.scan(BooleanLiteral)
  token(:BooleanLiteral, lexeme) if lexeme
end

# `_:label` blank node; the value is the label without the `_:` prefix.
def match_blank_node_label
  token(:BLANK_NODE_LABEL, scanner[1].to_s) if scanner.scan(BLANK_NODE_LABEL)
end

# `()` with optional inner whitespace; the token carries no value.
def match_nil
  token(:NIL) if scanner.scan(NIL)
end

# `[]` with optional inner whitespace; the token carries no value.
def match_anon
  token(:ANON) if scanner.scan(ANON)
end

# Keyword or function name; untyped token whose value is the upcased text.
def match_keyword
  lexeme = scanner.scan(KEYWORD)
  token(nil, lexeme.upcase.to_s) if lexeme
end

# Delimiter; untyped token whose value is the matched text.
def match_delimiter
  lexeme = scanner.scan(DELIMITER)
  token(nil, lexeme.to_s) if lexeme
end

# Operator; untyped token whose value is the matched text.
def match_operator
  lexeme = scanner.scan(OPERATOR)
  token(nil, lexeme.to_s) if lexeme
end
436
+
437
+ protected
438
+
##
# Builds a token stamped with the lexer's current line number.
#
# The parser relies on the type being a symbolized URI and the value being
# a string, if there is no type. If there is a type, then the value takes
# on the native representation appropriate for that type.
#
# @param  [Symbol] type
# @param  [Object] value
# @return [Token]
def token(type, value = nil)
  position = { :lineno => lineno }
  Token.new(type, value, position)
end
452
+
##
# Represents a lexer token: an optional symbolic type, an optional value,
# the line number it was found on, and any further options.
#
# @example Creating a new token
#   token = SPARQL::Grammar::Lexer::Token.new(:LANGTAG, "en")
#   token.type   #=> :LANGTAG
#   token.value  #=> "en"
#
# @see http://en.wikipedia.org/wiki/Lexical_analysis#Token
class Token
  ##
  # Initializes a new token instance.
  #
  # A non-nil type is normalized to a Symbol via `to_s.to_sym`. The options
  # hash is copied, and its `:lineno` entry is moved out of the copy into
  # the `lineno` attribute.
  #
  # @param  [Symbol] type
  # @param  [Object] value
  # @param  [Hash{Symbol => Object}] options
  # @option options [Integer] :lineno (nil)
  def initialize(type, value = nil, options = {})
    @type    = type ? type.to_s.to_sym : nil
    @value   = value
    @options = options.dup
    @lineno  = @options.delete(:lineno)
  end

  ##
  # The token's symbol type.
  #
  # @return [Symbol]
  attr_reader :type

  ##
  # The token's value.
  #
  # @return [Object]
  attr_reader :value

  ##
  # The line number where the token was encountered.
  #
  # @return [Integer]
  attr_reader :lineno

  ##
  # Any additional options for the token.
  #
  # @return [Hash]
  attr_reader :options

  ##
  # Returns the attribute named by `key`.
  #
  # `0` or `:type` selects the type, `1` or `:value` selects the value. Keys
  # that are neither Integer nor Symbol are converted with `to_s.to_sym`
  # first. Any other key yields nil.
  #
  # @param  [Symbol] key
  # @return [Object]
  def [](key)
    lookup = (key.is_a?(Integer) || key.is_a?(Symbol)) ? key : key.to_s.to_sym
    return @type  if lookup == 0 || lookup == :type
    return @value if lookup == 1 || lookup == :value
    nil
  end

  ##
  # Returns `true` if the given `value` matches either the type or value
  # of this token: symbols are compared with the type, strings with the
  # string form of the value, and anything else with the value itself.
  #
  # @example Matching using the symbolic type
  #   SPARQL::Grammar::Lexer::Token.new(:NIL) === :NIL     #=> true
  #
  # @example Matching using the string value
  #   SPARQL::Grammar::Lexer::Token.new(nil, "{") === "{"  #=> true
  #
  # @param  [Symbol, String] value
  # @return [Boolean]
  def ===(value)
    if value.is_a?(Symbol)
      value == @type
    elsif value.is_a?(::String)
      value.to_s == @value.to_s
    else
      value == @value
    end
  end

  ##
  # Returns a hash table representation of this token.
  #
  # @return [Hash]
  def to_hash
    { :type => @type, :value => @value }
  end

  ##
  # Returns type, if not nil, otherwise value
  def representation
    @type || @value
  end

  ##
  # Returns an array representation of this token.
  #
  # @return [Array]
  def to_a
    [@type, @value]
  end

  ##
  # Returns a developer-friendly representation of this token.
  #
  # @return [String]
  def inspect
    to_hash.inspect
  end
end # class Token
564
+
##
# Raised for errors during lexical analysis.
#
# Besides the message, an error records the input being lexed, the offending
# token and the line number, all taken from the options hash.
#
# @example Raising a lexer error
#   raise SPARQL::Grammar::Lexer::Error.new(
#     "invalid token '%' on line 10",
#     :input => query, :token => '%', :lineno => 9)
#
# @see http://ruby-doc.org/core/classes/StandardError.html
class Error < StandardError
  ##
  # The input string associated with the error.
  #
  # @return [String]
  attr_reader :input

  ##
  # The invalid token which triggered the error.
  #
  # @return [String]
  attr_reader :token

  ##
  # The line number where the error occurred.
  #
  # @return [Integer]
  attr_reader :lineno

  ##
  # Initializes a new lexer error instance.
  #
  # @param  [String, #to_s] message
  # @param  [Hash{Symbol => Object}] options
  # @option options [String]  :input  (nil)
  # @option options [String]  :token  (nil)
  # @option options [Integer] :lineno (nil)
  def initialize(message, options = {})
    @input, @token, @lineno = options.values_at(:input, :token, :lineno)
    super(message.to_s)
  end
end # class Error
608
+ end # class Lexer
609
+ end; end # module SPARQL::Grammar