sparql 0.0.1 → 0.0.2
- data/AUTHORS +3 -0
- data/CREDITS +0 -0
- data/README.markdown +103 -53
- data/UNLICENSE +24 -0
- data/VERSION +1 -0
- data/bin/sparql +87 -0
- data/lib/sparql.rb +105 -22
- data/lib/sparql/algebra.rb +369 -0
- data/lib/sparql/algebra/evaluatable.rb +37 -0
- data/lib/sparql/algebra/expression.rb +284 -0
- data/lib/sparql/algebra/extensions.rb +159 -0
- data/lib/sparql/algebra/operator.rb +492 -0
- data/lib/sparql/algebra/operator/add.rb +34 -0
- data/lib/sparql/algebra/operator/and.rb +65 -0
- data/lib/sparql/algebra/operator/asc.rb +29 -0
- data/lib/sparql/algebra/operator/ask.rb +46 -0
- data/lib/sparql/algebra/operator/base.rb +46 -0
- data/lib/sparql/algebra/operator/bgp.rb +26 -0
- data/lib/sparql/algebra/operator/bound.rb +48 -0
- data/lib/sparql/algebra/operator/compare.rb +84 -0
- data/lib/sparql/algebra/operator/construct.rb +85 -0
- data/lib/sparql/algebra/operator/dataset.rb +77 -0
- data/lib/sparql/algebra/operator/datatype.rb +42 -0
- data/lib/sparql/algebra/operator/desc.rb +17 -0
- data/lib/sparql/algebra/operator/describe.rb +71 -0
- data/lib/sparql/algebra/operator/distinct.rb +50 -0
- data/lib/sparql/algebra/operator/divide.rb +43 -0
- data/lib/sparql/algebra/operator/equal.rb +32 -0
- data/lib/sparql/algebra/operator/exprlist.rb +52 -0
- data/lib/sparql/algebra/operator/filter.rb +71 -0
- data/lib/sparql/algebra/operator/graph.rb +28 -0
- data/lib/sparql/algebra/operator/greater_than.rb +32 -0
- data/lib/sparql/algebra/operator/greater_than_or_equal.rb +33 -0
- data/lib/sparql/algebra/operator/is_blank.rb +35 -0
- data/lib/sparql/algebra/operator/is_iri.rb +37 -0
- data/lib/sparql/algebra/operator/is_literal.rb +36 -0
- data/lib/sparql/algebra/operator/join.rb +67 -0
- data/lib/sparql/algebra/operator/lang.rb +29 -0
- data/lib/sparql/algebra/operator/lang_matches.rb +53 -0
- data/lib/sparql/algebra/operator/left_join.rb +95 -0
- data/lib/sparql/algebra/operator/less_than.rb +32 -0
- data/lib/sparql/algebra/operator/less_than_or_equal.rb +32 -0
- data/lib/sparql/algebra/operator/minus.rb +31 -0
- data/lib/sparql/algebra/operator/multiply.rb +34 -0
- data/lib/sparql/algebra/operator/not.rb +35 -0
- data/lib/sparql/algebra/operator/not_equal.rb +26 -0
- data/lib/sparql/algebra/operator/or.rb +65 -0
- data/lib/sparql/algebra/operator/order.rb +69 -0
- data/lib/sparql/algebra/operator/plus.rb +31 -0
- data/lib/sparql/algebra/operator/prefix.rb +45 -0
- data/lib/sparql/algebra/operator/project.rb +46 -0
- data/lib/sparql/algebra/operator/reduced.rb +47 -0
- data/lib/sparql/algebra/operator/regex.rb +70 -0
- data/lib/sparql/algebra/operator/same_term.rb +46 -0
- data/lib/sparql/algebra/operator/slice.rb +60 -0
- data/lib/sparql/algebra/operator/str.rb +35 -0
- data/lib/sparql/algebra/operator/subtract.rb +32 -0
- data/lib/sparql/algebra/operator/union.rb +55 -0
- data/lib/sparql/algebra/query.rb +99 -0
- data/lib/sparql/algebra/sxp_extensions.rb +35 -0
- data/lib/sparql/algebra/version.rb +20 -0
- data/lib/sparql/extensions.rb +102 -0
- data/lib/sparql/grammar.rb +298 -0
- data/lib/sparql/grammar/lexer.rb +609 -0
- data/lib/sparql/grammar/parser.rb +1383 -0
- data/lib/sparql/grammar/parser/meta.rb +1801 -0
- data/lib/sparql/results.rb +220 -0
- data/lib/sparql/version.rb +20 -0
- metadata +232 -62
- data/Rakefile +0 -22
- data/coverage/index.html +0 -252
- data/coverage/lib-sparql-execute_sparql_rb.html +0 -621
- data/coverage/lib-sparql_rb.html +0 -622
- data/lib/sparql/execute_sparql.rb +0 -27
- data/lib/sparql/sparql.treetop +0 -159
- data/sparql.gemspec +0 -16
- data/spec/spec.opts +0 -2
- data/spec/spec_helper.rb +0 -24
- data/spec/unit/graph_parsing_spec.rb +0 -76
- data/spec/unit/iri_parsing_spec.rb +0 -46
- data/spec/unit/prefixed_names_parsing_spec.rb +0 -40
- data/spec/unit/primitives_parsing_spec.rb +0 -26
- data/spec/unit/sparql_parsing_spec.rb +0 -72
- data/spec/unit/variables_parsing_spec.rb +0 -36
data/lib/sparql/grammar/lexer.rb (new file)
@@ -0,0 +1,609 @@
require 'strscan' unless defined?(StringScanner)
require 'bigdecimal' unless defined?(BigDecimal)

module SPARQL; module Grammar
  ##
  # A lexical analyzer for the SPARQL 1.0 grammar.
  #
  # Note that productions [80]-[85] have been incorporated directly into
  # [77], [78], [79].
  #
  # @example Tokenizing a SPARQL query string
  #   query = "SELECT * WHERE { ?s ?p ?o }"
  #   lexer = SPARQL::Grammar::Lexer.tokenize(query)
  #   lexer.each_token do |token|
  #     puts token.inspect
  #   end
  #
  # @example Handling error conditions
  #   begin
  #     SPARQL::Grammar::Lexer.tokenize(query)
  #   rescue SPARQL::Grammar::Lexer::Error => error
  #     warn error.inspect
  #   end
  #
  # @see http://www.w3.org/TR/rdf-sparql-query/#grammar
  # @see http://en.wikipedia.org/wiki/Lexical_analysis
  class Lexer
    include Enumerable

    ESCAPE_CHARS = {
      '\t' => "\t",    # \u0009 (tab)
      '\n' => "\n",    # \u000A (line feed)
      '\r' => "\r",    # \u000D (carriage return)
      '\b' => "\b",    # \u0008 (backspace)
      '\f' => "\f",    # \u000C (form feed)
      '\\"' => '"',    # \u0022 (quotation mark, double quote mark)
      '\\\'' => '\'',  # \u0027 (apostrophe-quote, single quote mark)
      '\\\\' => '\\'   # \u005C (backslash)
    }
    ESCAPE_CHAR4 = /\\u([0-9A-Fa-f]{4,4})/ # \uXXXX
    ESCAPE_CHAR8 = /\\U([0-9A-Fa-f]{8,8})/ # \UXXXXXXXX
    ESCAPE_CHAR  = /#{ESCAPE_CHAR4}|#{ESCAPE_CHAR8}/

    ##
    # Unicode regular expressions for Ruby 1.9+ with the Oniguruma engine.
    module Unicode
      if RUBY_VERSION >= '1.9'
        U_CHARS1 = Regexp.compile(<<-EOS.gsub(/\s+/, ''))
          [\\u00C0-\\u00D6]|[\\u00D8-\\u00F6]|[\\u00F8-\\u02FF]|
          [\\u0370-\\u037D]|[\\u037F-\\u1FFF]|[\\u200C-\\u200D]|
          [\\u2070-\\u218F]|[\\u2C00-\\u2FEF]|[\\u3001-\\uD7FF]|
          [\\uF900-\\uFDCF]|[\\uFDF0-\\uFFFD]|[\\u{10000}-\\u{EFFFF}]
        EOS
        U_CHARS2 = Regexp.compile("\\u00B7|[\\u0300-\\u036F]|[\\u203F-\\u2040]")
      end
    end

    ##
    # UTF-8 regular expressions for Ruby 1.8.x.
    module UTF_8
      if RUBY_VERSION < '1.9'
        U_CHARS1 = Regexp.compile(<<-EOS.gsub(/\s+/, ''))
          \\xC3[\\x80-\\x96]| (?# [\\u00C0-\\u00D6]|)
          \\xC3[\\x98-\\xB6]| (?# [\\u00D8-\\u00F6]|)
          \\xC3[\\xB8-\\xBF]|[\\xC4-\\xCB][\\x80-\\xBF]| (?# [\\u00F8-\\u02FF]|)
          \\xCD[\\xB0-\\xBD]| (?# [\\u0370-\\u037D]|)
          \\xCD\\xBF|[\\xCE-\\xDF][\\x80-\\xBF]| (?# [\\u037F-\\u1FFF]|)
          \\xE0[\\xA0-\\xBF][\\x80-\\xBF]| (?# ...)
          \\xE1[\\x80-\\xBF][\\x80-\\xBF]| (?# ...)
          \\xE2\\x80[\\x8C-\\x8D]| (?# [\\u200C-\\u200D]|)
          \\xE2\\x81[\\xB0-\\xBF]| (?# [\\u2070-\\u218F]|)
          \\xE2[\\x82-\\x85][\\x80-\\xBF]| (?# ...)
          \\xE2\\x86[\\x80-\\x8F]| (?# ...)
          \\xE2[\\xB0-\\xBE][\\x80-\\xBF]| (?# [\\u2C00-\\u2FEF]|)
          \\xE2\\xBF[\\x80-\\xAF]| (?# ...)
          \\xE3\\x80[\\x81-\\xBF]| (?# [\\u3001-\\uD7FF]|)
          \\xE3[\\x81-\\xBF][\\x80-\\xBF]| (?# ...)
          [\\xE4-\\xEC][\\x80-\\xBF][\\x80-\\xBF]| (?# ...)
          \\xED[\\x80-\\x9F][\\x80-\\xBF]| (?# ...)
          \\xEF[\\xA4-\\xB6][\\x80-\\xBF]| (?# [\\uF900-\\uFDCF]|)
          \\xEF\\xB7[\\x80-\\x8F]| (?# ...)
          \\xEF\\xB7[\\xB0-\\xBF]| (?# [\\uFDF0-\\uFFFD]|)
          \\xEF[\\xB8-\\xBE][\\x80-\\xBF]| (?# ...)
          \\xEF\\xBF[\\x80-\\xBD]| (?# ...)
          \\xF0[\\x90-\\xBF][\\x80-\\xBF][\\x80-\\xBF]| (?# [\\u{10000}-\\u{EFFFF}])
          [\\xF1-\\xF2][\\x80-\\xBF][\\x80-\\xBF][\\x80-\\xBF]|
          \\xF3[\\x80-\\xAF][\\x80-\\xBF][\\x80-\\xBF] (?# ...)
        EOS
        U_CHARS2 = Regexp.compile(<<-EOS.gsub(/\s+/, ''))
          \\xC2\\xB7| (?# \\u00B7|)
          \\xCC[\\x80-\\xBF]|\\xCD[\\x80-\\xAF]| (?# [\\u0300-\\u036F]|)
          \\xE2\\x80\\xBF|\\xE2\\x81\\x80 (?# [\\u203F-\\u2040])
        EOS
      end
    end

    include RUBY_VERSION >= '1.9' ? Unicode : UTF_8

    KEYWORD = /#{KEYWORDS.join('|')}|#{FUNCTIONS.join('|')}/i
    DELIMITER = /\^\^|[{}()\[\],;\.]/
    OPERATOR = /a|\|\||&&|!=|<=|>=|[!=<>+\-*\/]/
    COMMENT = /#.*/

    PN_CHARS_BASE = /[A-Z]|[a-z]|#{U_CHARS1}/ # [95]
    PN_CHARS_U = /_|#{PN_CHARS_BASE}/ # [96]
    VARNAME = /(?:[0-9]|#{PN_CHARS_U})
               (?:[0-9]|#{PN_CHARS_U}|#{U_CHARS2})*/x # [97]
    PN_CHARS = /-|[0-9]|#{PN_CHARS_U}|#{U_CHARS2}/ # [98]
    PN_CHARS_BODY = /(?:(?:\.|#{PN_CHARS})*#{PN_CHARS})?/
    PN_PREFIX = /#{PN_CHARS_BASE}#{PN_CHARS_BODY}/ # [99]
    PN_LOCAL = /(?:[0-9]|#{PN_CHARS_U})#{PN_CHARS_BODY}/ # [100]

    IRI_REF = /<([^<>"{}|^`\\\x00-\x20]*)>/ # [70]
    PNAME_NS = /(#{PN_PREFIX}?):/ # [71]
    PNAME_LN = /#{PNAME_NS}(#{PN_LOCAL})/ # [72]
    BLANK_NODE_LABEL = /_:(#{PN_LOCAL})/ # [73]
    VAR1 = /\?(#{VARNAME})/ # [74]
    VAR2 = /\$(#{VARNAME})/ # [75]
    LANGTAG = /@([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)/ # [76]
    INTEGER = /[0-9]+/ # [77]
    DECIMAL = /(?:[0-9]+\.[0-9]*|\.[0-9]+)/ # [78]
    EXPONENT = /[eE][+-]?[0-9]+/ # [86]
    DOUBLE = /(?:[0-9]+\.[0-9]*|\.[0-9]+|[0-9]+)#{EXPONENT}/ # [79]
    ECHAR = /\\[tbnrf\\"']/ # [91]
    STRING_LITERAL1 = /'((?:[^\x27\x5C\x0A\x0D]|#{ECHAR})*)'/ # [87]
    STRING_LITERAL2 = /"((?:[^\x22\x5C\x0A\x0D]|#{ECHAR})*)"/ # [88]
    STRING_LITERAL_LONG1 = /'''((?:(?:'|'')?(?:[^'\\]|#{ECHAR})+)*)'''/m # [89]
    STRING_LITERAL_LONG2 = /"""((?:(?:"|"")?(?:[^"\\]|#{ECHAR})+)*)"""/m # [90]
    WS = /\x20|\x09|\x0D|\x0A/ # [93]
    NIL = /\(#{WS}*\)/ # [92]
    ANON = /\[#{WS}*\]/ # [94]

    BooleanLiteral = /true|false/ # [65]
    String = /#{STRING_LITERAL_LONG1}|#{STRING_LITERAL_LONG2}|
              #{STRING_LITERAL1}|#{STRING_LITERAL2}/x # [66]

    # Make all defined regular expression constants immutable:
    constants.each { |name| const_get(name).freeze }

    ##
    # Returns a copy of the given `input` string with all `\uXXXX` and
    # `\UXXXXXXXX` Unicode codepoint escape sequences replaced with their
    # unescaped UTF-8 character counterparts.
    #
    # @param [String] input
    # @return [String]
    # @see http://www.w3.org/TR/rdf-sparql-query/#codepointEscape
    def self.unescape_codepoints(input)
      string = input.dup
      string.force_encoding(Encoding::ASCII_8BIT) if string.respond_to?(:force_encoding) # Ruby 1.9+

      # Decode \uXXXX and \UXXXXXXXX code points:
      string.gsub!(ESCAPE_CHAR) do
        s = [($1 || $2).hex].pack('U*')
        s.respond_to?(:force_encoding) ? s.force_encoding(Encoding::ASCII_8BIT) : s
      end

      string.force_encoding(Encoding::UTF_8) if string.respond_to?(:force_encoding) # Ruby 1.9+
      string
    end

    ##
    # Returns a copy of the given `input` string with all string escape
    # sequences (e.g. `\n` and `\t`) replaced with their unescaped UTF-8
    # character counterparts.
    #
    # @param [String] input
    # @return [String]
    # @see http://www.w3.org/TR/rdf-sparql-query/#grammarEscapes
    def self.unescape_string(input)
      input.gsub(ECHAR) { |escaped| ESCAPE_CHARS[escaped] }
    end

    ##
    # Tokenizes the given `input` string or stream.
    #
    # @param [String, #to_s] input
    # @param [Hash{Symbol => Object}] options
    # @yield [lexer]
    # @yieldparam [Lexer] lexer
    # @return [Lexer]
    # @raise [Lexer::Error] on invalid input
    def self.tokenize(input, options = {}, &block)
      lexer = self.new(input, options)
      block_given? ? block.call(lexer) : lexer
    end

    ##
    # Initializes a new lexer instance.
    #
    # @param [String, #to_s] input
    # @param [Hash{Symbol => Object}] options
    def initialize(input = nil, options = {})
      @options = options.dup
      self.input = input if input
    end

    ##
    # Any additional options for the lexer.
    #
    # @return [Hash]
    attr_reader :options

    ##
    # The current input string being processed.
    #
    # @return [String]
    attr_accessor :input

    ##
    # The current line number (zero-based).
    #
    # @return [Integer]
    attr_reader :lineno

    ##
    # @param [String, #to_s] input
    # @return [void]
    def input=(input)
      @input = case input
        when ::String then input
        when IO, StringIO then input.read
        else input.to_s
      end
      @input = @input.dup
      @input.force_encoding(Encoding::UTF_8) if @input.respond_to?(:force_encoding) # Ruby 1.9+
      @input = self.class.unescape_codepoints(@input)
      @lineno = 0
    end

    ##
    # Returns `true` if the input string is lexically valid.
    #
    # To be considered valid, the input string must contain more than zero
    # tokens, and must not contain any invalid tokens.
    #
    # @return [Boolean]
    def valid?
      begin
        !count.zero?
      rescue Error
        false
      end
    end

    ##
    # Enumerates each token in the input string.
    #
    # @yield [token]
    # @yieldparam [Token] token
    # @return [Enumerator]
    def each_token(&block)
      if block_given?
        @lineno = 0
        @scanner = StringScanner.new(@input)
        until scanner.eos?
          case
          when skip_whitespace
          when skip_comment
          when token = match_token
            yield token
          else
            lexeme = (@scanner.rest.split(/#{WS}|#{COMMENT}/).first rescue nil) || @scanner.rest
            raise Error.new("invalid token #{lexeme.inspect} on line #{lineno + 1}",
              :input => input, :token => lexeme, :lineno => lineno)
          end
        end
        @scanner = nil
      end
      enum_for(:each_token)
    end
    alias_method :each, :each_token

    protected

    # @return [StringScanner]
    attr_reader :scanner

    # @see http://www.w3.org/TR/rdf-sparql-query/#whitespace
    def skip_whitespace
      # skip all white space, but keep track of the current line number
      if matched = scanner.scan(WS)
        @lineno += matched.count("\n")
        matched
      end
    end

    # @see http://www.w3.org/TR/rdf-sparql-query/#grammarComments
    def skip_comment
      # skip the remainder of the current line
      skipped = scanner.skip(COMMENT)
    end

    def match_token
      match_var1 ||
      match_var2 ||
      match_iri_ref ||
      match_pname_ln ||
      match_pname_ns ||
      match_string_long_1 ||
      match_string_long_2 ||
      match_string_1 ||
      match_string_2 ||
      match_langtag ||
      match_double ||
      match_decimal ||
      match_integer ||
      match_boolean_literal ||
      match_blank_node_label ||
      match_nil ||
      match_anon ||
      match_keyword ||
      match_delimiter ||
      match_operator
    end

    def match_var1
      if matched = scanner.scan(VAR1)
        token(:VAR1, scanner[1].to_s)
      end
    end

    def match_var2
      if matched = scanner.scan(VAR2)
        token(:VAR2, scanner[1].to_s)
      end
    end

    def match_iri_ref
      if matched = scanner.scan(IRI_REF)
        token(:IRI_REF, scanner[1].to_s)
      end
    end

    def match_pname_ln
      if matched = scanner.scan(PNAME_LN)
        token(:PNAME_LN, [scanner[1].empty? ? nil : scanner[1].to_s, scanner[2].to_s])
      end
    end

    def match_pname_ns
      if matched = scanner.scan(PNAME_NS)
        token(:PNAME_NS, scanner[1].empty? ? nil : scanner[1].to_s)
      end
    end

    def match_string_long_1
      if matched = scanner.scan(STRING_LITERAL_LONG1)
        token(:STRING_LITERAL_LONG1, self.class.unescape_string(scanner[1]))
      end
    end

    def match_string_long_2
      if matched = scanner.scan(STRING_LITERAL_LONG2)
        token(:STRING_LITERAL_LONG2, self.class.unescape_string(scanner[1]))
      end
    end

    def match_string_1
      if matched = scanner.scan(STRING_LITERAL1)
        token(:STRING_LITERAL1, self.class.unescape_string(scanner[1]))
      end
    end

    def match_string_2
      if matched = scanner.scan(STRING_LITERAL2)
        token(:STRING_LITERAL2, self.class.unescape_string(scanner[1]))
      end
    end

    def match_langtag
      if matched = scanner.scan(LANGTAG)
        token(:LANGTAG, scanner[1].to_s)
      end
    end

    def match_double
      if matched = scanner.scan(DOUBLE)
        token(:DOUBLE, matched)
      end
    end

    def match_decimal
      if matched = scanner.scan(DECIMAL)
        token(:DECIMAL, matched)
      end
    end

    def match_integer
      if matched = scanner.scan(INTEGER)
        token(:INTEGER, matched)
      end
    end

    def match_boolean_literal
      if matched = scanner.scan(BooleanLiteral)
        token(:BooleanLiteral, matched)
      end
    end

    def match_blank_node_label
      if matched = scanner.scan(BLANK_NODE_LABEL)
        token(:BLANK_NODE_LABEL, scanner[1].to_s)
      end
    end

    def match_nil
      if matched = scanner.scan(NIL)
        token(:NIL)
      end
    end

    def match_anon
      if matched = scanner.scan(ANON)
        token(:ANON)
      end
    end

    def match_keyword
      if matched = scanner.scan(KEYWORD)
        token(nil, matched.upcase.to_s)
      end
    end

    def match_delimiter
      if matched = scanner.scan(DELIMITER)
        token(nil, matched.to_s)
      end
    end

    def match_operator
      if matched = scanner.scan(OPERATOR)
        token(nil, matched.to_s)
      end
    end

    protected

    ##
    # Constructs a new token object annotated with the current line number.
    #
    # The parser relies on the type being a symbolized URI and the value being
    # a string, if there is no type. If there is a type, then the value takes
    # on the native representation appropriate for that type.
    #
    # @param [Symbol] type
    # @param [Object] value
    # @return [Token]
    def token(type, value = nil)
      Token.new(type, value, :lineno => lineno)
    end

    ##
    # Represents a lexer token.
    #
    # @example Creating a new token
    #   token = SPARQL::Grammar::Lexer::Token.new(:LANGTAG, :en)
    #   token.type  #=> :LANGTAG
    #   token.value #=> "en"
    #
    # @see http://en.wikipedia.org/wiki/Lexical_analysis#Token
    class Token
      ##
      # Initializes a new token instance.
      #
      # @param [Symbol] type
      # @param [Object] value
      # @param [Hash{Symbol => Object}] options
      # @option options [Integer] :lineno (nil)
      def initialize(type, value = nil, options = {})
        @type, @value = (type ? type.to_s.to_sym : nil), value
        @options = options.dup
        @lineno = @options.delete(:lineno)
      end

      ##
      # The token's symbol type.
      #
      # @return [Symbol]
      attr_reader :type

      ##
      # The token's value.
      #
      # @return [Object]
      attr_reader :value

      ##
      # The line number where the token was encountered.
      #
      # @return [Integer]
      attr_reader :lineno

      ##
      # Any additional options for the token.
      #
      # @return [Hash]
      attr_reader :options

      ##
      # Returns the attribute named by `key`.
      #
      # @param [Symbol] key
      # @return [Object]
      def [](key)
        key = key.to_s.to_sym unless key.is_a?(Integer) || key.is_a?(Symbol)
        case key
          when 0, :type then @type
          when 1, :value then @value
          else nil
        end
      end

      ##
      # Returns `true` if the given `value` matches either the type or value
      # of this token.
      #
      # @example Matching using the symbolic type
      #   SPARQL::Grammar::Lexer::Token.new(:NIL) === :NIL    #=> true
      #
      # @example Matching using the string value
      #   SPARQL::Grammar::Lexer::Token.new(nil, "{") === "{" #=> true
      #
      # @param [Symbol, String] value
      # @return [Boolean]
      def ===(value)
        case value
          when Symbol then value == @type
          when ::String then value.to_s == @value.to_s
          else value == @value
        end
      end

      ##
      # Returns a hash table representation of this token.
      #
      # @return [Hash]
      def to_hash
        {:type => @type, :value => @value}
      end

      ##
      # Returns type, if not nil, otherwise value
      def representation
        @type ? @type : @value
      end

      ##
      # Returns an array representation of this token.
      #
      # @return [Array]
      def to_a
        [@type, @value]
      end

      ##
      # Returns a developer-friendly representation of this token.
      #
      # @return [String]
      def inspect
        to_hash.inspect
      end
    end # class Token

    ##
    # Raised for errors during lexical analysis.
    #
    # @example Raising a lexer error
    #   raise SPARQL::Grammar::Lexer::Error.new(
    #     "invalid token '%' on line 10",
    #     :input => query, :token => '%', :lineno => 9)
    #
    # @see http://ruby-doc.org/core/classes/StandardError.html
    class Error < StandardError
      ##
      # The input string associated with the error.
      #
      # @return [String]
      attr_reader :input

      ##
      # The invalid token which triggered the error.
      #
      # @return [String]
      attr_reader :token

      ##
      # The line number where the error occurred.
      #
      # @return [Integer]
      attr_reader :lineno

      ##
      # Initializes a new lexer error instance.
      #
      # @param [String, #to_s] message
      # @param [Hash{Symbol => Object}] options
      # @option options [String] :input (nil)
      # @option options [String] :token (nil)
      # @option options [Integer] :lineno (nil)
      def initialize(message, options = {})
        @input = options[:input]
        @token = options[:token]
        @lineno = options[:lineno]
        super(message.to_s)
      end
    end # class Error
  end # class Lexer
end; end # module SPARQL::Grammar