sparql 0.0.1 → 0.0.2
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between versions as they appear in their respective public registries.
- data/AUTHORS +3 -0
- data/CREDITS +0 -0
- data/README.markdown +103 -53
- data/UNLICENSE +24 -0
- data/VERSION +1 -0
- data/bin/sparql +87 -0
- data/lib/sparql.rb +105 -22
- data/lib/sparql/algebra.rb +369 -0
- data/lib/sparql/algebra/evaluatable.rb +37 -0
- data/lib/sparql/algebra/expression.rb +284 -0
- data/lib/sparql/algebra/extensions.rb +159 -0
- data/lib/sparql/algebra/operator.rb +492 -0
- data/lib/sparql/algebra/operator/add.rb +34 -0
- data/lib/sparql/algebra/operator/and.rb +65 -0
- data/lib/sparql/algebra/operator/asc.rb +29 -0
- data/lib/sparql/algebra/operator/ask.rb +46 -0
- data/lib/sparql/algebra/operator/base.rb +46 -0
- data/lib/sparql/algebra/operator/bgp.rb +26 -0
- data/lib/sparql/algebra/operator/bound.rb +48 -0
- data/lib/sparql/algebra/operator/compare.rb +84 -0
- data/lib/sparql/algebra/operator/construct.rb +85 -0
- data/lib/sparql/algebra/operator/dataset.rb +77 -0
- data/lib/sparql/algebra/operator/datatype.rb +42 -0
- data/lib/sparql/algebra/operator/desc.rb +17 -0
- data/lib/sparql/algebra/operator/describe.rb +71 -0
- data/lib/sparql/algebra/operator/distinct.rb +50 -0
- data/lib/sparql/algebra/operator/divide.rb +43 -0
- data/lib/sparql/algebra/operator/equal.rb +32 -0
- data/lib/sparql/algebra/operator/exprlist.rb +52 -0
- data/lib/sparql/algebra/operator/filter.rb +71 -0
- data/lib/sparql/algebra/operator/graph.rb +28 -0
- data/lib/sparql/algebra/operator/greater_than.rb +32 -0
- data/lib/sparql/algebra/operator/greater_than_or_equal.rb +33 -0
- data/lib/sparql/algebra/operator/is_blank.rb +35 -0
- data/lib/sparql/algebra/operator/is_iri.rb +37 -0
- data/lib/sparql/algebra/operator/is_literal.rb +36 -0
- data/lib/sparql/algebra/operator/join.rb +67 -0
- data/lib/sparql/algebra/operator/lang.rb +29 -0
- data/lib/sparql/algebra/operator/lang_matches.rb +53 -0
- data/lib/sparql/algebra/operator/left_join.rb +95 -0
- data/lib/sparql/algebra/operator/less_than.rb +32 -0
- data/lib/sparql/algebra/operator/less_than_or_equal.rb +32 -0
- data/lib/sparql/algebra/operator/minus.rb +31 -0
- data/lib/sparql/algebra/operator/multiply.rb +34 -0
- data/lib/sparql/algebra/operator/not.rb +35 -0
- data/lib/sparql/algebra/operator/not_equal.rb +26 -0
- data/lib/sparql/algebra/operator/or.rb +65 -0
- data/lib/sparql/algebra/operator/order.rb +69 -0
- data/lib/sparql/algebra/operator/plus.rb +31 -0
- data/lib/sparql/algebra/operator/prefix.rb +45 -0
- data/lib/sparql/algebra/operator/project.rb +46 -0
- data/lib/sparql/algebra/operator/reduced.rb +47 -0
- data/lib/sparql/algebra/operator/regex.rb +70 -0
- data/lib/sparql/algebra/operator/same_term.rb +46 -0
- data/lib/sparql/algebra/operator/slice.rb +60 -0
- data/lib/sparql/algebra/operator/str.rb +35 -0
- data/lib/sparql/algebra/operator/subtract.rb +32 -0
- data/lib/sparql/algebra/operator/union.rb +55 -0
- data/lib/sparql/algebra/query.rb +99 -0
- data/lib/sparql/algebra/sxp_extensions.rb +35 -0
- data/lib/sparql/algebra/version.rb +20 -0
- data/lib/sparql/extensions.rb +102 -0
- data/lib/sparql/grammar.rb +298 -0
- data/lib/sparql/grammar/lexer.rb +609 -0
- data/lib/sparql/grammar/parser.rb +1383 -0
- data/lib/sparql/grammar/parser/meta.rb +1801 -0
- data/lib/sparql/results.rb +220 -0
- data/lib/sparql/version.rb +20 -0
- metadata +232 -62
- data/Rakefile +0 -22
- data/coverage/index.html +0 -252
- data/coverage/lib-sparql-execute_sparql_rb.html +0 -621
- data/coverage/lib-sparql_rb.html +0 -622
- data/lib/sparql/execute_sparql.rb +0 -27
- data/lib/sparql/sparql.treetop +0 -159
- data/sparql.gemspec +0 -16
- data/spec/spec.opts +0 -2
- data/spec/spec_helper.rb +0 -24
- data/spec/unit/graph_parsing_spec.rb +0 -76
- data/spec/unit/iri_parsing_spec.rb +0 -46
- data/spec/unit/prefixed_names_parsing_spec.rb +0 -40
- data/spec/unit/primitives_parsing_spec.rb +0 -26
- data/spec/unit/sparql_parsing_spec.rb +0 -72
- data/spec/unit/variables_parsing_spec.rb +0 -36
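
The headline change in 0.0.2 is the removal of the Treetop grammar (`data/lib/sparql/sparql.treetop`, `data/lib/sparql/execute_sparql.rb`) in favour of a hand-written lexer and parser under `lib/sparql/grammar` plus a SPARQL algebra layer under `lib/sparql/algebra`. For orientation, here is a minimal sketch of driving the new lexer, following the `@example` blocks in its own documentation (shown in the diff below); the `require` path is an assumption based on the file layout above.

```ruby
# Minimal usage sketch (not taken from the package itself): tokenize a query
# with the new SPARQL::Grammar::Lexer and report lexical errors.
require 'sparql/grammar'  # assumed entry point, per data/lib/sparql/grammar.rb

query = "SELECT * WHERE { ?s ?p ?o }"

begin
  lexer = SPARQL::Grammar::Lexer.tokenize(query)
  lexer.each_token do |token|
    puts token.inspect  # e.g. {:type=>:VAR1, :value=>"s"}
  end
rescue SPARQL::Grammar::Lexer::Error => error
  warn error.inspect    # carries the offending token and line number
end
```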
The hunk below is the new `data/lib/sparql/grammar/lexer.rb` (609 added lines):

```diff
@@ -0,0 +1,609 @@
+require 'strscan' unless defined?(StringScanner)
+require 'bigdecimal' unless defined?(BigDecimal)
+
+module SPARQL; module Grammar
+  ##
+  # A lexical analyzer for the SPARQL 1.0 grammar.
+  #
+  # Note that productions [80]-[85] have been incorporated directly into
+  # [77], [78], [79].
+  #
+  # @example Tokenizing a SPARQL query string
+  #   query = "SELECT * WHERE { ?s ?p ?o }"
+  #   lexer = SPARQL::Grammar::Lexer.tokenize(query)
+  #   lexer.each_token do |token|
+  #     puts token.inspect
+  #   end
+  #
+  # @example Handling error conditions
+  #   begin
+  #     SPARQL::Grammar::Lexer.tokenize(query)
+  #   rescue SPARQL::Grammar::Lexer::Error => error
+  #     warn error.inspect
+  #   end
+  #
+  # @see http://www.w3.org/TR/rdf-sparql-query/#grammar
+  # @see http://en.wikipedia.org/wiki/Lexical_analysis
+  class Lexer
+    include Enumerable
+
+    ESCAPE_CHARS = {
+      '\t'   => "\t",  # \u0009 (tab)
+      '\n'   => "\n",  # \u000A (line feed)
+      '\r'   => "\r",  # \u000D (carriage return)
+      '\b'   => "\b",  # \u0008 (backspace)
+      '\f'   => "\f",  # \u000C (form feed)
+      '\\"'  => '"',   # \u0022 (quotation mark, double quote mark)
+      '\\\'' => '\'',  # \u0027 (apostrophe-quote, single quote mark)
+      '\\\\' => '\\'   # \u005C (backslash)
+    }
+    ESCAPE_CHAR4 = /\\u([0-9A-Fa-f]{4,4})/ # \uXXXX
+    ESCAPE_CHAR8 = /\\U([0-9A-Fa-f]{8,8})/ # \UXXXXXXXX
+    ESCAPE_CHAR  = /#{ESCAPE_CHAR4}|#{ESCAPE_CHAR8}/
+
+    ##
+    # Unicode regular expressions for Ruby 1.9+ with the Oniguruma engine.
+    module Unicode
+      if RUBY_VERSION >= '1.9'
+        U_CHARS1 = Regexp.compile(<<-EOS.gsub(/\s+/, ''))
+          [\\u00C0-\\u00D6]|[\\u00D8-\\u00F6]|[\\u00F8-\\u02FF]|
+          [\\u0370-\\u037D]|[\\u037F-\\u1FFF]|[\\u200C-\\u200D]|
+          [\\u2070-\\u218F]|[\\u2C00-\\u2FEF]|[\\u3001-\\uD7FF]|
+          [\\uF900-\\uFDCF]|[\\uFDF0-\\uFFFD]|[\\u{10000}-\\u{EFFFF}]
+        EOS
+        U_CHARS2 = Regexp.compile("\\u00B7|[\\u0300-\\u036F]|[\\u203F-\\u2040]")
+      end
+    end
+
+    ##
+    # UTF-8 regular expressions for Ruby 1.8.x.
+    module UTF_8
+      if RUBY_VERSION < '1.9'
+        U_CHARS1 = Regexp.compile(<<-EOS.gsub(/\s+/, ''))
+          \\xC3[\\x80-\\x96]|                            (?# [\\u00C0-\\u00D6]|)
+          \\xC3[\\x98-\\xB6]|                            (?# [\\u00D8-\\u00F6]|)
+          \\xC3[\\xB8-\\xBF]|[\\xC4-\\xCB][\\x80-\\xBF]| (?# [\\u00F8-\\u02FF]|)
+          \\xCD[\\xB0-\\xBD]|                            (?# [\\u0370-\\u037D]|)
+          \\xCD\\xBF|[\\xCE-\\xDF][\\x80-\\xBF]|         (?# [\\u037F-\\u1FFF]|)
+          \\xE0[\\xA0-\\xBF][\\x80-\\xBF]|               (?# ...)
+          \\xE1[\\x80-\\xBF][\\x80-\\xBF]|               (?# ...)
+          \\xE2\\x80[\\x8C-\\x8D]|                       (?# [\\u200C-\\u200D]|)
+          \\xE2\\x81[\\xB0-\\xBF]|                       (?# [\\u2070-\\u218F]|)
+          \\xE2[\\x82-\\x85][\\x80-\\xBF]|               (?# ...)
+          \\xE2\\x86[\\x80-\\x8F]|                       (?# ...)
+          \\xE2[\\xB0-\\xBE][\\x80-\\xBF]|               (?# [\\u2C00-\\u2FEF]|)
+          \\xE2\\xBF[\\x80-\\xAF]|                       (?# ...)
+          \\xE3\\x80[\\x81-\\xBF]|                       (?# [\\u3001-\\uD7FF]|)
+          \\xE3[\\x81-\\xBF][\\x80-\\xBF]|               (?# ...)
+          [\\xE4-\\xEC][\\x80-\\xBF][\\x80-\\xBF]|       (?# ...)
+          \\xED[\\x80-\\x9F][\\x80-\\xBF]|               (?# ...)
+          \\xEF[\\xA4-\\xB6][\\x80-\\xBF]|               (?# [\\uF900-\\uFDCF]|)
+          \\xEF\\xB7[\\x80-\\x8F]|                       (?# ...)
+          \\xEF\\xB7[\\xB0-\\xBF]|                       (?# [\\uFDF0-\\uFFFD]|)
+          \\xEF[\\xB8-\\xBE][\\x80-\\xBF]|               (?# ...)
+          \\xEF\\xBF[\\x80-\\xBD]|                       (?# ...)
+          \\xF0[\\x90-\\xBF][\\x80-\\xBF][\\x80-\\xBF]|  (?# [\\u{10000}-\\u{EFFFF}])
+          [\\xF1-\\xF2][\\x80-\\xBF][\\x80-\\xBF][\\x80-\\xBF]|
+          \\xF3[\\x80-\\xAF][\\x80-\\xBF][\\x80-\\xBF]   (?# ...)
+        EOS
+        U_CHARS2 = Regexp.compile(<<-EOS.gsub(/\s+/, ''))
+          \\xC2\\xB7|                            (?# \\u00B7|)
+          \\xCC[\\x80-\\xBF]|\\xCD[\\x80-\\xAF]| (?# [\\u0300-\\u036F]|)
+          \\xE2\\x80\\xBF|\\xE2\\x81\\x80        (?# [\\u203F-\\u2040])
+        EOS
+      end
+    end
+
+    include RUBY_VERSION >= '1.9' ? Unicode : UTF_8
+
+    KEYWORD   = /#{KEYWORDS.join('|')}|#{FUNCTIONS.join('|')}/i
+    DELIMITER = /\^\^|[{}()\[\],;\.]/
+    OPERATOR  = /a|\|\||&&|!=|<=|>=|[!=<>+\-*\/]/
+    COMMENT   = /#.*/
+
+    PN_CHARS_BASE = /[A-Z]|[a-z]|#{U_CHARS1}/ # [95]
+    PN_CHARS_U    = /_|#{PN_CHARS_BASE}/ # [96]
+    VARNAME       = /(?:[0-9]|#{PN_CHARS_U})
+                     (?:[0-9]|#{PN_CHARS_U}|#{U_CHARS2})*/x # [97]
+    PN_CHARS      = /-|[0-9]|#{PN_CHARS_U}|#{U_CHARS2}/ # [98]
+    PN_CHARS_BODY = /(?:(?:\.|#{PN_CHARS})*#{PN_CHARS})?/
+    PN_PREFIX     = /#{PN_CHARS_BASE}#{PN_CHARS_BODY}/ # [99]
+    PN_LOCAL      = /(?:[0-9]|#{PN_CHARS_U})#{PN_CHARS_BODY}/ # [100]
+
+    IRI_REF              = /<([^<>"{}|^`\\\x00-\x20]*)>/ # [70]
+    PNAME_NS             = /(#{PN_PREFIX}?):/ # [71]
+    PNAME_LN             = /#{PNAME_NS}(#{PN_LOCAL})/ # [72]
+    BLANK_NODE_LABEL     = /_:(#{PN_LOCAL})/ # [73]
+    VAR1                 = /\?(#{VARNAME})/ # [74]
+    VAR2                 = /\$(#{VARNAME})/ # [75]
+    LANGTAG              = /@([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)/ # [76]
+    INTEGER              = /[0-9]+/ # [77]
+    DECIMAL              = /(?:[0-9]+\.[0-9]*|\.[0-9]+)/ # [78]
+    EXPONENT             = /[eE][+-]?[0-9]+/ # [86]
+    DOUBLE               = /(?:[0-9]+\.[0-9]*|\.[0-9]+|[0-9]+)#{EXPONENT}/ # [79]
+    ECHAR                = /\\[tbnrf\\"']/ # [91]
+    STRING_LITERAL1      = /'((?:[^\x27\x5C\x0A\x0D]|#{ECHAR})*)'/ # [87]
+    STRING_LITERAL2      = /"((?:[^\x22\x5C\x0A\x0D]|#{ECHAR})*)"/ # [88]
+    STRING_LITERAL_LONG1 = /'''((?:(?:'|'')?(?:[^'\\]|#{ECHAR})+)*)'''/m # [89]
+    STRING_LITERAL_LONG2 = /"""((?:(?:"|"")?(?:[^"\\]|#{ECHAR})+)*)"""/m # [90]
+    WS                   = /\x20|\x09|\x0D|\x0A/ # [93]
+    NIL                  = /\(#{WS}*\)/ # [92]
+    ANON                 = /\[#{WS}*\]/ # [94]
+
+    BooleanLiteral = /true|false/ # [65]
+    String         = /#{STRING_LITERAL_LONG1}|#{STRING_LITERAL_LONG2}|
+                      #{STRING_LITERAL1}|#{STRING_LITERAL2}/x # [66]
+
+    # Make all defined regular expression constants immutable:
+    constants.each { |name| const_get(name).freeze }
+
+    ##
+    # Returns a copy of the given `input` string with all `\uXXXX` and
+    # `\UXXXXXXXX` Unicode codepoint escape sequences replaced with their
+    # unescaped UTF-8 character counterparts.
+    #
+    # @param [String] input
+    # @return [String]
+    # @see http://www.w3.org/TR/rdf-sparql-query/#codepointEscape
+    def self.unescape_codepoints(input)
+      string = input.dup
+      string.force_encoding(Encoding::ASCII_8BIT) if string.respond_to?(:force_encoding) # Ruby 1.9+
+
+      # Decode \uXXXX and \UXXXXXXXX code points:
+      string.gsub!(ESCAPE_CHAR) do
+        s = [($1 || $2).hex].pack('U*')
+        s.respond_to?(:force_encoding) ? s.force_encoding(Encoding::ASCII_8BIT) : s
+      end
+
+      string.force_encoding(Encoding::UTF_8) if string.respond_to?(:force_encoding) # Ruby 1.9+
+      string
+    end
+
+    ##
+    # Returns a copy of the given `input` string with all string escape
+    # sequences (e.g. `\n` and `\t`) replaced with their unescaped UTF-8
+    # character counterparts.
+    #
+    # @param [String] input
+    # @return [String]
+    # @see http://www.w3.org/TR/rdf-sparql-query/#grammarEscapes
+    def self.unescape_string(input)
+      input.gsub(ECHAR) { |escaped| ESCAPE_CHARS[escaped] }
+    end
+
+    ##
+    # Tokenizes the given `input` string or stream.
+    #
+    # @param [String, #to_s] input
+    # @param [Hash{Symbol => Object}] options
+    # @yield [lexer]
+    # @yieldparam [Lexer] lexer
+    # @return [Lexer]
+    # @raise [Lexer::Error] on invalid input
+    def self.tokenize(input, options = {}, &block)
+      lexer = self.new(input, options)
+      block_given? ? block.call(lexer) : lexer
+    end
+
+    ##
+    # Initializes a new lexer instance.
+    #
+    # @param [String, #to_s] input
+    # @param [Hash{Symbol => Object}] options
+    def initialize(input = nil, options = {})
+      @options = options.dup
+      self.input = input if input
+    end
+
+    ##
+    # Any additional options for the lexer.
+    #
+    # @return [Hash]
+    attr_reader :options
+
+    ##
+    # The current input string being processed.
+    #
+    # @return [String]
+    attr_accessor :input
+
+    ##
+    # The current line number (zero-based).
+    #
+    # @return [Integer]
+    attr_reader :lineno
+
+    ##
+    # @param [String, #to_s] input
+    # @return [void]
+    def input=(input)
+      @input = case input
+        when ::String then input
+        when IO, StringIO then input.read
+        else input.to_s
+      end
+      @input = @input.dup
+      @input.force_encoding(Encoding::UTF_8) if @input.respond_to?(:force_encoding) # Ruby 1.9+
+      @input = self.class.unescape_codepoints(@input)
+      @lineno = 0
+    end
+
+    ##
+    # Returns `true` if the input string is lexically valid.
+    #
+    # To be considered valid, the input string must contain more than zero
+    # tokens, and must not contain any invalid tokens.
+    #
+    # @return [Boolean]
+    def valid?
+      begin
+        !count.zero?
+      rescue Error
+        false
+      end
+    end
+
+    ##
+    # Enumerates each token in the input string.
+    #
+    # @yield [token]
+    # @yieldparam [Token] token
+    # @return [Enumerator]
+    def each_token(&block)
+      if block_given?
+        @lineno = 0
+        @scanner = StringScanner.new(@input)
+        until scanner.eos?
+          case
+          when skip_whitespace
+          when skip_comment
+          when token = match_token
+            yield token
+          else
+            lexeme = (@scanner.rest.split(/#{WS}|#{COMMENT}/).first rescue nil) || @scanner.rest
+            raise Error.new("invalid token #{lexeme.inspect} on line #{lineno + 1}",
+              :input => input, :token => lexeme, :lineno => lineno)
+          end
+        end
+        @scanner = nil
+      end
+      enum_for(:each_token)
+    end
+    alias_method :each, :each_token
+
+    protected
+
+    # @return [StringScanner]
+    attr_reader :scanner
+
+    # @see http://www.w3.org/TR/rdf-sparql-query/#whitespace
+    def skip_whitespace
+      # skip all white space, but keep track of the current line number
+      if matched = scanner.scan(WS)
+        @lineno += matched.count("\n")
+        matched
+      end
+    end
+
+    # @see http://www.w3.org/TR/rdf-sparql-query/#grammarComments
+    def skip_comment
+      # skip the remainder of the current line
+      skipped = scanner.skip(COMMENT)
+    end
+
+    def match_token
+      match_var1 ||
+      match_var2 ||
+      match_iri_ref ||
+      match_pname_ln ||
+      match_pname_ns ||
+      match_string_long_1 ||
+      match_string_long_2 ||
+      match_string_1 ||
+      match_string_2 ||
+      match_langtag ||
+      match_double ||
+      match_decimal ||
+      match_integer ||
+      match_boolean_literal ||
+      match_blank_node_label ||
+      match_nil ||
+      match_anon ||
+      match_keyword ||
+      match_delimiter ||
+      match_operator
+    end
+
+    def match_var1
+      if matched = scanner.scan(VAR1)
+        token(:VAR1, scanner[1].to_s)
+      end
+    end
+
+    def match_var2
+      if matched = scanner.scan(VAR2)
+        token(:VAR2, scanner[1].to_s)
+      end
+    end
+
+    def match_iri_ref
+      if matched = scanner.scan(IRI_REF)
+        token(:IRI_REF, scanner[1].to_s)
+      end
+    end
+
+    def match_pname_ln
+      if matched = scanner.scan(PNAME_LN)
+        token(:PNAME_LN, [scanner[1].empty? ? nil : scanner[1].to_s, scanner[2].to_s])
+      end
+    end
+
+    def match_pname_ns
+      if matched = scanner.scan(PNAME_NS)
+        token(:PNAME_NS, scanner[1].empty? ? nil : scanner[1].to_s)
+      end
+    end
+
+    def match_string_long_1
+      if matched = scanner.scan(STRING_LITERAL_LONG1)
+        token(:STRING_LITERAL_LONG1, self.class.unescape_string(scanner[1]))
+      end
+    end
+
+    def match_string_long_2
+      if matched = scanner.scan(STRING_LITERAL_LONG2)
+        token(:STRING_LITERAL_LONG2, self.class.unescape_string(scanner[1]))
+      end
+    end
+
+    def match_string_1
+      if matched = scanner.scan(STRING_LITERAL1)
+        token(:STRING_LITERAL1, self.class.unescape_string(scanner[1]))
+      end
+    end
+
+    def match_string_2
+      if matched = scanner.scan(STRING_LITERAL2)
+        token(:STRING_LITERAL2, self.class.unescape_string(scanner[1]))
+      end
+    end
+
+    def match_langtag
+      if matched = scanner.scan(LANGTAG)
+        token(:LANGTAG, scanner[1].to_s)
+      end
+    end
+
+    def match_double
+      if matched = scanner.scan(DOUBLE)
+        token(:DOUBLE, matched)
+      end
+    end
+
+    def match_decimal
+      if matched = scanner.scan(DECIMAL)
+        token(:DECIMAL, matched)
+      end
+    end
+
+    def match_integer
+      if matched = scanner.scan(INTEGER)
+        token(:INTEGER, matched)
+      end
+    end
+
+    def match_boolean_literal
+      if matched = scanner.scan(BooleanLiteral)
+        token(:BooleanLiteral, matched)
+      end
+    end
+
+    def match_blank_node_label
+      if matched = scanner.scan(BLANK_NODE_LABEL)
+        token(:BLANK_NODE_LABEL, scanner[1].to_s)
+      end
+    end
+
+    def match_nil
+      if matched = scanner.scan(NIL)
+        token(:NIL)
+      end
+    end
+
+    def match_anon
+      if matched = scanner.scan(ANON)
+        token(:ANON)
+      end
+    end
+
+    def match_keyword
+      if matched = scanner.scan(KEYWORD)
+        token(nil, matched.upcase.to_s)
+      end
+    end
+
+    def match_delimiter
+      if matched = scanner.scan(DELIMITER)
+        token(nil, matched.to_s)
+      end
+    end
+
+    def match_operator
+      if matched = scanner.scan(OPERATOR)
+        token(nil, matched.to_s)
+      end
+    end
+
+    protected
+
+    ##
+    # Constructs a new token object annotated with the current line number.
+    #
+    # The parser relies on the type being a symbolized URI and the value being
+    # a string, if there is no type. If there is a type, then the value takes
+    # on the native representation appropriate for that type.
+    #
+    # @param [Symbol] type
+    # @param [Object] value
+    # @return [Token]
+    def token(type, value = nil)
+      Token.new(type, value, :lineno => lineno)
+    end
+
+    ##
+    # Represents a lexer token.
+    #
+    # @example Creating a new token
+    #   token = SPARQL::Grammar::Lexer::Token.new(:LANGTAG, :en)
+    #   token.type  #=> :LANGTAG
+    #   token.value #=> "en"
+    #
+    # @see http://en.wikipedia.org/wiki/Lexical_analysis#Token
+    class Token
+      ##
+      # Initializes a new token instance.
+      #
+      # @param [Symbol] type
+      # @param [Object] value
+      # @param [Hash{Symbol => Object}] options
+      # @option options [Integer] :lineno (nil)
+      def initialize(type, value = nil, options = {})
+        @type, @value = (type ? type.to_s.to_sym : nil), value
+        @options = options.dup
+        @lineno = @options.delete(:lineno)
+      end

+      ##
+      # The token's symbol type.
+      #
+      # @return [Symbol]
+      attr_reader :type
+
+      ##
+      # The token's value.
+      #
+      # @return [Object]
+      attr_reader :value
+
+      ##
+      # The line number where the token was encountered.
+      #
+      # @return [Integer]
+      attr_reader :lineno
+
+      ##
+      # Any additional options for the token.
+      #
+      # @return [Hash]
+      attr_reader :options
+
+      ##
+      # Returns the attribute named by `key`.
+      #
+      # @param [Symbol] key
+      # @return [Object]
+      def [](key)
+        key = key.to_s.to_sym unless key.is_a?(Integer) || key.is_a?(Symbol)
+        case key
        when 0, :type then @type
+        when 1, :value then @value
+        else nil
+        end
+      end
+
+      ##
+      # Returns `true` if the given `value` matches either the type or value
+      # of this token.
+      #
+      # @example Matching using the symbolic type
+      #   SPARQL::Grammar::Lexer::Token.new(:NIL) === :NIL #=> true
+      #
+      # @example Matching using the string value
+      #   SPARQL::Grammar::Lexer::Token.new(nil, "{") === "{" #=> true
+      #
+      # @param [Symbol, String] value
+      # @return [Boolean]
+      def ===(value)
+        case value
+        when Symbol then value == @type
+        when ::String then value.to_s == @value.to_s
+        else value == @value
+        end
+      end
+
+      ##
+      # Returns a hash table representation of this token.
+      #
+      # @return [Hash]
+      def to_hash
+        {:type => @type, :value => @value}
+      end
+
+      ##
+      # Returns type, if not nil, otherwise value
+      def representation
+        @type ? @type : @value
+      end
+
+      ##
+      # Returns an array representation of this token.
+      #
+      # @return [Array]
+      def to_a
+        [@type, @value]
+      end
+
+      ##
+      # Returns a developer-friendly representation of this token.
+      #
+      # @return [String]
+      def inspect
+        to_hash.inspect
+      end
+    end # class Token
+
+    ##
+    # Raised for errors during lexical analysis.
+    #
+    # @example Raising a lexer error
+    #   raise SPARQL::Grammar::Lexer::Error.new(
+    #     "invalid token '%' on line 10",
+    #     :input => query, :token => '%', :lineno => 9)
+    #
+    # @see http://ruby-doc.org/core/classes/StandardError.html
+    class Error < StandardError
+      ##
+      # The input string associated with the error.
+      #
+      # @return [String]
+      attr_reader :input
+
+      ##
+      # The invalid token which triggered the error.
+      #
+      # @return [String]
+      attr_reader :token
+
+      ##
+      # The line number where the error occurred.
+      #
+      # @return [Integer]
+      attr_reader :lineno
+
+      ##
+      # Initializes a new lexer error instance.
+      #
+      # @param [String, #to_s] message
+      # @param [Hash{Symbol => Object}] options
+      # @option options [String] :input (nil)
+      # @option options [String] :token (nil)
+      # @option options [Integer] :lineno (nil)
+      def initialize(message, options = {})
+        @input = options[:input]
+        @token = options[:token]
+        @lineno = options[:lineno]
+        super(message.to_s)
+      end
+    end # class Error
+  end # class Lexer
+end; end # module SPARQL::Grammar
```
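
The tokens yielded by the lexer carry a symbolic `type` (or `nil` for keywords, delimiters, and operators, whose uppercased or literal text becomes the `value`), and `Token#===` matches against either the type or the value. A small sketch of that behaviour, based on the `Token` docstrings and methods in the diff above:

```ruby
# Sketch of Token accessors; the #=> values follow the methods shown above.
token = SPARQL::Grammar::Lexer::Token.new(:LANGTAG, "en")

token.type          #=> :LANGTAG
token.value         #=> "en"
token[0]            #=> :LANGTAG (positional access, same as token[:type])
token.to_hash       #=> {:type => :LANGTAG, :value => "en"}
token === :LANGTAG  #=> true (Symbol arguments match the type)
token === "en"      #=> true (String arguments match the value)
```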