rdf 3.1.1 → 3.1.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,4 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
+
3
+ require 'strscan'
4
+
2
5
  module RDF::NTriples
3
6
  ##
4
7
  # N-Triples parser.
@@ -25,6 +28,10 @@ module RDF::NTriples
25
28
  # end
26
29
  # end
27
30
  #
31
+ # ## RDFStar (RDF*)
32
+ #
33
+ # Supports statements as resources using `<<s p o>>`.
34
+ #
28
35
  # @see http://www.w3.org/TR/rdf-testcases/#ntriples
29
36
  # @see http://www.w3.org/TR/n-triples/
30
37
  class Reader < RDF::Reader
@@ -70,6 +77,10 @@ module RDF::NTriples
70
77
  # 22
71
78
  STRING_LITERAL_QUOTE = /"((?:[^\"\\\n\r]|#{ECHAR}|#{UCHAR})*)"/.freeze
72
79
 
80
+ # RDF*
81
+ ST_START = /^<</.freeze
82
+ ST_END = /^\s*>>/.freeze
83
+
73
84
  # @see http://www.w3.org/TR/rdf-testcases/#ntrip_grammar
74
85
  COMMENT = /^#\s*(.*)$/.freeze
75
86
  NODEID = /^#{BLANK_NODE_LABEL}/.freeze
@@ -202,7 +213,7 @@ module RDF::NTriples
202
213
  begin
203
214
  read_statement
204
215
  rescue RDF::ReaderError
205
- value = read_uriref || read_node || read_literal
216
+ value = read_uriref || read_node || read_literal || read_rdfstar
206
217
  log_recover
207
218
  value
208
219
  end
@@ -218,9 +229,9 @@ module RDF::NTriples
218
229
 
219
230
  begin
220
231
  unless blank? || read_comment
221
- subject = read_uriref || read_node || fail_subject
232
+ subject = read_uriref || read_node || read_rdfstar || fail_subject
222
233
  predicate = read_uriref(intern: true) || fail_predicate
223
- object = read_uriref || read_node || read_literal || fail_object
234
+ object = read_uriref || read_node || read_literal || read_rdfstar || fail_object
224
235
 
225
236
  if validate? && !read_eos
226
237
  log_error("Expected end of statement (found: #{current_line.inspect})", lineno: lineno, exception: RDF::ReaderError)
@@ -234,6 +245,20 @@ module RDF::NTriples
234
245
  end
235
246
  end
236
247
 
248
+ ##
249
+ # @return [RDF::Statement]
250
+ def read_rdfstar
251
+ if @options[:rdfstar] && match(ST_START)
252
+ subject = read_uriref || read_node || read_rdfstar || fail_subject
253
+ predicate = read_uriref(intern: true) || fail_predicate
254
+ object = read_uriref || read_node || read_literal || read_rdfstar || fail_object
255
+ if !match(ST_END)
256
+ log_error("Expected end of statement (found: #{current_line.inspect})", lineno: lineno, exception: RDF::ReaderError)
257
+ end
258
+ RDF::Statement.new(subject, predicate, object)
259
+ end
260
+ end
261
+
237
262
  ##
238
263
  # @return [Boolean]
239
264
  # @see http://www.w3.org/TR/rdf-testcases/#ntrip_grammar (comment)
@@ -221,6 +221,15 @@ module RDF::NTriples
221
221
  format_triple(*statement.to_triple, **options)
222
222
  end
223
223
 
224
+ ##
225
+ # Returns the N-Triples representation of an RDF* reified statement.
226
+ #
227
+ # @param [RDF::Statement] statement
228
+ # @param [Hash{Symbol => Object}] options ({})
229
+ # @return [String]
230
+ def format_rdfstar(statement, **options)
231
+ "<<%s %s %s>>" % statement.to_a.map { |value| format_term(value, **options) }
232
+ end
224
233
  ##
225
234
  # Returns the N-Triples representation of a triple.
226
235
  #
@@ -247,15 +247,22 @@ module RDF
247
247
  # Optimizes this query by reordering its constituent triple patterns
248
248
  # according to their cost estimates.
249
249
  #
250
+ # Optional patterns are treated as costlier than non-optional patterns, so they always come after them; within each group, patterns are ordered by their cost estimates.
251
+ #
250
252
  # @param [Hash{Symbol => Object}] options
251
253
  # any additional options for optimization
252
254
  # @return [self]
253
255
  # @see RDF::Query::Pattern#cost
254
256
  # @since 0.3.0
255
257
  def optimize!(**options)
256
- @patterns.sort! do |a, b|
258
+ optional, required = @patterns.partition(&:optional?)
259
+ required.sort! do |a, b|
260
+ (a.cost || 0) <=> (b.cost || 0)
261
+ end
262
+ optional.sort! do |a, b|
257
263
  (a.cost || 0) <=> (b.cost || 0)
258
264
  end
265
+ @patterns = required + optional
259
266
  self
260
267
  end
261
268
 
@@ -289,6 +296,8 @@ module RDF
289
296
  # any additional keyword options
290
297
  # @option options [Hash{Symbol => RDF::Term}] bindings
291
298
  # optional variable bindings to use
299
+ # @option options [Boolean] :optimize
300
+ # Optimize query before execution.
292
301
  # @option options [RDF::Query::Solutions] solutions
293
302
  # optional initial solutions for chained queries
294
303
  # @yield [solution]
@@ -311,6 +320,7 @@ module RDF
311
320
  return @solutions
312
321
  end
313
322
 
323
+ self.optimize! if options[:optimize]
314
324
  patterns = @patterns
315
325
  graph_name = name if graph_name.nil?
316
326
  @graph_name = graph_name unless graph_name.nil?
@@ -505,7 +515,7 @@ module RDF
505
515
  # @return [RDF::Query]
506
516
  def dup
507
517
  patterns = @patterns.map {|p| p.dup}
508
- Query.new(patterns, solutions: @solutions.dup, **options)
518
+ Query.new(patterns, graph_name: graph_name, solutions: @solutions.dup, **options)
509
519
  end
510
520
 
511
521
  ##
@@ -77,14 +77,15 @@ module RDF; class Query
77
77
  ##
78
78
  # Returns the normalization of the specified `hash_pattern`.
79
79
  #
80
- # @param [Hash{Symbol => Object}] hash_pattern (Hash.new)
81
- # the query pattern as a hash.
82
- # @param [Hash{Symbol => Object}] options (Hash.new)
83
- # any additional normalization options.
84
- # @option options [String] :anonymous_subject_format ("__%s__")
85
- # the string format for anonymous subjects.
86
- # @return [Hash{Symbol => Object}]
87
- # the resulting query pattern as a normalized hash.
80
+ # @overload normalize!(hash_pattern, **options)
81
+ # @param [Hash{Symbol => Object}] hash_pattern (Hash.new)
82
+ # the query pattern as a hash.
83
+ # @param [Hash{Symbol => Object}] **options
84
+ # any additional normalization options.
85
+ # @option options [String] :anonymous_subject_format ("__%s__")
86
+ # the string format for anonymous subjects.
87
+ # @return [Hash{Symbol => Object}]
88
+ # the resulting query pattern as a normalized hash.
88
89
  def normalize!(*args)
89
90
  hash_pattern = args.shift
90
91
  options = args.shift || {}
@@ -50,10 +50,12 @@ module RDF; class Query
50
50
 
51
51
  # Estimate cost positionally, with variables being most expensive as objects, then predicates, then subjects, then graph_names.
52
52
  # XXX does not consider bound variables, which would need to be dynamically calculated.
53
- @cost = (@object.nil? || @object.is_a?(Variable) ? 1 : 0) +
54
- (@predicate.nil? || @predicate.is_a?(Variable) ? 2 : 0) +
55
- (@subject.nil? || @subject.is_a?(Variable) ? 4 : 0) +
56
- (@graph_name.is_a?(Variable) ? 8 : 0)
53
+ @cost = (@object.nil? || @object.is_a?(Variable) ? 8 : 0) +
54
+ (@predicate.nil? || @predicate.is_a?(Variable) ? 4 : 0) +
55
+ (@subject.nil? || @subject.is_a?(Variable) ? 2 : 0) +
56
+ (@graph_name.is_a?(Variable) ? 1 : 0) +
57
+ (@object.is_a?(Pattern) ? (@object.cost * 4) : 0) +
58
+ (@subject.is_a?(Pattern) ? (@subject.cost * 2) : 0)
57
59
  super
58
60
  end
59
61
 
@@ -84,10 +86,10 @@ module RDF; class Query
84
86
  # @return [Boolean] `true` or `false`
85
87
  # @since 0.3.0
86
88
  def has_variables?
87
- subject.is_a?(Variable) ||
88
- predicate.is_a?(Variable) ||
89
- object.is_a?(Variable) ||
90
- graph_name.is_a?(Variable)
89
+ subject && subject.variable? ||
90
+ predicate && predicate.variable? ||
91
+ object && object.variable? ||
92
+ graph_name && graph_name.variable?
91
93
  end
92
94
  alias_method :variables?, :has_variables?
93
95
 
@@ -117,13 +119,33 @@ module RDF; class Query
117
119
  false
118
120
  end
119
121
 
122
+ ##
123
+ # Checks pattern equality against a statement, considering nesting.
124
+ #
125
+ # * A pattern which has a pattern as a subject or an object, matches
126
+ # a statement having a statement as a subject or an object using {#eql?}.
127
+ #
128
+ # @param [Statement] other
129
+ # @return [Boolean]
130
+ #
131
+ # @see RDF::URI#==
132
+ # @see RDF::Node#==
133
+ # @see RDF::Literal#==
134
+ # @see RDF::Query::Variable#==
135
+ def eql?(other)
136
+ return false unless other.is_a?(Statement) && (self.graph_name || false) == (other.graph_name || false)
137
+
138
+ predicate == other.predicate &&
139
+ (subject.is_a?(Pattern) ? subject.eql?(other.subject) : subject == other.subject) &&
140
+ (object.is_a?(Pattern) ? object.eql?(other.object) : object == other.object)
141
+ end
142
+
120
143
  ##
121
144
  # Executes this query pattern on the given `queryable` object.
122
145
  #
123
146
  # Values are matched using Queryable#query_pattern.
124
147
  #
125
- # If the optional `bindings` are given, variables will be substituted with their values
126
- # when executing the query.
148
+ # If the optional `bindings` are given, variables will be substituted with their values when executing the query.
127
149
  #
128
150
  # To match triples only in the default graph, set graph_name to `false`.
129
151
  #
@@ -159,16 +181,10 @@ module RDF; class Query
159
181
 
160
182
  # No, some terms actually refer to the same variable...
161
183
  else
162
- # Figure out which terms refer to the same variable:
163
- terms = variables.each_key.find do |name|
164
- terms = variable_terms(name)
165
- break terms if terms.size > 1
166
- end
184
+ # Considering embedding, figure out if variables that may appear more than once resolve to the same value.
185
+ vars = variables.keys
167
186
  queryable.query(query).select do |statement|
168
- # Only yield those matching statements where the variable
169
- # constraint is also satisfied:
170
- # FIXME: `Array#uniq` uses `#eql?` and `#hash`, not `#==`
171
- if terms.map { |term| statement.send(term) }.uniq.size.equal?(1)
187
+ if vars.all? {|var| self.var_values(var, statement).uniq.size == 1}
172
188
  yield statement if block_given?
173
189
  true
174
190
  end
@@ -198,6 +214,8 @@ module RDF; class Query
198
214
  solution[predicate.to_sym] = statement.predicate if predicate.is_a?(Variable)
199
215
  solution[object.to_sym] = statement.object if object.is_a?(Variable)
200
216
  solution[graph_name.to_sym] = statement.graph_name if graph_name.is_a?(Variable)
217
+ solution.merge!(subject.solution(statement.subject)) if subject.respond_to?(:solution)
218
+ solution.merge!(object.solution(statement.object)) if object.respond_to?(:solution)
201
219
  end
202
220
  end
203
221
 
@@ -210,8 +228,11 @@ module RDF; class Query
210
228
  # @param [Symbol, #to_sym] name
211
229
  # an optional variable name
212
230
  # @return [Array<Symbol>]
231
+ # @deprecated use {#var_values} instead
213
232
  # @since 0.3.0
214
233
  def variable_terms(name = nil)
234
+ warn "[DEPRECATION] RDF::Query::Pattern#variable_terms is deprecated and will be removed in a future version.\n" +
235
+ "Called from #{Gem.location_of_caller.join(':')}"
215
236
  terms = []
216
237
  terms << :subject if subject.is_a?(Variable) && (!name || name.eql?(subject.name))
217
238
  terms << :predicate if predicate.is_a?(Variable) && (!name || name.eql?(predicate.name))
@@ -220,6 +241,20 @@ module RDF; class Query
220
241
  terms
221
242
  end
222
243
 
244
+ ##
245
+ # Returns the values from the statement at every position where this pattern references the given variable.
246
+ #
247
+ # @param [Symbol] var
248
+ # @param [RDF::Statement] statement
249
+ # @return [Array<RDF::Term>]
250
+ def var_values(var, statement)
251
+ [:subject, :predicate, :object, :graph_name].map do |position|
252
+ po = self.send(position)
253
+ so = statement.send(position)
254
+ po.var_values(var, so) if po.respond_to?(:var_values)
255
+ end.flatten.compact
256
+ end
257
+
223
258
  ##
224
259
  # Returns the number of variables in this pattern.
225
260
  #
@@ -229,7 +264,8 @@ module RDF; class Query
229
264
  # @return [Integer] (0..3)
230
265
  def variable_count
231
266
  [subject, predicate, object, graph_name].inject(0) do |memo, term|
232
- memo += (term.is_a?(Variable) ? 1 : 0)
267
+ memo += (term.is_a?(Variable) ? 1 :
268
+ (term.respond_to?(:variable_count) ? term.variable_count : 0))
233
269
  end
234
270
  end
235
271
  alias_method :cardinality, :variable_count
@@ -243,7 +279,7 @@ module RDF; class Query
243
279
  # @return [Hash{Symbol => Variable}]
244
280
  def variables
245
281
  [subject, predicate, object, graph_name].inject({}) do |memo, term|
246
- term.is_a?(Variable) ? memo.merge(term.variables) : memo
282
+ term && term.variable? ? memo.merge(term.variables) : memo
247
283
  end
248
284
  end
249
285
 
@@ -254,8 +290,10 @@ module RDF; class Query
254
290
  # @return [self]
255
291
  def bind(solution)
256
292
  self.to_quad.each_with_index do |term, index|
257
- if term && term.variable? && solution[term]
293
+ if term.is_a?(Variable) && solution[term]
258
294
  self[index] = solution[term]
295
+ elsif term.is_a?(Pattern)
296
+ term.bind(solution)
259
297
  end
260
298
  end
261
299
  self
@@ -283,10 +321,10 @@ module RDF; class Query
283
321
  # @return [Hash{Symbol => RDF::Term}]
284
322
  def bindings
285
323
  bindings = {}
286
- bindings.merge!(subject.bindings) if subject.is_a?(Variable)
287
- bindings.merge!(predicate.bindings) if predicate.is_a?(Variable)
288
- bindings.merge!(object.bindings) if object.is_a?(Variable)
289
- bindings.merge!(graph_name.bindings) if graph_name.is_a?(Variable)
324
+ bindings.merge!(subject.bindings) if subject && subject.variable?
325
+ bindings.merge!(predicate.bindings) if predicate && predicate.variable?
326
+ bindings.merge!(object.bindings) if object && object.variable?
327
+ bindings.merge!(graph_name.bindings) if graph_name && graph_name.variable?
290
328
  bindings
291
329
  end
292
330
 
@@ -327,18 +365,7 @@ module RDF; class Query
327
365
  #
328
366
  # @return [String]
329
367
  def to_s
330
- StringIO.open do |buffer| # FIXME in RDF::Statement
331
- buffer << 'OPTIONAL ' if optional?
332
- buffer << [subject, predicate, object].map do |r|
333
- r.is_a?(RDF::Query::Variable) ? r.to_s : RDF::NTriples.serialize(r)
334
- end.join(" ")
335
- buffer << case graph_name
336
- when nil, false then " ."
337
- when Variable then " #{graph_name.to_s} ."
338
- else " #{RDF::NTriples.serialize(graph_name)} ."
339
- end
340
- buffer.string
341
- end
368
+ (optional? ? 'OPTIONAL ' : '') + super
342
369
  end
343
370
  end # Pattern
344
371
  end; end # RDF::Query
@@ -195,12 +195,31 @@ class RDF::Query
195
195
  # Merges the bindings from the given `other` query solution into this
196
196
  # one, overwriting any existing ones having the same name.
197
197
  #
198
+ # ## RDFStar (RDF*)
199
+ #
200
+ # If merging a binding for a statement to a pattern,
201
+ # merge their embedded solutions.
202
+ #
198
203
  # @param [RDF::Query::Solution, #to_h] other
199
204
  # another query solution or hash bindings
200
205
  # @return [void] self
201
206
  # @since 0.3.0
202
207
  def merge!(other)
203
- @bindings.merge!(other.to_h)
208
+ @bindings.merge!(other.to_h) do |key, v1, v2|
209
+ # Don't merge a pattern over a statement
210
+ # This happens because JOIN does a reverse merge,
211
+ # and a pattern is set in v2.
212
+ v2.is_a?(Pattern) ? v1 : v2
213
+ end
214
+ # Merge bindings from patterns
215
+ embedded_solutions = []
216
+ @bindings.each do |k, v|
217
+ if v.is_a?(Pattern) && other[k].is_a?(RDF::Statement)
218
+ embedded_solutions << v.solution(other[k])
219
+ end
220
+ end
221
+ # Merge embedded solutions
222
+ embedded_solutions.each {|soln| merge!(soln)}
204
223
  self
205
224
  end
206
225
 
@@ -66,13 +66,15 @@ module RDF; class Query
66
66
  #
67
67
  # @return [Array<Symbol>]
68
68
  def variable_names
69
- variables = self.inject({}) do |result, solution|
70
- solution.each_name do |name|
71
- result[name] ||= true
69
+ @variable_names ||= begin
70
+ variables = self.inject({}) do |result, solution|
71
+ solution.each_name do |name|
72
+ result[name] ||= true
73
+ end
74
+ result
72
75
  end
73
- result
76
+ variables.keys
74
77
  end
75
- variables.keys
76
78
  end
77
79
 
78
80
  ##
@@ -136,6 +138,7 @@ module RDF; class Query
136
138
  # @yieldreturn [Boolean]
137
139
  # @return [self]
138
140
  def filter(criteria = {})
141
+ @variable_names = nil
139
142
  if block_given?
140
143
  self.reject! do |solution|
141
144
  !yield(solution.is_a?(Solution) ? solution : Solution.new(solution))
@@ -223,6 +226,13 @@ module RDF; class Query
223
226
  solution.bindings.delete_if { |k, v| !variables.include?(k.to_sym) }
224
227
  end
225
228
  end
229
+
230
+ # Make sure variable_names are ordered by projected variables
231
+ projected_vars, vars = variables.map(&:to_sym), variable_names
232
+ vars = variable_names
233
+
234
+ # Maintain projected order, and add any non-projected variables
235
+ @variable_names = (projected_vars & vars) + (vars - projected_vars)
226
236
  self
227
237
  end
228
238
  alias_method :select, :project
@@ -65,7 +65,7 @@ class RDF::Query
65
65
  # the variable name
66
66
  # @param [RDF::Term] value
67
67
  # an optional variable value
68
- # @param [Boolean] distinguished (true) Also interpreted by leading '??' or '$$' in name.
68
+ # @param [Boolean] distinguished (true) Also interpreted by leading '?' or '$' in name. If non-distinguished, '??' or '$$'.
69
69
  # @param [Boolean] existential (true) Also interpreted by leading '$' in name
70
70
  def initialize(name = nil, value = nil, distinguished: nil, existential: nil)
71
71
  name = (name || "g#{__id__.to_i.abs}").to_s
@@ -157,12 +157,24 @@ class RDF::Query
157
157
  ##
158
158
  # Rebinds this variable to the given `value`.
159
159
  #
160
- # @param [RDF::Term] value
161
- # @return [RDF::Term] the previous value, if any.
160
+ # @overload bind(value)
161
+ # @param [RDF::Query::Solution] value
162
+ # @return [self] the bound variable
163
+ #
164
+ # @overload bind(value)
165
+ # @param [RDF::Term] value
166
+ # @return [RDF::Term] the previous value, if any.
162
167
  def bind(value)
163
- old_value = self.value
164
- self.value = value
165
- old_value
168
+ if value.is_a?(RDF::Query::Solution)
169
+ self.value = value.to_h.fetch(name, self.value)
170
+ self
171
+ else
172
+ warn "[DEPRECATION] RDF::Query::Variable#bind should be used with a solution, not a term.\n" +
173
+ "Called from #{Gem.location_of_caller.join(':')}"
174
+ old_value = self.value
175
+ self.value = value
176
+ old_value
177
+ end
166
178
  end
167
179
  alias_method :bind!, :bind
168
180
 
@@ -234,6 +246,16 @@ class RDF::Query
234
246
  end
235
247
  end
236
248
 
249
+ ##
250
+ # Returns term if var is the same as this variable.
251
+ #
252
+ # @param [Symbol] var
253
+ # @param [RDF::Term] term
254
+ # @return [RDF::Term]
255
+ def var_values(var, term)
256
+ term if var == name
257
+ end
258
+
237
259
  ##
238
260
  # Returns a string representation of this variable.
239
261
  #
@@ -253,5 +275,6 @@ class RDF::Query
253
275
  prefix = distinguished? ? (existential? ? '$' : '?') : (existential? ? '$$' : '??')
254
276
  unbound? ? "#{prefix}#{name}" : "#{prefix}#{name}=#{value}"
255
277
  end
278
+ alias_method :to_base, :to_s
256
279
  end # Variable
257
280
  end # RDF::Query