rdf 0.3.0.pre → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -7,30 +7,27 @@ module RDF; class Query
7
7
  # @since 0.2.2
8
8
  def self.from(pattern, options = {})
9
9
  case pattern
10
- when Pattern then pattern
11
- when Statement then self.new(options.merge(pattern.to_hash))
12
- when Hash then self.new(options.merge(pattern))
13
- when Array then self.new(pattern[0], pattern[1], pattern[2], options.merge(:context => pattern[3]))
14
- else raise ArgumentError.new("expected RDF::Query::Pattern, RDF::Statement, Hash, or Array, but got #{pattern.inspect}")
10
+ when Pattern then pattern
11
+ when Array, Statement
12
+ self.new(pattern[0], pattern[1], pattern[2], options.merge(:context => pattern[3]))
13
+ when Hash then self.new(options.merge(pattern))
14
+ else raise ArgumentError, "expected RDF::Query::Pattern, RDF::Statement, Hash, or Array, but got #{pattern.inspect}"
15
15
  end
16
16
  end
17
17
 
18
- # @return [Hash{Symbol => Object}]
19
- attr_reader :options
20
-
21
18
  ##
22
19
  # @overload initialize(options = {})
23
20
  # @param [Hash{Symbol => Object}] options
24
21
  # @option options [Variable, Resource] :subject (nil)
25
22
  # @option options [Variable, URI] :predicate (nil)
26
- # @option options [Variable, Value] :object (nil)
23
+ # @option options [Variable, Term] :object (nil)
27
24
  # @option options [Variable, Resource] :context (nil)
28
25
  # @option options [Boolean] :optional (false)
29
26
  #
30
27
  # @overload initialize(subject, predicate, object, options = {})
31
28
  # @param [Variable, Resource] subject
32
29
  # @param [Variable, URI] predicate
33
- # @param [Variable, Value] object
30
+ # @param [Variable, Term] object
34
31
  # @param [Hash{Symbol => Object}] options
35
32
  # @option options [Variable, Resource] :context (nil)
36
33
  # @option options [Boolean] :optional (false)
@@ -48,6 +45,63 @@ module RDF; class Query
48
45
  super
49
46
  end
50
47
 
48
+ ##
49
+ # Any additional options for this pattern.
50
+ #
51
+ # @return [Hash]
52
+ attr_reader :options
53
+
54
+ ##
55
+ # The estimated cost of this pattern (for query optimization).
56
+ #
57
+ # @return [Numeric]
58
+ attr_accessor :cost
59
+
60
+ ##
61
+ # Returns `true` if this is a blank pattern, with all terms being `nil`.
62
+ #
63
+ # @return [Boolean] `true` or `false`
64
+ # @since 0.3.0
65
+ def blank?
66
+ subject.nil? && predicate.nil? && object.nil? && context.nil?
67
+ end
68
+
69
+ ##
70
+ # Returns `true` if this is a constant pattern, with all terms being
71
+ # either URIs, blank nodes, or literals.
72
+ #
73
+ # A constant pattern is structurally and functionally equivalent to an
74
+ # RDF statement.
75
+ #
76
+ # @return [Boolean] `true` or `false`
77
+ # @since 0.3.0
78
+ def constant?
79
+ !(variable?)
80
+ end
81
+
82
+ ##
83
+ # Returns `true` if this is a variable pattern, with any term being
84
+ # `nil` or a variable.
85
+ #
86
+ # @return [Boolean] `true` or `false`
87
+ # @since 0.3.0
88
+ def variable?
89
+ subject.nil? || predicate.nil? || object.nil? || context.nil? || has_variables?
90
+ end
91
+
92
+ ##
93
+ # Returns `true` if this pattern contains any variables.
94
+ #
95
+ # @return [Boolean] `true` or `false`
96
+ # @since 0.3.0
97
+ def has_variables?
98
+ subject.is_a?(Variable) ||
99
+ predicate.is_a?(Variable) ||
100
+ object.is_a?(Variable) ||
101
+ context.is_a?(Variable)
102
+ end
103
+ alias_method :variables?, :has_variables?
104
+
51
105
  ##
52
106
  # Returns `true` if this is an optional pattern.
53
107
  #
@@ -55,7 +109,7 @@ module RDF; class Query
55
109
  # Pattern.new(:s, :p, :o).optional? #=> false
56
110
  # Pattern.new(:s, :p, :o, :optional => true).optional? #=> true
57
111
  #
58
- # @return [Boolean]
112
+ # @return [Boolean] `true` or `false`
59
113
  # @since 0.3.0
60
114
  def optional?
61
115
  !!options[:optional]
@@ -74,7 +128,7 @@ module RDF; class Query
74
128
  #
75
129
  # @param [RDF::Queryable] queryable
76
130
  # the graph or repository to query
77
- # @param [Hash{Symbol => RDF::Value}] bindings
131
+ # @param [Hash{Symbol => RDF::Term}] bindings
78
132
  # optional variable bindings to use
79
133
  # @yield [statement]
80
134
  # each matching statement
@@ -83,44 +137,35 @@ module RDF; class Query
83
137
  # @return [Enumerator]
84
138
  # an enumerator yielding matching statements
85
139
  # @see RDF::Queryable#query
140
+ # @since 0.3.0
86
141
  def execute(queryable, bindings = {}, &block)
87
- variables = self.variables
142
+ query = {
143
+ :subject => subject && subject.variable? ? bindings[subject.to_sym] : subject,
144
+ :predicate => predicate && predicate.variable? ? bindings[predicate.to_sym] : predicate,
145
+ :object => object && object.variable? ? bindings[object.to_sym] : object,
146
+ # TODO: context handling?
147
+ }
88
148
 
89
- # Does this pattern contain any variables?
90
- if variables.empty?
91
- # With no variables to worry about, we will let the repository
92
- # implementation yield matching statements directly:
93
- queryable.query(self, &block)
149
+ # Do all the variable terms refer to distinct variables?
150
+ variables = self.variables
151
+ if variable_count == variables.size
152
+ # If so, we can just let the repository implementation handle
153
+ # everything and yield matching statements directly:
154
+ queryable.query(query, &block)
94
155
 
95
- # Yes, this pattern uses at least one variable...
156
+ # No, some terms actually refer to the same variable...
96
157
  else
97
- query = {
98
- :subject => subject && subject.variable? ? bindings[subject.to_sym] : subject,
99
- :predicate => predicate && predicate.variable? ? bindings[predicate.to_sym] : predicate,
100
- :object => object && object.variable? ? bindings[object.to_sym] : object,
101
- # TODO: context handling?
102
- }
103
-
104
- # Do all the variable terms refer to distinct variables?
105
- if variable_count == variables.size
106
- # If so, we can just let the repository implementation handle
107
- # everything and yield matching statements directly:
108
- queryable.query(query, &block)
109
-
110
- # No, some terms actually refer to the same variable...
111
- else
112
- # Figure out which terms refer to the same variable:
113
- terms = variables.each_key.find do |name|
114
- terms = variable_terms(name)
115
- break terms if terms.size > 1
116
- end
117
- queryable.query(query) do |statement|
118
- # Only yield those matching statements where the variable
119
- # constraint is also satisfied:
120
- # FIXME: `Array#uniq` uses `#eql?` and `#hash`, not `#==`
121
- if matches = terms.map { |term| statement.send(term) }.uniq.size.equal?(1)
122
- block.call(statement)
123
- end
158
+ # Figure out which terms refer to the same variable:
159
+ terms = variables.each_key.find do |name|
160
+ terms = variable_terms(name)
161
+ break terms if terms.size > 1
162
+ end
163
+ queryable.query(query) do |statement|
164
+ # Only yield those matching statements where the variable
165
+ # constraint is also satisfied:
166
+ # FIXME: `Array#uniq` uses `#eql?` and `#hash`, not `#==`
167
+ if matches = terms.map { |term| statement.send(term) }.uniq.size.equal?(1)
168
+ block.call(statement)
124
169
  end
125
170
  end
126
171
  end
@@ -136,6 +181,7 @@ module RDF; class Query
136
181
  # @param [RDF::Statement] statement
137
182
  # an RDF statement to bind terms from
138
183
  # @return [RDF::Query::Solution]
184
+ # @since 0.3.0
139
185
  def solution(statement)
140
186
  RDF::Query::Solution.new do |solution|
141
187
  solution[subject.to_sym] = statement.subject if subject.variable?
@@ -144,16 +190,6 @@ module RDF; class Query
144
190
  end
145
191
  end
146
192
 
147
- ##
148
- # Returns `true` if this pattern contains any variables.
149
- #
150
- # @return [Boolean] `true` or `false`
151
- def variables?
152
- subject.is_a?(Variable) ||
153
- predicate.is_a?(Variable) ||
154
- object.is_a?(Variable)
155
- end
156
-
157
193
  ##
158
194
  # Returns the variable terms in this pattern.
159
195
  #
@@ -222,7 +258,7 @@ module RDF; class Query
222
258
  ##
223
259
  # Returns all bindings in this pattern.
224
260
  #
225
- # @return [Hash{Symbol => Value}]
261
+ # @return [Hash{Symbol => RDF::Term}]
226
262
  def bindings
227
263
  bindings = {}
228
264
  bindings.merge!(subject.bindings) if subject.is_a?(Variable)
@@ -234,7 +270,7 @@ module RDF; class Query
234
270
  ##
235
271
  # Returns `true` if all variables in this pattern are bound.
236
272
  #
237
- # @return [Boolean]
273
+ # @return [Boolean] `true` or `false`
238
274
  def bound?
239
275
  !variables.empty? && variables.values.all?(&:bound?)
240
276
  end
@@ -250,7 +286,7 @@ module RDF; class Query
250
286
  ##
251
287
  # Returns `true` if all variables in this pattern are unbound.
252
288
  #
253
- # @return [Boolean]
289
+ # @return [Boolean] `true` or `false`
254
290
  def unbound?
255
291
  !variables.empty? && variables.values.all?(&:unbound?)
256
292
  end
@@ -31,7 +31,7 @@ class RDF::Query
31
31
  ##
32
32
  # Initializes the query solution.
33
33
  #
34
- # @param [Hash{Symbol => RDF::Value}] bindings
34
+ # @param [Hash{Symbol => RDF::Term}] bindings
35
35
  # @yield [solution]
36
36
  def initialize(bindings = {}, &block)
37
37
  @bindings = bindings.to_hash
@@ -52,7 +52,7 @@ class RDF::Query
52
52
  #
53
53
  # @yield [name, value]
54
54
  # @yieldparam [Symbol] name
55
- # @yieldparam [RDF::Value] value
55
+ # @yieldparam [RDF::Term] value
56
56
  # @return [Enumerator]
57
57
  def each_binding(&block)
58
58
  @bindings.each(&block)
@@ -74,7 +74,7 @@ class RDF::Query
74
74
  # Enumerates over every variable value in this solution.
75
75
  #
76
76
  # @yield [value]
77
- # @yieldparam [RDF::Value] value
77
+ # @yieldparam [RDF::Term] value
78
78
  # @return [Enumerator]
79
79
  def each_value(&block)
80
80
  @bindings.each_value(&block)
@@ -129,7 +129,7 @@ class RDF::Query
129
129
  #
130
130
  # @param [Symbol, #to_sym] name
131
131
  # the variable name
132
- # @return [RDF::Value]
132
+ # @return [RDF::Term]
133
133
  def [](name)
134
134
  @bindings[name.to_sym]
135
135
  end
@@ -139,8 +139,8 @@ class RDF::Query
139
139
  #
140
140
  # @param [Symbol, #to_sym] name
141
141
  # the variable name
142
- # @param [RDF::Value] value
143
- # @return [RDF::Value]
142
+ # @param [RDF::Term] value
143
+ # @return [RDF::Term]
144
144
  # @since 0.3.0
145
145
  def []=(name, value)
146
146
  @bindings[name.to_sym] = value
@@ -172,13 +172,13 @@ class RDF::Query
172
172
  end
173
173
 
174
174
  ##
175
- # @return [Array<Array(Symbol, RDF::Value)>}
175
+ # @return [Array<Array(Symbol, RDF::Term)>]
176
176
  def to_a
177
177
  @bindings.to_a
178
178
  end
179
179
 
180
180
  ##
181
- # @return [Hash{Symbol => RDF::Value}}
181
+ # @return [Hash{Symbol => RDF::Term}]
182
182
  def to_hash
183
183
  @bindings.dup
184
184
  end
@@ -193,7 +193,7 @@ class RDF::Query
193
193
 
194
194
  ##
195
195
  # @param [Symbol] name
196
- # @return [RDF::Value]
196
+ # @return [RDF::Term]
197
197
  def method_missing(name, *args, &block)
198
198
  if args.empty? && @bindings.has_key?(name.to_sym)
199
199
  @bindings[name.to_sym]
@@ -43,7 +43,7 @@ class RDF::Query
43
43
  # var.to_s #=> "?y=123"
44
44
  #
45
45
  class Variable
46
- include RDF::Value
46
+ include RDF::Term
47
47
 
48
48
  ##
49
49
  # The variable's name.
@@ -55,13 +55,13 @@ class RDF::Query
55
55
  ##
56
56
  # The variable's value.
57
57
  #
58
- # @return [RDF::Value]
58
+ # @return [RDF::Term]
59
59
  attr_accessor :value
60
60
 
61
61
  ##
62
62
  # @param [Symbol, #to_sym] name
63
63
  # the variable name
64
- # @param [RDF::Value] value
64
+ # @param [RDF::Term] value
65
65
  # an optional variable value
66
66
  def initialize(name = nil, value = nil)
67
67
  @name = (name || "g#{__id__.to_i.abs}").to_sym
@@ -72,7 +72,7 @@ class RDF::Query
72
72
  # Returns `true`.
73
73
  #
74
74
  # @return [Boolean]
75
- # @see RDF::Value#variable?
75
+ # @see RDF::Term#variable?
76
76
  # @since 0.1.7
77
77
  def variable?
78
78
  true
@@ -105,8 +105,8 @@ class RDF::Query
105
105
  ##
106
106
  # Rebinds this variable to the given `value`.
107
107
  #
108
- # @param [RDF::Value] value
109
- # @return [RDF::Value] the previous value, if any.
108
+ # @param [RDF::Term] value
109
+ # @return [RDF::Term] the previous value, if any.
110
110
  def bind(value)
111
111
  old_value = self.value
112
112
  self.value = value
@@ -117,7 +117,7 @@ class RDF::Query
117
117
  ##
118
118
  # Unbinds this variable, discarding any currently bound value.
119
119
  #
120
- # @return [RDF::Value] the previous value, if any.
120
+ # @return [RDF::Term] the previous value, if any.
121
121
  def unbind
122
122
  old_value = self.value
123
123
  self.value = nil
@@ -137,7 +137,7 @@ class RDF::Query
137
137
  ##
138
138
  # Returns this variable's bindings (if any) as a `Hash`.
139
139
  #
140
- # @return [Hash{Symbol => RDF::Value}]
140
+ # @return [Hash{Symbol => RDF::Term}]
141
141
  def bindings
142
142
  unbound? ? {} : {name => value}
143
143
  end
@@ -165,7 +165,7 @@ class RDF::Query
165
165
  ##
166
166
  # Compares this variable with the given value.
167
167
  #
168
- # @param [RDF::Value] other
168
+ # @param [RDF::Term] other
169
169
  # @return [Boolean]
170
170
  def ===(other)
171
171
  if unbound?
data/lib/rdf/query.rb CHANGED
@@ -21,7 +21,7 @@ module RDF
21
21
  # @example Executing a basic graph pattern query
22
22
  # graph = RDF::Graph.load('etc/doap.nt')
23
23
  # query.execute(graph).each do |solution|
24
- # solution.inspect
24
+ # puts solution.inspect
25
25
  # end
26
26
  #
27
27
  # @example Constructing and executing a query in one go (1)
@@ -153,6 +153,33 @@ module RDF
153
153
  self
154
154
  end
155
155
 
156
+ ##
157
+ # Returns an optimized copy of this query.
158
+ #
159
+ # @param [Hash{Symbol => Object}] options
160
+ # any additional options for optimization
161
+ # @return [RDF::Query] a copy of `self`
162
+ # @since 0.3.0
163
+ def optimize(options = {})
164
+ self.dup.optimize!(options)
165
+ end
166
+
167
+ ##
168
+ # Optimizes this query by reordering its constituent triple patterns
169
+ # according to their cost estimates.
170
+ #
171
+ # @param [Hash{Symbol => Object}] options
172
+ # any additional options for optimization
173
+ # @return [void] `self`
174
+ # @see RDF::Query::Pattern#cost
175
+ # @since 0.3.0
176
+ def optimize!(options = {})
177
+ @patterns.sort! do |a, b|
178
+ (a.cost || 0) <=> (b.cost || 0)
179
+ end
180
+ self
181
+ end
182
+
156
183
  ##
157
184
  # Executes this query on the given `queryable` graph or repository.
158
185
  #
@@ -164,51 +191,46 @@ module RDF
164
191
  # the resulting solution sequence
165
192
  # @see http://www.holygoat.co.uk/blog/entry/2005-10-25-1
166
193
  def execute(queryable, options = {})
167
- @solutions = Solutions.new
168
- @failed = false
169
- @patterns.each do |pattern|
170
- case pattern.variable_count
171
- when 0 # no variables
172
- if pattern.execute(queryable).empty?
173
- # return an empty solution sequence:
174
- @solutions.clear
175
- @failed = true
176
- break
177
- end
194
+ options = options.dup
178
195
 
179
- when 3 # only variables
180
- pattern.execute(queryable) do |statement|
181
- @solutions << pattern.solution(statement)
182
- end
196
+ # just so we can call #keys below without worrying
197
+ options[:bindings] ||= {}
183
198
 
184
- else case # 1 or 2 variables
199
+ @solutions = Solutions.new
200
+ # A quick empty solution simplifies the logic below; no special case for
201
+ # the first pattern
202
+ @solutions << RDF::Query::Solution.new({})
185
203
 
186
- when !@solutions.have_variables?(pattern.variables.values)
187
- if @solutions.empty?
188
- pattern.execute(queryable) do |statement|
189
- @solutions << pattern.solution(statement)
190
- end
191
- else # union
192
- old_solutions, @solutions = @solutions, Solutions.new
193
- old_solutions.each do |solution|
194
- pattern.execute(queryable) do |statement|
195
- @solutions << solution.merge(pattern.solution(statement))
196
- end
197
- end
198
- end
204
+ @patterns.each do |pattern|
205
+
206
+ old_solutions, @solutions = @solutions, Solutions.new
199
207
 
200
- else # intersection
201
- @solutions.each_with_index do |solution, index|
202
- failed = true
203
- pattern.execute(queryable, solution) do |statement|
204
- failed = false
205
- solution.merge!(pattern.solution(statement))
206
- end
207
- @solutions[index] = nil if failed && !pattern.optional?
208
+ options[:bindings].keys.each do |variable|
209
+ if pattern.variables.include?(variable)
210
+ unbound_solutions, old_solutions = old_solutions, Solutions.new
211
+ options[:bindings][variable].each do |binding|
212
+ unbound_solutions.each do |solution|
213
+ old_solutions << solution.merge(variable => binding)
208
214
  end
209
- @solutions.compact! # remove `nil` entries
215
+ end
216
+ options[:bindings].delete(variable)
217
+ end
218
+ end
219
+
220
+ old_solutions.each do |solution|
221
+ pattern.execute(queryable, solution) do |statement|
222
+ @solutions << solution.merge(pattern.solution(statement))
210
223
  end
211
224
  end
225
+
226
+ # It's important to abort failed queries quickly because later patterns
227
+ # that can have constraints are often broad without them.
228
+ # We have no solutions at all:
229
+ return @solutions if @solutions.empty?
230
+ # We have no solutions for variables we should have solutions for:
231
+ if !pattern.optional? && pattern.variables.keys.any? { |variable| !@solutions.variable_names.include?(variable) }
232
+ return Solutions.new
233
+ end
212
234
  end
213
235
  @solutions
214
236
  end
@@ -222,7 +244,7 @@ module RDF
222
244
  # @return [Boolean]
223
245
  # @see #matched?
224
246
  def failed?
225
- @failed
247
+ @solutions.empty?
226
248
  end
227
249
 
228
250
  ##
data/lib/rdf/reader.rb CHANGED
@@ -221,8 +221,8 @@ module RDF
221
221
  #
222
222
  # @return [RDF::URI]
223
223
  def prefix(name, uri = nil)
224
- name = name.respond_to?(:to_sym) ? name.to_sym : name.to_s.to_sym
225
- uri.nil? ? prefixes[name] : prefixes[name] = RDF::URI(uri)
224
+ name = name.to_s.empty? ? nil : (name.respond_to?(:to_sym) ? name.to_sym : name.to_s.to_sym)
225
+ uri.nil? ? prefixes[name] : prefixes[name] = uri
226
226
  end
227
227
  alias_method :prefix!, :prefix
228
228
 
@@ -271,7 +271,7 @@ module RDF
271
271
  # each triple
272
272
  # @yieldparam [RDF::Resource] subject
273
273
  # @yieldparam [RDF::URI] predicate
274
- # @yieldparam [RDF::Value] object
274
+ # @yieldparam [RDF::Term] object
275
275
  # @yieldreturn [void] ignored
276
276
  # @return [void]
277
277
  #
@@ -331,7 +331,7 @@ module RDF
331
331
  ##
332
332
  # Reads a triple from the input stream.
333
333
  #
334
- # @return [Array(RDF::Value)] a triple
334
+ # @return [Array(RDF::Term)] a triple
335
335
  # @raise [NotImplementedError] unless implemented in subclass
336
336
  # @abstract
337
337
  def read_triple
@@ -423,7 +423,8 @@ module RDF
423
423
  ##
424
424
  # @return [String]
425
425
  def readline
426
- @line = @input.readline.chomp
426
+ @line = @input.readline
427
+ @line.chomp!
427
428
  @line.force_encoding(encoding) if @line.respond_to?(:force_encoding) # for Ruby 1.9+
428
429
  @line
429
430
  end