rdf 0.3.3 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,7 +19,7 @@ module RDF
19
19
  # RDF::Format.content_types #=> {"text/plain" => [RDF::NTriples::Format]}
20
20
  #
21
21
  # @example Obtaining serialization format file extension mappings
22
- # RDF::Format.file_extensions #=> {:nt => "text/plain"}
22
+ # RDF::Format.file_extensions #=> {:nt => [RDF::NTriples::Format]}
23
23
  #
24
24
  # @example Defining a new RDF serialization format class
25
25
  # class RDF::NTriples::Format < RDF::Format
@@ -77,11 +77,18 @@ module RDF
77
77
  # @option options [String, #to_s] :file_name (nil)
78
78
  # @option options [Symbol, #to_sym] :file_extension (nil)
79
79
  # @option options [String, #to_s] :content_type (nil)
80
+ # Note that content_type will be taken from a URL opened using {RDF::Util::File.open_file}.
81
+ # @option options [String] :sample (nil)
82
+ # A sample of input used for performing format detection.
83
+ # If we find no formats, or we find more than one, and we have a sample, we can
84
+ # perform format detection to find a specific format to use, in which case
85
+ # we pick the first one we find
80
86
  # @return [Class]
87
+ # @yieldreturn [String] another way to provide a sample, allowing the sample to be retrieved lazily.
81
88
  #
82
89
  # @return [Class]
83
90
  def self.for(options = {})
84
- case options
91
+ format = case options
85
92
  when String
86
93
  # Find a format based on the file name
87
94
  self.for(:file_name => options)
@@ -94,15 +101,13 @@ module RDF
94
101
  # @see http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7
95
102
  mime_type = mime_type.to_s
96
103
  mime_type = mime_type.split(';').first if mime_type.include?(?;) # remove any media type parameters
97
- content_types.has_key?(mime_type) ? content_types[mime_type].first : nil
104
+ content_types[mime_type]
98
105
  # Find a format based on the file name:
99
106
  when file_name = options[:file_name]
100
107
  self.for(:file_extension => File.extname(file_name.to_s)[1..-1])
101
108
  # Find a format based on the file extension:
102
109
  when file_ext = options[:file_extension]
103
- if file_extensions.has_key?(file_ext = file_ext.to_sym)
104
- self.for(:content_type => file_extensions[file_ext])
105
- end
110
+ file_extensions[file_ext.to_sym]
106
111
  end
107
112
 
108
113
  when Symbol
@@ -112,15 +117,31 @@ module RDF
112
117
  RDF::NTriples::Format
113
118
  # For anything else, find a match based on the full class name
114
119
  else
115
- format = format.to_s.downcase
116
120
  @@subclasses.each do |klass|
117
- if klass.name.to_s.split('::').map(&:downcase).include?(format)
121
+ if klass.to_sym == format ||
122
+ klass.name.to_s.split('::').map(&:downcase).include?(format.to_s.downcase)
118
123
  return klass
119
124
  end
120
125
  end
121
126
  nil # not found
122
127
  end
123
128
  end
129
+
130
+ if format.is_a?(Array)
131
+ return format.first if format.uniq.length == 1
132
+ elsif !format.nil?
133
+ return format
134
+ end
135
+
136
+ # If we have a sample, use that for format detection
137
+ if sample = (options[:sample] if options.is_a?(Hash)) || (yield if block_given?)
138
+ # Given a sample, perform format detection across the appropriate formats, choosing
139
+ # the first that matches
140
+ format ||= @@subclasses
141
+
142
+ # Return first format that has a positive detection
143
+ format.detect {|f| f.detect(sample)}
144
+ end
124
145
  end
125
146
 
126
147
  ##
@@ -134,11 +155,21 @@ module RDF
134
155
  ##
135
156
  # Returns file extensions for known RDF serialization formats.
136
157
  #
137
- # @return [Hash{Symbol => String}]
158
+ # @return [Hash{Symbol => Array<Class>}]
138
159
  def self.file_extensions
139
160
  @@file_extensions
140
161
  end
141
162
 
163
+ ##
164
+ # Returns a symbol appropriate to use with RDF::Format.for()
165
+ # @return [Symbol]
166
+ def self.to_sym
167
+ elements = self.to_s.split("::")
168
+ sym = elements.pop
169
+ sym = elements.pop if sym == 'Format'
170
+ sym.downcase.to_s.to_sym
171
+ end
172
+
142
173
  ##
143
174
  # Retrieves or defines the reader class for this RDF serialization
144
175
  # format.
@@ -225,6 +256,24 @@ module RDF
225
256
  end
226
257
  end
227
258
 
259
+
260
+ ##
261
+ # Use a text sample to detect the format of an input file. Sub-classes implement
262
+ # a matcher sufficient to detect probable format matches, including disambiguating
263
+ # between other similar formats.
264
+ #
265
+ # Used to determine format class from loaded formats by {RDF::Format.for} when a
266
+ # match cannot be unambiguously found otherwise.
267
+ #
268
+ # @example
269
+ # RDF::NTriples::Format.detect("<a> <b> <c> .") => true
270
+ #
271
+ # @param [String] sample Beginning several bytes (~ 1K) of input.
272
+ # @return [Boolean]
273
+ def self.detect(sample)
274
+ false
275
+ end
276
+
228
277
  class << self
229
278
  alias_method :reader_class, :reader
230
279
  alias_method :writer_class, :writer
@@ -267,7 +316,7 @@ module RDF
267
316
 
268
317
  if extensions = (options[:extension] || options[:extensions])
269
318
  extensions = [extensions].flatten.map(&:to_sym)
270
- extensions.each { |ext| @@file_extensions[ext] = type }
319
+ extensions.each { |ext| (@@file_extensions[ext] ||= []) << self }
271
320
  end
272
321
  if aliases = (options[:alias] || options[:aliases])
273
322
  aliases = [aliases].flatten.each { |a| (@@content_types[a] ||= []) << self }
@@ -0,0 +1,21 @@
1
+ module RDF
2
+ ##
3
+ # An RDF type check mixin.
4
+ #
5
+ # This module implements #type_error, which will raise RDF::TypeError.
6
+ #
7
+ # @see RDF::Value
8
+ # @see RDF::Literal
9
+ # @see RDF::Literal
10
+ module TypeCheck
11
+ ##
12
+ # Overrides the default implementation of type_error, which returns false.
13
+ # Classes including RDF::TypeCheck will raise TypeError with the given
14
+ # message instead.
15
+ #
16
+ # @raise [TypeError]
17
+ def type_error(message)
18
+ raise TypeError, message
19
+ end
20
+ end # TypeCheck
21
+ end # RDF
@@ -47,6 +47,7 @@ module RDF
47
47
  #
48
48
  # @param [String, #to_s] url
49
49
  # @param [Hash{Symbol => Object}] options
50
+ # Options from {RDF::Reader#initialize}, {RDF::Format.for} and {RDF::Graph#initialize}
50
51
  # @yield [graph]
51
52
  # @yieldparam [Graph] graph
52
53
  # @return [Graph]
@@ -56,17 +56,50 @@ module RDF
56
56
  # The canonical empty list.
57
57
  NIL = RDF::List.new(RDF.nil).freeze
58
58
 
59
+ ##
60
+ # Validate the list ensuring that
61
+ # * rdf:rest values are all BNodes or nil
62
+ # * rdf:type, if it exists, is rdf:List
63
+ # * each subject has no properties other than single-valued rdf:first, rdf:rest
64
+ # other than for the first node in the list
65
+ # @return [Boolean]
66
+ def valid?
67
+ li = subject
68
+ while li != RDF.nil do
69
+ rest = nil
70
+ firsts = rests = 0
71
+ @graph.query(:subject => li) do |st|
72
+ case st.predicate
73
+ when RDF.type
74
+ # Be tolerant about rdf:type entries, as some OWL vocabularies use it excessively
75
+ when RDF.first
76
+ firsts += 1
77
+ when RDF.rest
78
+ rest = st.object
79
+ return false unless rest.node? || rest == RDF.nil
80
+ rests += 1
81
+ else
82
+ # First node may have other properties
83
+ return false unless li == subject
84
+ end
85
+ end
86
+ return false unless firsts == 1 && rests == 1
87
+ li = rest
88
+ end
89
+ true
90
+ end
91
+
59
92
  ##
60
93
  # Returns the subject term of this list.
61
94
  #
62
- # @return [RDF::Resource]
95
+ # @attr_reader [RDF::Resource]
63
96
  attr_reader :subject
64
97
 
65
98
  ##
66
99
  # Returns the underlying graph storing the statements that constitute
67
100
  # this list.
68
101
  #
69
- # @return [RDF::Graph]
102
+ # @attr_reader [RDF::Graph]
70
103
  attr_reader :graph
71
104
 
72
105
  ##
@@ -489,7 +522,7 @@ module RDF
489
522
  ##
490
523
  # Returns the first subject term constituting this list.
491
524
  #
492
- # This is equivalent to {#subject}.
525
+ # This is equivalent to `subject`.
493
526
  #
494
527
  # @example
495
528
  # RDF::List[1, 2, 3].first_subject #=> RDF::Node(...)
@@ -2,9 +2,30 @@ module RDF
2
2
  ##
3
3
  # An RDF literal.
4
4
  #
5
+ # Subclasses of {RDF::Literal} should define DATATYPE and GRAMMAR constants, which are used
6
+ # for identifying the appropriate class to use for a datatype URI and to perform lexical
7
+ # matching on the value.
8
+ #
9
+ # Literal comparison with other {RDF::Value} instances call {RDF::Value#type_error},
10
+ # which returns false. Implementations wishing to have {RDF::TypeError} raised
11
+ # should mix-in {RDF::TypeCheck}. This is required for strict SPARQL conformance.
12
+ #
13
+ # Specific typed literals may have behavior different from the default implementation. See
14
+ # the following defined sub-classes for specific documentation. Additional sub-classes may
15
+ # be defined, and will interoperate by defining `DATATYPE` and `GRAMMAR` constants, in addition
16
+ # to other required overrides of RDF::Literal behavior.
17
+ #
18
+ # * {RDF::Literal::Boolean}
19
+ # * {RDF::Literal::Date}
20
+ # * {RDF::Literal::DateTime}
21
+ # * {RDF::Literal::Decimal}
22
+ # * {RDF::Literal::Double}
23
+ # * {RDF::Literal::Integer}
24
+ # * {RDF::Literal::Time}
25
+ #
5
26
  # @example Creating a plain literal
6
27
  # value = RDF::Literal.new("Hello, world!")
7
- # value.plain? #=> true
28
+ # value.plain? #=> true
8
29
  #
9
30
  # @example Creating a language-tagged literal (1)
10
31
  # value = RDF::Literal.new("Hello!", :language => :en)
@@ -39,53 +60,48 @@ module RDF
39
60
  # @see http://www.w3.org/TR/rdf-concepts/#section-Literals
40
61
  # @see http://www.w3.org/TR/rdf-concepts/#section-Datatypes-intro
41
62
  class Literal
42
- autoload :Boolean, 'rdf/model/literal/boolean'
43
- autoload :Numeric, 'rdf/model/literal/numeric'
44
- autoload :Integer, 'rdf/model/literal/integer'
45
- autoload :Double, 'rdf/model/literal/double'
46
- autoload :Decimal, 'rdf/model/literal/decimal'
47
- autoload :Date, 'rdf/model/literal/date'
48
- autoload :DateTime, 'rdf/model/literal/datetime'
49
- autoload :Time, 'rdf/model/literal/time'
50
- autoload :Token, 'rdf/model/literal/token'
51
- autoload :XML, 'rdf/model/literal/xml'
63
+
64
+ private
65
+ @@subclasses = [] # @private
66
+
67
+ ##
68
+ # @private
69
+ # @return [void]
70
+ def self.inherited(child)
71
+ @@subclasses << child
72
+ super
73
+ end
74
+
75
+ public
76
+
77
+ require 'rdf/model/literal/numeric'
78
+ require 'rdf/model/literal/boolean'
79
+ require 'rdf/model/literal/decimal'
80
+ require 'rdf/model/literal/integer'
81
+ require 'rdf/model/literal/double'
82
+ require 'rdf/model/literal/date'
83
+ require 'rdf/model/literal/dateTime'
84
+ require 'rdf/model/literal/time'
85
+ require 'rdf/model/literal/token'
86
+ require 'rdf/model/literal/xml'
52
87
 
53
88
  include RDF::Term
54
89
 
90
+ ##
91
+ # @private
92
+ # Return datatype class for uri, or nil if none is found
93
+ def self.datatyped_class(uri)
94
+ @@subclasses.detect {|klass| klass.const_defined?(:DATATYPE) && klass.const_get(:DATATYPE) == uri}
95
+ end
96
+
55
97
  ##
56
98
  # @private
57
99
  def self.new(value, options = {})
58
100
  klass = case
59
101
  when !self.equal?(RDF::Literal)
60
102
  self # subclasses can be directly constructed without type dispatch
61
- when datatype = options[:datatype]
62
- case RDF::URI(datatype)
63
- when XSD.boolean
64
- RDF::Literal::Boolean
65
- when XSD.integer, XSD.long, XSD.int, XSD.short, XSD.byte
66
- RDF::Literal::Integer
67
- when XSD.double, XSD.float
68
- RDF::Literal::Double
69
- when XSD.decimal
70
- RDF::Literal::Decimal
71
- when XSD.date
72
- RDF::Literal::Date
73
- when XSD.dateTime
74
- RDF::Literal::DateTime
75
- when XSD.time
76
- RDF::Literal::Time
77
- when XSD.nonPositiveInteger, XSD.negativeInteger
78
- RDF::Literal::Integer
79
- when XSD.nonNegativeInteger, XSD.positiveInteger
80
- RDF::Literal::Integer
81
- when XSD.unsignedLong, XSD.unsignedInt, XSD.unsignedShort, XSD.unsignedByte
82
- RDF::Literal::Integer
83
- when XSD.token, XSD.language
84
- RDF::Literal::Token
85
- when RDF.XMLLiteral
86
- RDF::Literal::XML
87
- else self
88
- end
103
+ when typed_literal = datatyped_class(RDF::URI(options[:datatype]))
104
+ typed_literal
89
105
  else case value
90
106
  when ::TrueClass then RDF::Literal::Boolean
91
107
  when ::FalseClass then RDF::Literal::Boolean
@@ -123,8 +139,10 @@ module RDF
123
139
  def initialize(value, options = {})
124
140
  @object = value
125
141
  @string = options[:lexical] if options[:lexical]
142
+ @string = value if !defined?(@string) && value.is_a?(String)
126
143
  @language = options[:language].to_s.to_sym if options[:language]
127
144
  @datatype = RDF::URI(options[:datatype]) if options[:datatype]
145
+ @datatype ||= self.class.const_get(:DATATYPE) if self.class.const_defined?(:DATATYPE)
128
146
  end
129
147
 
130
148
  ##
@@ -138,30 +156,7 @@ module RDF
138
156
  ##
139
157
  # @return [Object]
140
158
  def object
141
- @object || case datatype
142
- when XSD.string, nil
143
- value
144
- when XSD.boolean
145
- %w(true 1).include?(value)
146
- when XSD.integer, XSD.long, XSD.int, XSD.short, XSD.byte
147
- value.to_i
148
- when XSD.double, XSD.float
149
- value.to_f
150
- when XSD.decimal
151
- ::BigDecimal.new(value)
152
- when XSD.date
153
- ::Date.parse(value)
154
- when XSD.dateTime
155
- ::DateTime.parse(value)
156
- when XSD.time
157
- ::Time.parse(value)
158
- when XSD.nonPositiveInteger, XSD.negativeInteger
159
- value.to_i
160
- when XSD.nonNegativeInteger, XSD.positiveInteger
161
- value.to_i
162
- when XSD.unsignedLong, XSD.unsignedInt, XSD.unsignedShort, XSD.unsignedByte
163
- value.to_i
164
- end
159
+ defined?(@object) ? @object : value
165
160
  end
166
161
 
167
162
  ##
@@ -189,7 +184,7 @@ module RDF
189
184
  end
190
185
 
191
186
  ##
192
- # Returns `true` if this literal is equal to `other`.
187
+ # Determines if `self` is the same term as `other`.
193
188
  #
194
189
  # @example
195
190
  # RDF::Literal(1).eql?(RDF::Literal(1.0)) #=> false
@@ -199,27 +194,43 @@ module RDF
199
194
  def eql?(other)
200
195
  self.equal?(other) ||
201
196
  (self.class.eql?(other.class) &&
202
- self.datatype.eql?(other.datatype) &&
203
- self == other)
197
+ self.value.eql?(other.value) &&
198
+ self.language.to_s.downcase.eql?(other.language.to_s.downcase) &&
199
+ self.datatype.eql?(other.datatype))
204
200
  end
205
201
 
206
202
  ##
207
- # Returns `true` if this literal is equivalent to `other`.
203
+ # Returns `true` if this literal is equivalent to `other` (with type check).
208
204
  #
209
205
  # @example
210
206
  # RDF::Literal(1) == RDF::Literal(1.0) #=> true
211
207
  #
212
208
  # @param [Object] other
213
209
  # @return [Boolean] `true` or `false`
210
+ #
211
+ # @see http://www.w3.org/TR/rdf-sparql-query/#func-RDFterm-equal
212
+ # @see http://www.w3.org/TR/rdf-concepts/#section-Literal-Equality
214
213
  def ==(other)
215
214
  case other
216
- when Literal
217
- self.value.eql?(other.value) &&
218
- self.language.eql?(other.language) &&
219
- self.datatype.eql?(other.datatype)
220
- when String
221
- self.plain? && self.value.eql?(other)
222
- else false
215
+ when Literal
216
+ case
217
+ when self.eql?(other)
218
+ true
219
+ when self.has_language? && self.language.to_s.downcase == other.language.to_s.downcase
220
+ # Literals with languages can compare if languages are identical
221
+ self.value == other.value
222
+ when (self.simple? || self.datatype == XSD.string) && (other.simple? || other.datatype == XSD.string)
223
+ self.value == other.value
224
+ when other.comperable_datatype?(self) || self.comperable_datatype?(other)
225
+ # Comparing plain with undefined datatypes does not generate an error, but returns false
226
+ # From data-r2/expr-equal/eq-2-2.
227
+ false
228
+ else
229
+ type_error("unable to determine whether #{self.inspect} and #{other.inspect} are equivalent")
230
+ end
231
+ when String
232
+ self.plain? && self.value.eql?(other)
233
+ else false
223
234
  end
224
235
  end
225
236
  alias_method :===, :==
@@ -277,6 +288,34 @@ module RDF
277
288
  !valid?
278
289
  end
279
290
 
291
+ ##
292
+ # Returns `true` if the literal has a datatype and the comparison should
293
+ # return false instead of raise a type error.
294
+ #
295
+ # This behavior is intuited from SPARQL data-r2/expr-equal/eq-2-2
296
+ # @return [Boolean]
297
+ def comperable_datatype?(other)
298
+ return false unless self.plain? || self.has_language?
299
+
300
+ case other
301
+ when RDF::Literal::Numeric, RDF::Literal::Boolean,
302
+ RDF::Literal::Date, RDF::Literal::Time, RDF::Literal::DateTime
303
+ # Invalid types can be compared without raising a TypeError if literal has a language (open-eq-08)
304
+ !other.valid? && self.has_language?
305
+ else
306
+ case other.datatype
307
+ when XSD.string
308
+ true
309
+ when nil
310
+ # A different language will not generate a type error
311
+ other.has_language?
312
+ else
313
+ # An unknown datatype may not be used for comparison, unless it has a language? (open-eq-8)
314
+ self.has_language?
315
+ end
316
+ end
317
+ end
318
+
280
319
  ##
281
320
  # Validates the value using {#valid?}, raising an error if the value is
282
321
  # invalid.