rdfobjects 0.7.3 → 0.8.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -4,6 +4,12 @@ class Integer
4
4
  @data_type = uri
5
5
  end
6
6
  end
7
+ class Float
8
+ attr_accessor :language, :data_type
9
+ def set_data_type(uri)
10
+ @data_type = uri
11
+ end
12
+ end
7
13
  class Date
8
14
  attr_accessor :language, :data_type
9
15
  def set_data_type(uri)
@@ -37,6 +43,7 @@ class RDFObject::Literal
37
43
  when 'http://www.w3.org/2001/XMLSchema#dateTime' then DateTime.parse(value)
38
44
  when 'http://www.w3.org/2001/XMLSchema#date' then Date.parse(value)
39
45
  when 'http://www.w3.org/2001/XMLSchema#integer' then value.to_i
46
+ when 'http://www.w3.org/2001/XMLSchema#float' then value.to_f
40
47
  when 'http://www.w3.org/2001/XMLSchema#string' then value.to_s
41
48
  when 'http://www.w3.org/2001/XMLSchema#boolean'
42
49
  if value.downcase == 'true' || value == '1'
@@ -47,9 +54,12 @@ class RDFObject::Literal
47
54
  else
48
55
  value
49
56
  end
50
- if obj.to_s != value
51
- raise ArgumentError
57
+ unless obj.is_a?(Float)
58
+ raise ArgumentError if obj.to_s != value
59
+ else
60
+ raise ArgumentError if obj.to_s.sub(/\.0/,'') != value
52
61
  end
62
+
53
63
  obj.set_data_type(options[:data_type])
54
64
  obj.language = options[:language]
55
65
  obj
@@ -82,7 +82,8 @@ module RDFObject
82
82
  if doc.root.namespaces.values.index("http://www.w3.org/1999/xhtml")
83
83
  parser = RDFAParser.new(doc)
84
84
  else
85
- parser = XMLParser.new(doc)
85
+ doc = nil
86
+ parser = XMLParser.new(rdf)
86
87
  end
87
88
  rescue Nokogiri::XML::SyntaxError
88
89
  begin
@@ -186,87 +187,112 @@ module RDFObject
186
187
  end
187
188
  end
188
189
 
190
+
189
191
  class XMLParser < RDFObject::Parser
190
- #
191
- # A very unsophisticated RDF/XML Parser -- currently only parses RDF/XML that conforms to
192
- # the SimpleRdfXml convention: http://esw.w3.org/topic/SimpleRdfXml. This is a pragmatic
193
- # rather than dogmatic decision. If it is not working with your RDF/XML let me know and we
194
- # can probably fix it.
195
- #
196
-
197
- def parse
198
- if @rdfxml.namespaces.values.index("http://purl.org/rss/1.0/")
199
- fix_rss10
200
- end
201
- if @rdfxml.namespaces.values.index("http://www.w3.org/2005/sparql-results#")
202
- raise "Sorry, SPARQL not yet supported"
203
- else
204
- parse_rdfxml
205
- end
206
- @collection
192
+ def initialize(data=nil)
193
+ super(data)
194
+ @uris = []
195
+ @tags = {}
196
+ @parser = Nokogiri::XML::SAX::Parser.new(self)
207
197
  end
208
198
 
209
199
  def data=(xml)
210
- if xml.is_a?(Nokogiri::XML::Document)
200
+ if xml.is_a?(String)
211
201
  @rdfxml = xml
212
- else
213
- @rdfxml = Nokogiri::XML.parse(xml, nil, nil, Nokogiri::XML::ParseOptions::PEDANTIC)
202
+ elsif xml.respond_to?(:read)
203
+ xml.rewind
204
+ @rdfxml = xml.read
214
205
  end
215
206
  end
216
-
217
- def parse_resource_node(resource_node, collection)
218
- resource = @collection.find_or_create(resource_node.attribute_with_ns('about', "http://www.w3.org/1999/02/22-rdf-syntax-ns#").value)
219
- unless (resource_node.name == "Description" and resource_node.namespace.href == "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
220
- resource.assert("[rdf:type]", @collection.find_or_create("#{resource_node.namespace.href}#{resource_node.name}"))
207
+
208
+ def parse
209
+ @parser.parse(@rdfxml)
210
+ @collection
211
+ end
212
+
213
+ def method_missing(methName, *args)
214
+ sax_methods = [:xmldecl, :start_document, :end_document, :start_element,
215
+ :end_element, :comment, :warning, :error, :cdata_block]
216
+ unless sax_methods.index(methName)
217
+ raise NoMethodError.new("undefined method '#{methName} for #{self}", 'no_meth')
221
218
  end
222
- resource_node.children.each do | child |
223
- next if child.text?
224
- predicate = "#{child.namespace.href}#{child.name}"
225
- if object_uri = child.attribute_with_ns("resource", "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
226
- obj_resource = @collection.find_or_create(object_uri.value)
227
- resource.assert(predicate, obj_resource)
228
- elsif all_text?(child)
229
- opts = {}
230
- if lang = child.attribute_with_ns("lang", "http://www.w3.org/XML/1998/namespace")
231
- opts[:language] = lang.value
232
- end
233
- if datatype = child.attribute_with_ns("datatype", "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
234
- opts[:data_type] = datatype.value
235
- end
236
- resource.assert(predicate, Literal.new(child.content.strip,opts))
237
- end
238
- child.xpath("./*[@rdf:about]").each do | grandchild |
239
- gc_resource = @collection.find_or_create(grandchild.attribute_with_ns('about', "http://www.w3.org/1999/02/22-rdf-syntax-ns#").value)
240
- resource.assert(predicate, gc_resource)
241
- parse_resource_node(grandchild, collection)
242
- end
219
+ end
220
+
221
+ def attributes_to_hash(attributes)
222
+ hash = {}
223
+ attributes.each do | att |
224
+ hash[att.localname] = att.value
243
225
  end
226
+ hash
244
227
  end
245
-
246
- def all_text?(node)
247
- node.children.each do | child |
248
- return false unless child.text?
228
+
229
+ def add_layer(element_uri, resource_uri)
230
+ if @uris.length > 0 && @current_predicate
231
+ @collection[@uris.last].relate(@current_predicate, @collection.find_or_create(resource_uri))
232
+ @current_predicate = nil
249
233
  end
250
- true
234
+ @uris << resource_uri
235
+ @tags[resource_uri] = element_uri
251
236
  end
252
-
253
- def parse_rdfxml
254
- collection = []
255
- @rdfxml.root.xpath("./*[@rdf:about]").each do | resource_node |
256
- parse_resource_node(resource_node, collection)
237
+
238
+ def remove_layer(element_uri)
239
+ uris = []
240
+ @tags.each do |uri, el|
241
+ uris << uri if el == element_uri
257
242
  end
258
- end
259
-
260
- def fix_rss10
261
- @rdfxml.root.xpath('./rss:channel/rss:items/rdf:Seq/rdf:li', {"rdf"=>"http://www.w3.org/1999/02/22-rdf-syntax-ns#",
262
- "rss"=>"http://purl.org/rss/1.0/"}).each do | li |
263
- if li['resource'] && !li["rdf:resource"]
264
- li["rdf:resource"] = li["resource"]
243
+ uris.each do | uri |
244
+ if @uris.last == uri
245
+ @uris.pop
246
+ @tags.delete(uri)
247
+ break
265
248
  end
266
249
  end
250
+ @current_resource = @collection[@uris.last]
251
+ end
252
+
253
+ def start_element_namespace name, attributes = [], prefix = nil, uri = nil, ns = {}
254
+ attributes = attributes_to_hash(attributes)
255
+ if attributes["about"]
256
+ @current_resource = @collection.find_or_create(attributes['about'])
257
+ add_layer("#{uri}#{name}", @current_resource.uri)
258
+ unless "#{uri}#{name}" == "http://www.w3.org/1999/02/22-rdf-syntax-ns#Description"
259
+ @current_resource.relate("http://www.w3.org/1999/02/22-rdf-syntax-ns#type", @collection.find_or_create("#{uri}#{name}"))
260
+ end
261
+ elsif attributes["resource"]
262
+ resource = @collection.find_or_create(attributes['resource'])
263
+ @current_resource.assert("#{uri}#{name}", resource)
264
+ else
265
+ @current_predicate = "#{uri}#{name}"
266
+ end
267
+ if attributes["datatype"] || attributes["lang"]
268
+ @literal = {}
269
+ @literal[:datatype] = attributes["datatype"] if attributes["datatype"]
270
+ @literal[:language] = attributes["lang"] if attributes["lang"]
271
+ @literal[:value] = ""
272
+ end
273
+ end
274
+
275
+
276
+ def characters text
277
+ if @current_predicate && !text.strip.empty?
278
+ @literal ||={:value=>""}
279
+ @literal[:value] << text.strip
280
+ end
267
281
  end
268
- end
269
282
 
283
+ def end_element_namespace name, prefix = nil, uri = nil
284
+ if @literal
285
+ lit = RDFObject::Literal.new(@literal[:value], {:data_type=>@literal[:datatype], :language=>@literal[:language]})
286
+ #puts "#{@current_resource.inspect} :: #{@current_predicate} == #{lit}"
287
+ @current_resource.assert(@current_predicate, lit) if @current_predicate
288
+ @literal = nil
289
+ @current_predicate = nil
290
+ else
291
+ remove_layer("#{uri}#{name}")
292
+ end
293
+ end
294
+ end
295
+
270
296
  class RDFAParser < XMLParser
271
297
  def data=(xhtml)
272
298
  if xhtml.is_a?(Nokogiri::XML::Document)
@@ -275,8 +301,7 @@ module RDFObject
275
301
  doc = Nokogiri::HTML.parse(xhtml)
276
302
  end
277
303
  xslt = Nokogiri::XSLT(open(File.dirname(__FILE__) + '/../xsl/RDFa2RDFXML.xsl'))
278
- rdfxml = xslt.apply_to(doc)
279
- @rdfxml = Nokogiri::XML.parse(rdfxml, nil, nil, Nokogiri::XML::ParseOptions::PEDANTIC)
304
+ @rdfxml = xslt.apply_to(doc)
280
305
  end
281
306
  end
282
307
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rdfobjects
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.3
4
+ version: 0.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ross Singer
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-01-06 00:00:00 -05:00
12
+ date: 2010-01-08 00:00:00 -05:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency