rdfobjects 0.7.3 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -4,6 +4,12 @@ class Integer
4
4
  @data_type = uri
5
5
  end
6
6
  end
7
+ class Float
8
+ attr_accessor :language, :data_type
9
+ def set_data_type(uri)
10
+ @data_type = uri
11
+ end
12
+ end
7
13
  class Date
8
14
  attr_accessor :language, :data_type
9
15
  def set_data_type(uri)
@@ -37,6 +43,7 @@ class RDFObject::Literal
37
43
  when 'http://www.w3.org/2001/XMLSchema#dateTime' then DateTime.parse(value)
38
44
  when 'http://www.w3.org/2001/XMLSchema#date' then Date.parse(value)
39
45
  when 'http://www.w3.org/2001/XMLSchema#integer' then value.to_i
46
+ when 'http://www.w3.org/2001/XMLSchema#float' then value.to_f
40
47
  when 'http://www.w3.org/2001/XMLSchema#string' then value.to_s
41
48
  when 'http://www.w3.org/2001/XMLSchema#boolean'
42
49
  if value.downcase == 'true' || value == '1'
@@ -47,9 +54,12 @@ class RDFObject::Literal
47
54
  else
48
55
  value
49
56
  end
50
- if obj.to_s != value
51
- raise ArgumentError
57
+ unless obj.is_a?(Float)
58
+ raise ArgumentError if obj.to_s != value
59
+ else
60
+ raise ArgumentError if obj.to_s.sub(/\.0/,'') != value
52
61
  end
62
+
53
63
  obj.set_data_type(options[:data_type])
54
64
  obj.language = options[:language]
55
65
  obj
@@ -82,7 +82,8 @@ module RDFObject
82
82
  if doc.root.namespaces.values.index("http://www.w3.org/1999/xhtml")
83
83
  parser = RDFAParser.new(doc)
84
84
  else
85
- parser = XMLParser.new(doc)
85
+ doc = nil
86
+ parser = XMLParser.new(rdf)
86
87
  end
87
88
  rescue Nokogiri::XML::SyntaxError
88
89
  begin
@@ -186,87 +187,112 @@ module RDFObject
186
187
  end
187
188
  end
188
189
 
190
+
189
191
  class XMLParser < RDFObject::Parser
190
- #
191
- # A very unsophisticated RDF/XML Parser -- currently only parses RDF/XML that conforms to
192
- # the SimpleRdfXml convention: http://esw.w3.org/topic/SimpleRdfXml. This is a pragmatic
193
- # rather than dogmatic decision. If it is not working with your RDF/XML let me know and we
194
- # can probably fix it.
195
- #
196
-
197
- def parse
198
- if @rdfxml.namespaces.values.index("http://purl.org/rss/1.0/")
199
- fix_rss10
200
- end
201
- if @rdfxml.namespaces.values.index("http://www.w3.org/2005/sparql-results#")
202
- raise "Sorry, SPARQL not yet supported"
203
- else
204
- parse_rdfxml
205
- end
206
- @collection
192
+ def initialize(data=nil)
193
+ super(data)
194
+ @uris = []
195
+ @tags = {}
196
+ @parser = Nokogiri::XML::SAX::Parser.new(self)
207
197
  end
208
198
 
209
199
  def data=(xml)
210
- if xml.is_a?(Nokogiri::XML::Document)
200
+ if xml.is_a?(String)
211
201
  @rdfxml = xml
212
- else
213
- @rdfxml = Nokogiri::XML.parse(xml, nil, nil, Nokogiri::XML::ParseOptions::PEDANTIC)
202
+ elsif xml.respond_to?(:read)
203
+ xml.rewind
204
+ @rdfxml = xml.read
214
205
  end
215
206
  end
216
-
217
- def parse_resource_node(resource_node, collection)
218
- resource = @collection.find_or_create(resource_node.attribute_with_ns('about', "http://www.w3.org/1999/02/22-rdf-syntax-ns#").value)
219
- unless (resource_node.name == "Description" and resource_node.namespace.href == "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
220
- resource.assert("[rdf:type]", @collection.find_or_create("#{resource_node.namespace.href}#{resource_node.name}"))
207
+
208
+ def parse
209
+ @parser.parse(@rdfxml)
210
+ @collection
211
+ end
212
+
213
+ def method_missing(methName, *args)
214
+ sax_methods = [:xmldecl, :start_document, :end_document, :start_element,
215
+ :end_element, :comment, :warning, :error, :cdata_block]
216
+ unless sax_methods.index(methName)
217
+ raise NoMethodError.new("undefined method '#{methName} for #{self}", 'no_meth')
221
218
  end
222
- resource_node.children.each do | child |
223
- next if child.text?
224
- predicate = "#{child.namespace.href}#{child.name}"
225
- if object_uri = child.attribute_with_ns("resource", "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
226
- obj_resource = @collection.find_or_create(object_uri.value)
227
- resource.assert(predicate, obj_resource)
228
- elsif all_text?(child)
229
- opts = {}
230
- if lang = child.attribute_with_ns("lang", "http://www.w3.org/XML/1998/namespace")
231
- opts[:language] = lang.value
232
- end
233
- if datatype = child.attribute_with_ns("datatype", "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
234
- opts[:data_type] = datatype.value
235
- end
236
- resource.assert(predicate, Literal.new(child.content.strip,opts))
237
- end
238
- child.xpath("./*[@rdf:about]").each do | grandchild |
239
- gc_resource = @collection.find_or_create(grandchild.attribute_with_ns('about', "http://www.w3.org/1999/02/22-rdf-syntax-ns#").value)
240
- resource.assert(predicate, gc_resource)
241
- parse_resource_node(grandchild, collection)
242
- end
219
+ end
220
+
221
+ def attributes_to_hash(attributes)
222
+ hash = {}
223
+ attributes.each do | att |
224
+ hash[att.localname] = att.value
243
225
  end
226
+ hash
244
227
  end
245
-
246
- def all_text?(node)
247
- node.children.each do | child |
248
- return false unless child.text?
228
+
229
+ def add_layer(element_uri, resource_uri)
230
+ if @uris.length > 0 && @current_predicate
231
+ @collection[@uris.last].relate(@current_predicate, @collection.find_or_create(resource_uri))
232
+ @current_predicate = nil
249
233
  end
250
- true
234
+ @uris << resource_uri
235
+ @tags[resource_uri] = element_uri
251
236
  end
252
-
253
- def parse_rdfxml
254
- collection = []
255
- @rdfxml.root.xpath("./*[@rdf:about]").each do | resource_node |
256
- parse_resource_node(resource_node, collection)
237
+
238
+ def remove_layer(element_uri)
239
+ uris = []
240
+ @tags.each do |uri, el|
241
+ uris << uri if el == element_uri
257
242
  end
258
- end
259
-
260
- def fix_rss10
261
- @rdfxml.root.xpath('./rss:channel/rss:items/rdf:Seq/rdf:li', {"rdf"=>"http://www.w3.org/1999/02/22-rdf-syntax-ns#",
262
- "rss"=>"http://purl.org/rss/1.0/"}).each do | li |
263
- if li['resource'] && !li["rdf:resource"]
264
- li["rdf:resource"] = li["resource"]
243
+ uris.each do | uri |
244
+ if @uris.last == uri
245
+ @uris.pop
246
+ @tags.delete(uri)
247
+ break
265
248
  end
266
249
  end
250
+ @current_resource = @collection[@uris.last]
251
+ end
252
+
253
+ def start_element_namespace name, attributes = [], prefix = nil, uri = nil, ns = {}
254
+ attributes = attributes_to_hash(attributes)
255
+ if attributes["about"]
256
+ @current_resource = @collection.find_or_create(attributes['about'])
257
+ add_layer("#{uri}#{name}", @current_resource.uri)
258
+ unless "#{uri}#{name}" == "http://www.w3.org/1999/02/22-rdf-syntax-ns#Description"
259
+ @current_resource.relate("http://www.w3.org/1999/02/22-rdf-syntax-ns#type", @collection.find_or_create("#{uri}#{name}"))
260
+ end
261
+ elsif attributes["resource"]
262
+ resource = @collection.find_or_create(attributes['resource'])
263
+ @current_resource.assert("#{uri}#{name}", resource)
264
+ else
265
+ @current_predicate = "#{uri}#{name}"
266
+ end
267
+ if attributes["datatype"] || attributes["lang"]
268
+ @literal = {}
269
+ @literal[:datatype] = attributes["datatype"] if attributes["datatype"]
270
+ @literal[:language] = attributes["lang"] if attributes["lang"]
271
+ @literal[:value] = ""
272
+ end
273
+ end
274
+
275
+
276
+ def characters text
277
+ if @current_predicate && !text.strip.empty?
278
+ @literal ||={:value=>""}
279
+ @literal[:value] << text.strip
280
+ end
267
281
  end
268
- end
269
282
 
283
+ def end_element_namespace name, prefix = nil, uri = nil
284
+ if @literal
285
+ lit = RDFObject::Literal.new(@literal[:value], {:data_type=>@literal[:datatype], :language=>@literal[:language]})
286
+ #puts "#{@current_resource.inspect} :: #{@current_predicate} == #{lit}"
287
+ @current_resource.assert(@current_predicate, lit) if @current_predicate
288
+ @literal = nil
289
+ @current_predicate = nil
290
+ else
291
+ remove_layer("#{uri}#{name}")
292
+ end
293
+ end
294
+ end
295
+
270
296
  class RDFAParser < XMLParser
271
297
  def data=(xhtml)
272
298
  if xhtml.is_a?(Nokogiri::XML::Document)
@@ -275,8 +301,7 @@ module RDFObject
275
301
  doc = Nokogiri::HTML.parse(xhtml)
276
302
  end
277
303
  xslt = Nokogiri::XSLT(open(File.dirname(__FILE__) + '/../xsl/RDFa2RDFXML.xsl'))
278
- rdfxml = xslt.apply_to(doc)
279
- @rdfxml = Nokogiri::XML.parse(rdfxml, nil, nil, Nokogiri::XML::ParseOptions::PEDANTIC)
304
+ @rdfxml = xslt.apply_to(doc)
280
305
  end
281
306
  end
282
307
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rdfobjects
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.3
4
+ version: 0.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ross Singer
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-01-06 00:00:00 -05:00
12
+ date: 2010-01-08 00:00:00 -05:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency