rdfobjects 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +21 -0
- data/README +103 -0
- data/lib/rdf_objects/curies.rb +62 -0
- data/lib/rdf_objects/data_types.rb +57 -0
- data/lib/rdf_objects/http_client.rb +55 -0
- data/lib/rdf_objects/parsers.rb +274 -0
- data/lib/rdf_objects/rdf_resource.rb +216 -0
- data/lib/rdf_objects/serializers.rb +7 -0
- data/lib/rdf_objects.rb +11 -0
- data/lib/xsl/RDFa2RDFXML.xsl +677 -0
- data/lib/xsl/rdf2nt.xsl +308 -0
- data/lib/xsl/rdf2r3x.xsl +219 -0
- metadata +96 -0
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2007 Ross Singer
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
RDFObjects are intended to simplify working with RDF data by providing a (more) Ruby-like interface to resources (thanks to OpenStruct).
|
2
|
+
|
3
|
+
Installation:
|
4
|
+
sudo gem install rsinger-rdfobjects
|
5
|
+
|
6
|
+
Requirements:
|
7
|
+
* Nokogiri (the idea is for more options in the future)
|
8
|
+
* rsinger-curies
|
9
|
+
* Builder (although, ideally, this will be deprecated)
|
10
|
+
* json (or json_pure)
|
11
|
+
|
12
|
+
Usage:
|
13
|
+
>> require 'rdf_objects'
|
14
|
+
|
15
|
+
>> include RDFObject
|
16
|
+
|
17
|
+
>> Curie.add_prefixes! :skos=>"http://www.w3.org/2004/02/skos/core#"
|
18
|
+
|
19
|
+
>> resource = Resource.new('http://id.loc.gov/authorities/sh2002000569#concept')
|
20
|
+
>> resource.describe
|
21
|
+
>> resource.skos
|
22
|
+
|
23
|
+
=> {"inScheme"=>[#<RDFObject::Resource uri="http://id.loc.gov/authorities#topicalTerms">, #<RDFObject::Resource uri="http://id.loc.gov/authorities#conceptScheme">], "broader"=>[#<RDFObject::Resource skos={"prefLabel"=>"Semantic networks (Information theory)"}, uri="http://id.loc.gov/authorities/sh92004914#concept">, #<RDFObject::Resource skos={"prefLabel"=>"World Wide Web"}, uri="http://id.loc.gov/authorities/sh95000541#concept">, #<RDFObject::Resource skos={"prefLabel"=>"Semantic integration (Computer systems)"}, uri="http://id.loc.gov/authorities/sh2004000479#concept">], "closeMatch"=>#<RDFObject::Resource uri="http://stitch.cs.vu.nl/vocabularies/rameau/ark:/12148/cb14521343b">, "prefLabel"=>"Semantic Web"}
|
24
|
+
|
25
|
+
>> resource["[skos:prefLabel]"]
|
26
|
+
|
27
|
+
=> "Semantic Web"
|
28
|
+
|
29
|
+
>> resource.skos["prefLabel"]
|
30
|
+
|
31
|
+
=> "Semantic Web"
|
32
|
+
|
33
|
+
>> resource["http://www.w3.org/2004/02/skos/core#prefLabel"]
|
34
|
+
|
35
|
+
=> "Semantic Web"
|
36
|
+
(etc.)
|
37
|
+
|
38
|
+
>> resource.skos["broader"].first.skos["prefLabel"]
|
39
|
+
|
40
|
+
=> "Semantic networks (Information theory)"
|
41
|
+
|
42
|
+
Unnecessary, but helpful, way to define typed literals
|
43
|
+
>> source = Literal.new("Library of Congress Authorities", {:language=>"en"})
|
44
|
+
|
45
|
+
And assert them
|
46
|
+
|
47
|
+
>> resource.assert("http://purl.org/dc/terms/source", source)
|
48
|
+
|
49
|
+
=> ["Work cat.: 2002070545: The Semantic Web--ISWC 20002, 2002.", "ASTI on FirstSearch, May 6, 2002: in titles (semantic Web)", "Engr. index online, May 6, 2002 (identifier: Semantic Web)", "Library of Congress Authorities"]
|
50
|
+
|
51
|
+
>> resource["http://purl.org/dc/terms/source"].last.language
|
52
|
+
|
53
|
+
=> "en"
|
54
|
+
|
55
|
+
To relate a resource to another URI you can use #relate - it will accept full URI strings, safe curies or other RDFObject::Resource objects
|
56
|
+
|
57
|
+
>> resource.relate("[skos:closeMatch]", "http://dbpedia.org/resource/Category:Semantic_Web")
|
58
|
+
|
59
|
+
=> [#<RDFObject::Resource uri="http://stitch.cs.vu.nl/vocabularies/rameau/ark:/12148/cb14521343b">, #<RDFObject::Resource uri="http://dbpedia.org/resource/Category:Semantic_Web">]
|
60
|
+
|
61
|
+
RDFObject::Resources sort of act as singletons
|
62
|
+
|
63
|
+
>> r1 = Resource.new('http://ex.org/ex/1234')
|
64
|
+
|
65
|
+
=> #<RDFObject::Resource uri="http://ex.org/ex/1234">
|
66
|
+
|
67
|
+
>> r1.object_id
|
68
|
+
|
69
|
+
=> 8996290
|
70
|
+
|
71
|
+
>> r2 = Resource.new('http://ex.org/ex/1234')
|
72
|
+
|
73
|
+
=> #<RDFObject::Resource uri="http://ex.org/ex/1234">
|
74
|
+
|
75
|
+
>> r2.object_id
|
76
|
+
|
77
|
+
=> 8996290
|
78
|
+
|
79
|
+
So relationships and assertions are always applied to the same object. These are managed in the RDFObject::Resource class:
|
80
|
+
|
81
|
+
>> Resource.instances
|
82
|
+
|
83
|
+
=> {"http://ex.org/ex/1234"=>#<RDFObject::Resource uri="http://ex.org/ex/1234">}
|
84
|
+
|
85
|
+
You can delete a single resource:
|
86
|
+
|
87
|
+
>> Resource.remove(r1)
|
88
|
+
|
89
|
+
>> Resource.instances
|
90
|
+
=> {}
|
91
|
+
|
92
|
+
Or clear the entire hash:
|
93
|
+
|
94
|
+
>> Resource.reset!
|
95
|
+
|
96
|
+
There are also very crude parsers for ntriples and rdf/xml
|
97
|
+
|
98
|
+
>> resources = Parser.parse(open('lcsh.nt').read)
|
99
|
+
|
100
|
+
>> resources.first
|
101
|
+
|
102
|
+
=> #<RDFObject::Resource n0={"altLabel"=>"Lichen ruber planus", "inScheme"=>[#<RDFObject::Resource uri="http://id.loc.gov/authorities#conceptScheme">, #<RDFObject::Resource uri="http://id.loc.gov/authorities#topicalTerms">], "prefLabel"=>"Lichen planus"}, n1={"sameAs"=>#<RDFObject::Resource uri="info:lc/authorities/sh85076767">}, uri="http://id.loc.gov/authorities/sh85076767#concept", n2={"modified"=>#<DateTime: 211644344801/86400,-1/6,2299161>}, rdf={"type"=>#<RDFObject::Resource uri="http://www.w3.org/2004/02/skos/core#Concept">}>
|
103
|
+
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'curies'
|
3
|
+
# Extensions to the Curie class loaded by `require 'curies'`.
#
# These helpers read and extend the prefix => namespace table held in
# @@mappings, which this file never defines (presumably the curies library
# does -- confirm against that gem).
class Curie
  # Counter used to mint automatic prefixes ("n0", "n1", ...).
  @@namespace_counter = 0

  # Returns a Curie object from a fully qualified uri (assuming it is registered)
  #
  # uri_string - absolute URI as a String.
  #
  # Returns a Curie for the first registered namespace that uri_string starts
  # with, or false when none matches.
  def self.curie_from_uri(uri_string)
    @@mappings.each do |prefix, uri|
      namespace = uri.to_s
      # Plain prefix comparison: namespace URIs routinely contain regex
      # metacharacters ('.', '+', '?'), so they must not be used as patterns.
      next unless uri_string.start_with?(namespace)
      return new(prefix, uri_string[namespace.length..-1])
    end
    false
  end

  # Returns the Curie prefix for a URI
  #
  # Returns the first registered prefix whose namespace uri_string starts
  # with, or false when none matches.
  def self.prefix_for(uri_string)
    @@mappings.each do |prefix, uri|
      return prefix if uri_string.start_with?(uri.to_s)
    end
    false
  end

  # Automatically tries to build a safe curie from a uri string.
  # Assumes an RDF Schema and a flat hierarchy.
  #
  # uri_string - absolute URI as a String.
  # prefix     - prefix to register for the derived namespace; when nil an
  #              automatic "n<counter>" prefix is minted.
  #
  # Returns the Curie for uri_string (the result of curie_from_uri after the
  # namespace has been registered).
  def self.create_from_uri(uri_string, prefix = nil)
    existing = curie_from_uri(uri_string)
    return existing if existing

    uri = URI.parse(uri_string)
    if uri.fragment
      # Hash namespaces: everything up to and including the '#'.
      ns = "#{uri.to_s.split('#').first}#"
    else
      # Slash namespaces: everything before the last path segment.  The
      # segment is escaped, and a missing segment is treated as empty.
      elem = uri.path.split('/').last.to_s
      ns = uri.to_s.sub(/#{Regexp.escape(elem)}$/, '')
    end

    unless prefix
      prefix = "n#{@@namespace_counter}"
      @@namespace_counter += 1
    end
    Curie.add_prefixes! prefix.to_s => ns
    curie_from_uri(uri_string)
  end

  # Returns the prefix => namespace table itself (not a copy).
  def self.get_mappings
    @@mappings
  end

  # Return a Curie object from a safe curie string.
  #
  # Raises RuntimeError ("not a real curie") when the string does not look
  # like a safe curie.
  def self.new_from_curie(curie_string)
    raise "not a real curie" unless curie_string.could_be_a_safe_curie?

    prefix, resource = curie_string.curie_parts
    Curie.new(prefix, resource)
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# Lets integers carry RDF literal metadata (language tag and datatype URI).
# NOTE(review): this stores instance variables on Integer objects; confirm the
# target Ruby version allows that (newer interpreters freeze integers).
class Integer
  attr_accessor :language, :data_type

  # set_data_type(uri) stores uri as the datatype, exactly like data_type=.
  alias_method :set_data_type, :data_type=
end
|
7
|
+
# Lets dates carry RDF literal metadata (language tag and datatype URI).
class Date
  attr_accessor :language, :data_type

  # set_data_type(uri) stores uri as the datatype, exactly like data_type=.
  alias_method :set_data_type, :data_type=
end
|
13
|
+
# Lets strings carry RDF literal metadata (language tag and datatype URI).
class String
  attr_accessor :language, :data_type

  # set_data_type(uri) stores uri as the datatype, exactly like data_type=.
  alias_method :set_data_type, :data_type=
end
|
19
|
+
# Lets `true` carry RDF literal metadata (language tag and datatype URI).
# NOTE(review): this stores instance variables on `true`; confirm the target
# Ruby version allows that (newer interpreters freeze it).
class TrueClass
  attr_accessor :language, :data_type

  # set_data_type(uri) stores uri as the datatype, exactly like data_type=.
  alias_method :set_data_type, :data_type=
end
|
25
|
+
# Lets `false` carry RDF literal metadata (language tag and datatype URI).
# NOTE(review): this stores instance variables on `false`; confirm the target
# Ruby version allows that (newer interpreters freeze it).
class FalseClass
  attr_accessor :language, :data_type

  # set_data_type(uri) stores uri as the datatype, exactly like data_type=.
  alias_method :set_data_type, :data_type=
end
|
31
|
+
|
32
|
+
|
33
|
+
|
34
|
+
# DateTime and Date.parse are used below; nothing else in this file loads them.
require 'date'

module RDFObject
  # Factory for RDF literal values.
  #
  # Literal.new does not return a Literal instance: it returns a plain Ruby
  # value (String, Integer, Date, DateTime, true or false) chosen from the
  # :data_type option, annotated -- where the value can hold state -- with the
  # datatype URI and language tag through the accessors patched onto those
  # core classes.
  class Literal
    # value   - the lexical form of the literal (a String).
    # options - Hash with optional keys:
    #           :data_type - datatype URI; the XML Schema dateTime, date,
    #                        integer, string and boolean URIs are converted,
    #                        any other value leaves the literal a String.
    #           :language  - language tag.
    #
    # Returns the converted value.
    # Raises ArgumentError when the lexical form is not valid for the
    # requested datatype, or when an untyped / string literal is not given as
    # a String.
    def self.new(value, options={})
      data_type = options[:data_type]
      obj = case data_type
            when 'http://www.w3.org/2001/XMLSchema#dateTime' then DateTime.parse(value.to_s)
            when 'http://www.w3.org/2001/XMLSchema#date' then Date.parse(value.to_s)
            when 'http://www.w3.org/2001/XMLSchema#integer' then parse_integer(value)
            when 'http://www.w3.org/2001/XMLSchema#boolean' then parse_boolean(value)
            else
              # Covers xsd:string, unknown datatypes and untyped literals.
              unless value.is_a?(String)
                raise ArgumentError, "literal value must be a String, got #{value.inspect}"
              end
              # Annotate a copy so the caller's string is left untouched and
              # frozen strings are accepted.
              value.dup
            end

      # Frozen values (integers and booleans on current Rubies) cannot hold
      # instance variables; they are returned without annotations rather
      # than failing.
      unless obj.frozen?
        obj.set_data_type(data_type)
        obj.language = options[:language]
      end
      obj
    end

    # Converts an xsd:integer lexical form (optional sign, decimal digits).
    def self.parse_integer(value)
      text = value.to_s.strip
      unless text =~ /\A[+-]?\d+\z/
        raise ArgumentError, "invalid xsd:integer literal: #{value.inspect}"
      end
      text.to_i
    end
    private_class_method :parse_integer

    # Converts an xsd:boolean lexical form: true/1 or false/0, any letter case.
    def self.parse_boolean(value)
      case value.to_s.strip.downcase
      when 'true', '1' then true
      when 'false', '0' then false
      else raise ArgumentError, "invalid xsd:boolean literal: #{value.inspect}"
      end
    end
    private_class_method :parse_boolean
  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
require 'uri'
|
3
|
+
require 'cgi'
|
4
|
+
module RDFObject
|
5
|
+
# Minimal HTTP GET client used to dereference resource URIs as RDF/XML.
class HTTPClient
  # Pattern => proxy object table; see register_proxy.
  @@proxies = {}

  # Fetches +uri+ and returns the response body.
  #
  # uri - the URI to dereference, as a String.
  #
  # When a registered proxy pattern matches +uri+, the request is sent to
  # that proxy's proxy_uri(uri, ['ntriples','rdf']) instead.  Only the first
  # matching proxy is applied: the rewritten URI is not matched against the
  # remaining patterns, so proxies are never chained.
  #
  # Returns the body of a "200" response.
  # Raises RuntimeError carrying the HTTP status message for any other status.
  def self.fetch(uri)
    _pattern, proxy = @@proxies.find { |pattern, _| uri.match(pattern) }
    uri = proxy.proxy_uri(uri, ['ntriples','rdf']) if proxy

    u = URI.parse(uri)
    request = Net::HTTP::Get.new(u.request_uri)
    request['accept'] = 'application/rdf+xml'

    http = Net::HTTP.new(u.host, u.port)
    # https URIs need TLS switched on explicitly.
    http.use_ssl = true if u.scheme == 'https'
    response = http.start { |connection| connection.request(request) }

    raise response.message if response.code != "200"
    response.body
  end

  # Registers +proxy+ for every URI matching +uri+.
  #
  # uri   - pattern handed to String#match against each fetched URI.
  # proxy - object responding to proxy_uri(uri, formats).
  #
  # Returns +proxy+.
  def self.register_proxy(uri,proxy)
    @@proxies[uri] = proxy
  end
end
|
30
|
+
|
31
|
+
|
32
|
+
# Builds Talis Platform "describe" URLs for a named store, so that a resource
# URI can be fetched through the store instead of being dereferenced directly.
class TalisPlatformProxy
  attr_reader :store

  # Output formats this proxy can ask the store for.
  @@formats = ['rdf','ntriples','turtle','json']

  # store_name - name of the store, interpolated into the URL path.
  def initialize(store_name)
    @store = store_name
  end

  # Returns the store URL describing +uri+.
  #
  # uri    - the resource URI (CGI-escaped into the `about` parameter).
  # format - acceptable output formats in order of preference; a single
  #          format name is accepted as well.
  #
  # The first requested format that the proxy supports is used.
  # Raises RuntimeError ("No compatible response format!") when none of the
  # requested formats is supported.
  def proxy_uri(uri, format=['rdf'])
    best_format = Array(format).find { |candidate| @@formats.include?(candidate) }
    raise "No compatible response format!" unless best_format
    "http://api.talis.com/stores/#{@store}/meta?about=#{CGI.escape(uri)}&output=#{best_format}"
  end
end
|
55
|
+
end
|
@@ -0,0 +1,274 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'rubygems'
|
3
|
+
require 'strscan'
|
4
|
+
require 'iconv'
|
5
|
+
require 'uri'
|
6
|
+
require 'json'
|
7
|
+
require 'nokogiri'
|
8
|
+
require 'cgi'
|
9
|
+
if RUBY_VERSION < '1.9.0'
|
10
|
+
$KCODE = 'u'
|
11
|
+
require 'jcode'
|
12
|
+
end
|
13
|
+
|
14
|
+
# StringScanner subclass that decodes backslash escapes (including \uXXXX
# sequences) in a string and returns the result as UTF-8.
#
# Within this file it is only instantiated by NTriplesParser, on the code
# path taken when strings do not respond to #force_encoding.
class UTF8Parser < StringScanner
  # Matches the whole run of characters that parse_string will decode;
  # capture group 1 is the matched text.
  STRING = /(([\x0-\x1f]|[\\\/bfnrt]|\\u[0-9a-fA-F]{4}|[\x20-\xff])*)/nx
  # Sentinel returned by parse_string when STRING does not match.
  UNPARSED = Object.new
  # Maps the character following a backslash to its replacement.  Characters
  # not listed fall back to themselves (k.chr) through the default block.
  UNESCAPE_MAP = Hash.new { |h, k| h[k] = k.chr }
  UNESCAPE_MAP.update({
    ?" => '"',
    ?\\ => '\\',
    ?/ => '/',
    ?b => "\b",
    ?f => "\f",
    ?n => "\n",
    ?r => "\r",
    ?t => "\t",
    # nil sends \u escapes to the UTF-16 branch of parse_string.
    ?u => nil,
  })
  # Converter used for the bytes collected from \uXXXX escapes.
  UTF16toUTF8 = Iconv.new('utf-8', 'utf-16be')

  # str - the text to scan; also kept in @string.
  def initialize(str)
    super(str)
    @string = str
  end

  # Scans STRING at the current position and returns its text with escapes
  # decoded.
  #
  # Returns '' when the match is empty, UNPARSED when STRING does not match,
  # otherwise the decoded string (tagged as UTF-8 where #force_encoding is
  # available).
  # Raises StandardError, wrapping any Iconv::Failure.
  def parse_string
    if scan(STRING)
      return '' if self[1].empty?
      # Each match is a single-character escape or a run of consecutive
      # \uXXXX escapes.
      string = self[1].gsub(%r((?:\\[\\bfnrt"/]|(?:\\u(?:[A-Fa-f\d]{4}))+|\\[\x20-\xff]))n) do |c|
        # $&[1] is the character right after the backslash.
        if u = UNESCAPE_MAP[$&[1]]
          u
        else # \uXXXX
          bytes = ''
          i = 0
          # Every escape is 6 characters long: append its four hex digits as
          # two bytes (high, then low) for as long as another \u follows.
          while c[6 * i] == ?\\ && c[6 * i + 1] == ?u
            bytes << c[6 * i + 2, 2].to_i(16) << c[6 * i + 4, 2].to_i(16)
            i += 1
          end
          UTF16toUTF8.iconv(bytes)
        end
      end
      if string.respond_to?(:force_encoding)
        string.force_encoding(Encoding::UTF_8)
      end
      string
    else
      UNPARSED
    end
  rescue Iconv::Failure => e
    raise StandardError, "Caught #{e.class}: #{e}"
  end
end
|
61
|
+
module RDFObject
|
62
|
+
# Parses a single N-Triples statement; NTriplesParser.parse turns a whole
# document into Resource objects.
#
# After construction, #subject and #predicate hold URI strings and #object
# holds either a Resource (URI object) or the value built by Literal.new
# (literal object; #literal is then true and #language / #data_type carry
# the parsed annotations).
class NTriplesParser
  attr_reader :ntriple, :subject, :predicate, :data_type, :language, :literal
  attr_accessor :object

  # What can end a literal's text: the closing quote followed by the
  # statement terminator, by a language tag, or by a datatype marker.
  LITERAL_END = /("\s*\.\s*$)|("@[A-Za-z])|("\^\^)/

  # line - one N-Triples statement.
  #
  # Raises ArgumentError when the statement cannot be parsed.
  def initialize(line)
    @ntriple = line
    if @ntriple.respond_to?(:force_encoding)
      # Scan a binary copy: the caller's string keeps its encoding and may
      # be frozen.
      @ntriple = @ntriple.dup.force_encoding("ASCII-8BIT")
    end
    parse_ntriple
  end

  # Splits @ntriple into subject, predicate and object.
  def parse_ntriple
    scanner = StringScanner.new(@ntriple)
    @subject = scan_uri(scanner)
    @predicate = scan_uri(scanner)
    if scanner.match?(/</)
      uri = scanner.scan_until(/>\s*\.\s*$/)
      malformed! unless uri
      @object = Resource.new(uri.sub(/^</, '').sub(/>\s*\.\s*$/, ''))
    else
      @literal = true
      scanner.getch # opening quote
      raw = scanner.scan_until(LITERAL_END)
      malformed! unless raw
      terminator = scanner.matched
      value = raw[0, raw.length - terminator.length]
      # Step back onto the '@' or '^^' so the annotation can be read below.
      if terminator[1, 1] == '@' || terminator[1, 1] == '^'
        scanner.pos = scanner.pos - (terminator.length - 1)
      end
      if value.respond_to?(:force_encoding)
        # NOTE(review): backslash escapes are not decoded on this path; only
        # the branch below runs UTF8Parser.
        value.force_encoding('utf-8')
      else
        uscan = UTF8Parser.new(value)
        value = uscan.parse_string.chomp
      end
      if scanner.match?(/@/)
        scanner.getch
        tag = scanner.scan_until(/\s*\.\s*$/)
        malformed! unless tag
        @language = tag.sub(/\s*\.\s*$/, '')
      elsif scanner.match?(/\^\^/)
        scanner.skip_until(/</)
        type_uri = scanner.scan_until(/>/)
        malformed! unless type_uri
        @data_type = type_uri.sub(/>$/, '')
      end
      @object = Literal.new(value, {:data_type=>@data_type, :language=>@language})
    end
  end

  # Parses an N-Triples document.
  #
  # resources - a String (split on newlines), an Array of statement lines,
  #             or an object responding to #read (consumed with #readlines).
  #
  # Comment lines (starting with '#') and blank lines are skipped.  Each
  # remaining statement is asserted on the Resource for its subject.
  #
  # Returns an Array of the distinct subject Resources in order of first
  # appearance (always an Array, also when every subject occurs only once).
  # Raises ArgumentError for any other kind of input.
  def self.parse(resources)
    collection = []
    if resources.is_a?(String)
      assertions = resources.split("\n")
    elsif resources.is_a?(Array)
      assertions = resources
    elsif resources.respond_to?(:read)
      assertions = resources.readlines
    else
      raise ArgumentError, "cannot read N-Triples from #{resources.class}"
    end
    assertions.each do | assertion |
      next if assertion[0, 1] == "#" # Ignore comments
      next if blank_line?(assertion)
      triple = self.new(assertion)
      resource = Resource.new(triple.subject)
      resource.assert(triple.predicate, triple.object)
      collection << resource
    end
    collection.uniq
  end

  # True when the line holds nothing but whitespace.  Works on bytes so it
  # is safe for lines that are not valid in their declared encoding.
  def self.blank_line?(line)
    line.each_byte { |byte| return false unless byte == 32 || (byte >= 9 && byte <= 13) }
    true
  end
  private_class_method :blank_line?

  private

  # Reads the next "<uri>" term plus the whitespace after it and returns
  # the bare URI.
  def scan_uri(scanner)
    term = scanner.scan_until(/>\s+/)
    malformed! unless term
    term.sub(/\A\s*</, '').sub(/>\s+\z/, '')
  end

  # Signals a statement that does not have the expected shape.
  def malformed!
    raise ArgumentError, "Malformed N-Triple: #{@ntriple.inspect}"
  end
end
|
130
|
+
|
131
|
+
class XMLParser
  #
  # A very unsophisticated RDF/XML Parser -- currently only parses RDF/XML that conforms to
  # the SimpleRdfXml convention: http://esw.w3.org/topic/SimpleRdfXml.  This is a pragmatic
  # rather than dogmatic decision.  If it is not working with your RDF/XML let me know and we
  # can probably fix it.
  #
  RDF_NS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  RSS_NS = "http://purl.org/rss/1.0/"
  SPARQL_RESULTS_NS = "http://www.w3.org/2005/sparql-results#"
  XML_NS = "http://www.w3.org/XML/1998/namespace"

  # Parses a parsed XML document into Resources.
  #
  # doc - document object responding to #namespaces (prefix => URI Hash) and
  #       #root.
  #
  # Documents declaring the RSS 1.0 namespace are read item by item, any
  # other document is read as RDF/XML.
  #
  # Returns an Array of the distinct Resources encountered.
  # Raises RuntimeError for SPARQL results documents.
  def self.parse(doc)
    namespaces = doc.namespaces
    collection =
      if namespaces.value?(RSS_NS)
        parse_rss10(doc)
      elsif namespaces.value?(SPARQL_RESULTS_NS)
        raise "Sorry, SPARQL not yet supported"
      else
        parse_rdfxml(doc)
      end
    collection.uniq
  end

  # Turns one element carrying rdf:about into a Resource and asserts its
  # child elements as properties.
  #
  # resource_node - the element describing the resource.
  # collection    - Array that every Resource encountered is appended to
  #                 (the subject last).
  #
  # Unless the element is rdf:Description or an RSS item, its qualified name
  # is asserted as the rdf:type.  A child with rdf:resource relates the
  # subject to that URI; a child wrapping elements with rdf:about relates the
  # subject to each of them and descends into them; any other child is
  # asserted as a literal, honouring xml:lang and rdf:datatype.
  #
  # Returns collection.
  def self.parse_resource_node(resource_node, collection)
    resource = Resource.new(resource_node.attribute_with_ns('about', RDF_NS).value)
    node_ns = namespace_href(resource_node)
    untyped = (resource_node.name == "Description" && node_ns == RDF_NS) ||
              (resource_node.name == "item" && node_ns == RSS_NS)
    resource.assert("[rdf:type]", "#{node_ns}#{resource_node.name}") unless untyped

    resource_node.children.each do | child |
      # Only elements are properties; text, comments and processing
      # instructions are skipped.
      next unless child.element?
      predicate = "#{namespace_href(child)}#{child.name}"
      # Bind the rdf prefix explicitly so the query does not depend on the
      # prefix the document happens to use.
      nested = child.xpath("./*[@rdf:about]", "rdf" => RDF_NS)
      if object_uri = child.attribute_with_ns("resource", RDF_NS)
        obj_resource = Resource.new(object_uri.value)
        resource.assert(predicate, obj_resource)
        collection << obj_resource
      elsif nested.empty?
        # A property wrapping nested resources has no literal value of its
        # own, so a literal is only asserted when there are none.
        opts = {}
        if lang = child.attribute_with_ns("lang", XML_NS)
          opts[:language] = lang.value
        end
        if datatype = child.attribute_with_ns("datatype", RDF_NS)
          opts[:data_type] = datatype.value
        end
        resource.assert(predicate, Literal.new(child.content, opts))
      end
      nested.each do | grandchild |
        gc_resource = Resource.new(grandchild.attribute_with_ns('about', RDF_NS).value)
        resource.assert(predicate, gc_resource)
        collection << gc_resource
        parse_resource_node(grandchild, collection)
      end
    end
    collection << resource
  end

  # Parses every direct child of the root element that carries rdf:about.
  def self.parse_rdfxml(doc)
    collection = []
    doc.root.xpath("./*[@rdf:about]", "rdf" => RDF_NS).each do | resource_node |
      parse_resource_node(resource_node, collection)
    end
    collection
  end

  # Parses every RSS 1.0 item that is a direct child of the root element.
  def self.parse_rss10(doc)
    collection = []
    doc.root.xpath("./rss:item", "rss" => RSS_NS).each do | resource_node |
      parse_resource_node(resource_node, collection)
    end
    collection
  end

  # Namespace URI of a node, or nil when the node has no namespace.
  def self.namespace_href(node)
    node.namespace ? node.namespace.href : nil
  end
  private_class_method :namespace_href
end
|
200
|
+
|
201
|
+
# Extracts RDF from an RDFa document by transforming it to RDF/XML with the
# bundled RDFa2RDFXML.xsl stylesheet and handing the result to XMLParser.
class RDFAParser
  # doc - the parsed (X)HTML document to transform.
  #
  # Returns whatever XMLParser.parse returns for the transformed document.
  def self.parse(doc)
    stylesheet = File.dirname(__FILE__) + '/../xsl/RDFa2RDFXML.xsl'
    # Block form so the stylesheet's file handle is closed once it has been
    # read.
    xslt = File.open(stylesheet) { |io| Nokogiri::XSLT(io) }
    rdf_doc = xslt.apply_to(doc)
    XMLParser.parse(Nokogiri.parse(rdf_doc))
  end
end
|
208
|
+
|
209
|
+
# Builds Resources from an already-decoded RDF/JSON structure:
#   { subject => { predicate => [ { 'type' => ..., 'value' => ... }, ... ] } }
class JSONParser
  # json - Hash in the shape shown above.
  #
  # Objects of type 'literal' are asserted through Literal.new (passing
  # 'lang' and 'datatype' when present); objects of type 'uri' and 'bnode'
  # are both asserted as Resources.  Other types are ignored.
  #
  # Returns an Array of the distinct Resources seen, subjects and objects
  # alike.
  def self.parse(json)
    found = []
    json.each_pair do |subject, assertions|
      resource = Resource.new(subject)
      found << resource
      assertions.each_pair do |predicate, objects|
        objects.each do |entry|
          case entry['type']
          when 'literal'
            opts = {}
            opts[:language] = entry['lang'] if entry['lang']
            opts[:data_type] = entry['datatype'] if entry['datatype']
            resource.assert(predicate, Literal.new(entry['value'], opts))
          when 'uri', 'bnode'
            # For now, a blank node is treated like a URI resource.
            target = Resource.new(entry['value'])
            resource.assert(predicate, target)
            found << target
          end
        end
      end
    end
    found.uniq
  end
end
|
242
|
+
|
243
|
+
class Parser
  # Choose the best format parser from an admittedly small group of choices.
  #
  # rdf - the serialized data, as a String or an object responding to #read.
  #
  # The input is first parsed as XML: documents whose root declares the XHTML
  # namespace go to RDFAParser, any other XML to XMLParser.  When that raises
  # an XML syntax error the input is tried as JSON, and when that fails to
  # parse as well it is handed to NTriplesParser.
  #
  # Returns the collection produced by whichever parser handled the input.
  def self.parse(rdf)
    # Check if the format is XML or RDFa
    doc = Nokogiri::XML.parse(rdf, nil, nil, Nokogiri::XML::ParseOptions::PEDANTIC)
    raise "Unable to parse XML/HTML document -- no namespace declared" unless doc.root.namespaces
    is_xhtml = doc.root.namespaces.values.index("http://www.w3.org/1999/xhtml")
    is_xhtml ? RDFAParser.parse(doc) : XMLParser.parse(doc)
  rescue Nokogiri::XML::SyntaxError
    readable = rdf.respond_to?(:read)
    begin
      # A readable input was consumed by the XML attempt; start over.
      rdf.rewind if readable
      payload = readable ? rdf.read : rdf
      JSONParser.parse(JSON.parse(payload))
    rescue JSON::ParserError
      rdf.rewind if rdf.respond_to?(:read)
      NTriplesParser.parse(rdf)
    end
  end
end
|
274
|
+
end
|