rdfobjects 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License
2
+
3
+ Copyright (c) 2007 Ross Singer
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,103 @@
1
+ RDFObjects are intended to simplify working with RDF data by providing a (more) Ruby-like interface to resources (thanks to OpenStruct).
2
+
3
+ Installation:
4
+ sudo gem install rsinger-rdfobjects
5
+
6
+ Requirements:
7
+ * Nokogiri (the idea is for more options in the future)
8
+ * rsinger-curies
9
+ * Builder (although, ideally, this will be deprecated)
10
+ * json (or json_pure)
11
+
12
+ Usage:
13
+ >> require 'rdf_objects'
14
+
15
+ >> include RDFObject
16
+
17
+ >> Curie.add_prefixes! :skos=>"http://www.w3.org/2004/02/skos/core#"
18
+
19
+ >> resource = Resource.new('http://id.loc.gov/authorities/sh2002000569#concept')
20
+ >> resource.describe
21
+ >> resource.skos
22
+
23
+ => {"inScheme"=>[#<RDFObject::Resource uri="http://id.loc.gov/authorities#topicalTerms">, #<RDFObject::Resource uri="http://id.loc.gov/authorities#conceptScheme">], "broader"=>[#<RDFObject::Resource skos={"prefLabel"=>"Semantic networks (Information theory)"}, uri="http://id.loc.gov/authorities/sh92004914#concept">, #<RDFObject::Resource skos={"prefLabel"=>"World Wide Web"}, uri="http://id.loc.gov/authorities/sh95000541#concept">, #<RDFObject::Resource skos={"prefLabel"=>"Semantic integration (Computer systems)"}, uri="http://id.loc.gov/authorities/sh2004000479#concept">], "closeMatch"=>#<RDFObject::Resource uri="http://stitch.cs.vu.nl/vocabularies/rameau/ark:/12148/cb14521343b">, "prefLabel"=>"Semantic Web"}
24
+
25
+ >> resource["[skos:prefLabel]"]
26
+
27
+ => "Semantic Web"
28
+
29
+ >> resource.skos["prefLabel"]
30
+
31
+ => "Semantic Web"
32
+
33
+ >> resource["http://www.w3.org/2004/02/skos/core#prefLabel"]
34
+
35
+ => "Semantic Web"
36
+ (etc.)
37
+
38
+ >> resource.skos["broader"].first.skos["prefLabel"]
39
+
40
+ => "Semantic networks (Information theory)"
41
+
42
+ Unnecessary, but helpful, way to define typed literals
43
+ >> source = Literal.new("Library of Congress Authorities", {:language=>"en"})
44
+
45
+ And assert them
46
+
47
+ >> resource.assert("http://purl.org/dc/terms/source", source)
48
+
49
+ => ["Work cat.: 2002070545: The Semantic Web--ISWC 20002, 2002.", "ASTI on FirstSearch, May 6, 2002: in titles (semantic Web)", "Engr. index online, May 6, 2002 (identifier: Semantic Web)", "Library of Congress Authorities"]
50
+
51
+ >> resource["http://purl.org/dc/terms/source"].last.language
52
+
53
+ => "en"
54
+
55
+ To relate a resource to another URI you can use #relate - it will accept full URI strings, safe curies or other RDFObject::Resource objects
56
+
57
+ >> resource.relate("[skos:closeMatch]", "http://dbpedia.org/resource/Category:Semantic_Web")
58
+
59
+ => [#<RDFObject::Resource uri="http://stitch.cs.vu.nl/vocabularies/rameau/ark:/12148/cb14521343b">, #<RDFObject::Resource uri="http://dbpedia.org/resource/Category:Semantic_Web">]
60
+
61
+ RDFObject::Resources sort of act as singletons
62
+
63
+ >> r1 = Resource.new('http://ex.org/ex/1234')
64
+
65
+ => #<RDFObject::Resource uri="http://ex.org/ex/1234">
66
+
67
+ >> r1.object_id
68
+
69
+ => 8996290
70
+
71
+ >> r2 = Resource.new('http://ex.org/ex/1234')
72
+
73
+ => #<RDFObject::Resource uri="http://ex.org/ex/1234">
74
+
75
+ >> r2.object_id
76
+
77
+ => 8996290
78
+
79
+ So relationships and assertions are always applied to the same object. These are managed in the RDFObject::Resource class:
80
+
81
+ >> Resource.instances
82
+
83
+ => {"http://ex.org/ex/1234"=>#<RDFObject::Resource uri="http://ex.org/ex/1234">}
84
+
85
+ You can delete a single resource:
86
+
87
+ >> Resource.remove(r1)
88
+
89
+ >> Resource.instances
90
+ => {}
91
+
92
+ Or clear the entire hash:
93
+
94
+ >> Resource.reset!
95
+
96
+ There are also very crude parsers for ntriples and rdf/xml
97
+
98
+ >> resources = Parser.parse(open('lcsh.nt').read)
99
+
100
+ >> resources.first
101
+
102
+ => #<RDFObject::Resource n0={"altLabel"=>"Lichen ruber planus", "inScheme"=>[#<RDFObject::Resource uri="http://id.loc.gov/authorities#conceptScheme">, #<RDFObject::Resource uri="http://id.loc.gov/authorities#topicalTerms">], "prefLabel"=>"Lichen planus"}, n1={"sameAs"=>#<RDFObject::Resource uri="info:lc/authorities/sh85076767">}, uri="http://id.loc.gov/authorities/sh85076767#concept", n2={"modified"=>#<DateTime: 211644344801/86400,-1/6,2299161>}, rdf={"type"=>#<RDFObject::Resource uri="http://www.w3.org/2004/02/skos/core#Concept">}>
103
+
@@ -0,0 +1,62 @@
1
+ require 'rubygems'
2
+ require 'curies'
3
+ class Curie
4
+ @@namespace_counter = 0
5
+
6
+ # Returns a Curie object from a fully qualified uri (assuming it is registered)
7
+ def self.curie_from_uri(uri_string)
8
+ @@mappings.each do | prefix, uri |
9
+ if m = uri_string.match(/^#{uri}(.*)/)
10
+ return self.new(prefix, m[1]) if m[1]
11
+ end
12
+ end
13
+ false
14
+ end
15
+
16
+ # Returns the Curie prefix for a URI
17
+ def self.prefix_for(uri_string)
18
+ @@mappings.each do | prefix, uri |
19
+ if m = uri_string.match(/^#{uri}(.*)/)
20
+ return prefix
21
+ end
22
+ end
23
+ false
24
+ end
25
+
26
+ # Automatically tries to build a safe curie from a uri string.
27
+ # Assumes an RDF Schema and a flat hierarchy.
28
+ def self.create_from_uri(uri_string, prefix=nil)
29
+ if curie = self.curie_from_uri(uri_string)
30
+ return curie
31
+ end
32
+ uri = URI.parse(uri_string)
33
+ ns = nil
34
+ elem = nil
35
+ if uri.fragment
36
+ ns, elem = uri.to_s.split('#')
37
+ ns << '#'
38
+ else
39
+ elem = uri.path.split('/').last
40
+ ns = uri.to_s.sub(/#{elem}$/, '')
41
+ end
42
+ unless prefix
43
+ prefix = "n#{@@namespace_counter}"
44
+ @@namespace_counter += 1
45
+ end
46
+ Curie.add_prefixes! prefix.to_s => ns
47
+ self.curie_from_uri(uri_string)
48
+ end
49
+
50
+ def self.get_mappings
51
+ return @@mappings
52
+ end
53
+
54
+ # Return a Curie object from a safe curie string.
55
+ def self.new_from_curie(curie_string)
56
+ unless curie_string.could_be_a_safe_curie?
57
+ raise "not a real curie"
58
+ end
59
+ prefix, resource = curie_string.curie_parts
60
+ return Curie.new(prefix, resource)
61
+ end
62
+ end
@@ -0,0 +1,57 @@
1
module RDFObject
  # Accessors that let a plain Ruby value carry RDF literal metadata: a
  # language tag and a datatype URI. Shared by every core class below instead
  # of repeating the same three methods in each of them.
  #
  # NOTE(review): the values are stored in instance variables on the receiver,
  # so assigning them to a frozen object raises. Confirm which of these core
  # classes have frozen instances on the Ruby versions you support.
  module LiteralMetadata
    attr_accessor :language, :data_type

    # Sets the datatype URI; equivalent to #data_type=.
    def set_data_type(uri)
      @data_type = uri
    end
  end
end

class Integer
  include RDFObject::LiteralMetadata
end

class Date
  include RDFObject::LiteralMetadata
end

class String
  include RDFObject::LiteralMetadata
end

class TrueClass
  include RDFObject::LiteralMetadata
end

class FalseClass
  include RDFObject::LiteralMetadata
end
31
+
32
+
33
+
34
module RDFObject
  # Factory for RDF literal values. Literal.new does not return a Literal
  # instance: it converts the lexical value to a Ruby object chosen by the
  # datatype URI and annotates that object with the datatype and language.
  class Literal
    # Accepted lexical forms for the datatypes that are converted.
    DATE_LEXICAL      = /\A-?\d{4,}-\d{2}-\d{2}(Z|[+-]\d{2}:\d{2})?\z/
    DATE_TIME_LEXICAL = /\A-?\d{4,}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+-]\d{2}:\d{2})?\z/
    INTEGER_LEXICAL   = /\A[+-]?\d+\z/

    # value   - the lexical form of the literal.
    # options - :data_type (datatype URI string) and :language (language tag).
    #
    # Conversions by :data_type:
    #   xsd:dateTime -> DateTime, xsd:date -> Date, xsd:integer -> Integer,
    #   xsd:boolean  -> true/false ("true"/"1" and "false"/"0"),
    #   xsd:string   -> String; any other or missing datatype keeps +value+.
    #
    # Returns the converted object after calling set_data_type and language=
    # on it (those methods must exist on the object's class).
    # Raises ArgumentError when the value is not a valid lexical form for the
    # requested datatype, or when an untyped value is not its own string form.
    #
    # Validation is done on the lexical form rather than by comparing
    # obj.to_s with the input: that comparison rejected valid forms such as
    # boolean "1", integer "+5" and dateTime values ending in "Z".
    def self.new(value, options={})
      obj = case options[:data_type]
            when 'http://www.w3.org/2001/XMLSchema#dateTime'
              parse_temporal(DateTime, DATE_TIME_LEXICAL, value, 'dateTime')
            when 'http://www.w3.org/2001/XMLSchema#date'
              parse_temporal(Date, DATE_LEXICAL, value, 'date')
            when 'http://www.w3.org/2001/XMLSchema#integer'
              parse_integer(value)
            when 'http://www.w3.org/2001/XMLSchema#boolean'
              parse_boolean(value)
            when 'http://www.w3.org/2001/XMLSchema#string'
              plain_value(value).to_s
            else
              plain_value(value)
            end
      # NOTE(review): these setters store instance variables on obj and will
      # raise if obj is frozen -- confirm for Integer/true/false results.
      obj.set_data_type(options[:data_type])
      obj.language = options[:language]
      obj
    end

    # Parses a date or dateTime after checking its lexical form; +klass+.parse
    # raises ArgumentError itself for out-of-range fields.
    def self.parse_temporal(klass, pattern, value, label)
      text = value.to_s
      unless text =~ pattern
        raise ArgumentError, "invalid xsd:#{label} literal: #{value.inspect}"
      end
      klass.parse(text)
    end

    # Converts an optionally signed run of digits to an Integer.
    def self.parse_integer(value)
      text = value.to_s
      unless text =~ INTEGER_LEXICAL
        raise ArgumentError, "invalid xsd:integer literal: #{value.inspect}"
      end
      text.to_i
    end

    # Maps the four boolean lexical forms to true/false.
    def self.parse_boolean(value)
      case value.to_s
      when 'true', '1' then true
      when 'false', '0' then false
      else raise ArgumentError, "invalid xsd:boolean literal: #{value.inspect}"
      end
    end

    # Returns +value+ unchanged when it equals its own string form.
    def self.plain_value(value)
      unless value.to_s == value
        raise ArgumentError, "literal value must be a String: #{value.inspect}"
      end
      value
    end

    private_class_method :parse_temporal, :parse_integer, :parse_boolean, :plain_value
  end
end
@@ -0,0 +1,55 @@
1
+ require 'net/http'
2
+ require 'uri'
3
+ require 'cgi'
4
+ module RDFObject
5
# Minimal HTTP GET client used to retrieve RDF descriptions of resources.
class HTTPClient
  # Registered proxies: pattern (anything String#match accepts) => proxy
  # object responding to proxy_uri(uri, formats).
  @@proxies = {}

  # Fetches +uri+ (a String) and returns the response body.
  #
  # Every registered proxy whose key matches the URI rewrites it in turn
  # (each proxy sees the URI produced by the previous one). The request asks
  # for application/rdf+xml.
  #
  # Raises a RuntimeError carrying the HTTP status message for any response
  # code other than "200" (redirects are not followed).
  def self.fetch(uri)
    @@proxies.each do |key, proxy|
      uri = proxy.proxy_uri(uri, ['ntriples','rdf']) if uri.match(key)
    end
    u = URI.parse(uri)
    request = Net::HTTP::Get.new(u.request_uri)
    request['accept'] = 'application/rdf+xml'
    http = Net::HTTP.new(u.host, u.port)
    # Net::HTTP.start(host, port) never enables TLS, so https URIs could not
    # be fetched; turn it on explicitly for that scheme.
    http.use_ssl = true if u.scheme == 'https'
    response = http.start do |connection|
      connection.request(request)
    end
    if response.code != "200"
      raise response.message
    end
    response.body
  end

  # Registers +proxy+ for URIs matching +uri+ and returns the proxy.
  def self.register_proxy(uri,proxy)
    @@proxies[uri] = proxy
  end
end
30
+
31
+
32
# Builds URIs that ask a Talis Platform store for the description of a
# resource instead of dereferencing the resource itself.
class TalisPlatformProxy
  attr_reader :store

  # Output formats this proxy can request.
  @@formats = ['rdf','ntriples','turtle','json']

  # store_name - name of the store, used in the generated URI path.
  def initialize(store_name)
    @store = store_name
  end

  # Returns the store's "meta" URI describing +uri+.
  #
  # uri    - the resource URI to describe (CGI-escaped into the query string).
  # format - formats in order of preference; the first one that appears in
  #          the supported list is used.
  #
  # Raises a RuntimeError when none of the requested formats is supported.
  # (Previously this case never reached the raise: the search loop kept
  # incrementing its index forever.)
  def proxy_uri(uri, format=['rdf'])
    best_format = Array(format).find { |candidate| @@formats.include?(candidate) }
    raise "No compatible response format!" if !best_format
    "http://api.talis.com/stores/#{@store}/meta?about=#{CGI.escape(uri)}&output=#{best_format}"
  end
end
55
+ end
@@ -0,0 +1,274 @@
1
+ # encoding: utf-8
2
+ require 'rubygems'
3
+ require 'strscan'
4
+ require 'iconv'
5
+ require 'uri'
6
+ require 'json'
7
+ require 'nokogiri'
8
+ require 'cgi'
9
+ if RUBY_VERSION < '1.9.0'
10
+ $KCODE = 'u'
11
+ require 'jcode'
12
+ end
13
+
14
# StringScanner that decodes backslash escapes (\n, \t, \", \\, \uXXXX, ...)
# in a string and returns the result as UTF-8.
#
# \uXXXX sequences are decoded in pure Ruby (including UTF-16 surrogate
# pairs), so the class no longer needs Iconv to load or to run.
class UTF8Parser < StringScanner
  STRING = /(([\x0-\x1f]|[\\\/bfnrt]|\\u[0-9a-fA-F]{4}|[\x20-\xff])*)/nx
  UNPARSED = Object.new
  # Escaped character => replacement. Unknown characters map to themselves
  # (the backslash is dropped); 'u' maps to nil to signal a \uXXXX escape.
  UNESCAPE_MAP = Hash.new { |h, k| h[k] = k.chr }
  UNESCAPE_MAP.update({
    ?" => '"',
    ?\\ => '\\',
    ?/ => '/',
    ?b => "\b",
    ?f => "\f",
    ?n => "\n",
    ?r => "\r",
    ?t => "\t",
    ?u => nil,
  })
  # Kept for code that references the constant; only defined when Iconv is loaded.
  UTF16toUTF8 = Iconv.new('utf-8', 'utf-16be') if defined?(Iconv)

  # str - the escaped text. It is scanned as raw bytes (on a private copy) so
  #       the byte-oriented patterns above also accept non-ASCII input.
  def initialize(str)
    str = str.dup.force_encoding('ASCII-8BIT') if str.respond_to?(:force_encoding)
    super(str)
    @string = str
  end

  # Returns the unescaped text from the current scan position ('' when there
  # is nothing left), tagged as UTF-8 where strings carry an encoding.
  # Returns UNPARSED if the text does not match STRING.
  # Raises StandardError for a malformed \u escape or an unpaired surrogate.
  def parse_string
    return UNPARSED unless scan(STRING)
    return '' if self[1].empty?
    string = self[1].gsub(%r((?:\\[\\bfnrt"/]|(?:\\u(?:[A-Fa-f\d]{4}))+|\\[\x20-\xff]))n) do |escape|
      replacement = UNESCAPE_MAP[escape[1]] || decode_utf16_escapes(escape)
      # Splice raw bytes into the byte string; dup so shared table entries
      # never have their encoding changed.
      if replacement.respond_to?(:force_encoding)
        replacement = replacement.dup.force_encoding('ASCII-8BIT')
      end
      replacement
    end
    if string.respond_to?(:force_encoding)
      string.force_encoding(Encoding::UTF_8)
    end
    string
  end

  private

  # Decodes a run of consecutive \uXXXX escapes (UTF-16 code units) into a
  # UTF-8 string, combining high/low surrogate pairs into one code point.
  def decode_utf16_escapes(escaped)
    units = escaped.scan(/\\u([A-Fa-f\d]{4})/).flatten.map { |hex| hex.to_i(16) }
    raise StandardError, "Malformed \\u escape: #{escaped}" if units.empty?
    codepoints = []
    until units.empty?
      unit = units.shift
      if unit >= 0xD800 && unit <= 0xDBFF
        low = units.shift
        unless low && low >= 0xDC00 && low <= 0xDFFF
          raise StandardError, "Unpaired UTF-16 surrogate in: #{escaped}"
        end
        codepoints << 0x10000 + ((unit - 0xD800) << 10) + (low - 0xDC00)
      elsif unit >= 0xDC00 && unit <= 0xDFFF
        raise StandardError, "Unpaired UTF-16 surrogate in: #{escaped}"
      else
        codepoints << unit
      end
    end
    codepoints.pack('U*')
  end
end
61
+ module RDFObject
62
# Parses a single N-Triples statement into subject, predicate and object, and
# (via NTriplesParser.parse) a whole document into Resource objects.
class NTriplesParser
  attr_reader :ntriple, :subject, :predicate, :data_type, :language, :literal
  attr_accessor :object

  # line - one N-Triples statement. A private copy is scanned as raw bytes,
  #        so the caller's string is neither modified nor re-encoded.
  #
  # Raises ArgumentError when the statement cannot be split into terms.
  def initialize(line)
    @ntriple = line.dup
    if @ntriple.respond_to?(:force_encoding)
      @ntriple.force_encoding("ASCII-8BIT")
    end
    parse_ntriple
  end

  # Splits the statement into @subject, @predicate and @object. A URI object
  # becomes a Resource; anything else is treated as a literal and becomes
  # the result of Literal.new, with @language or @data_type set when present.
  def parse_ntriple
    scanner = StringScanner.new(@ntriple)
    @subject = scan_uri_term(scanner)
    @predicate = scan_uri_term(scanner)
    scanner.skip(/[ \t]+/)
    if scanner.match?(/</)
      parse_resource_object(scanner)
    else
      parse_literal_object(scanner)
    end
  end

  # Parses an N-Triples document.
  #
  # resources - a String (split on newlines), an Array of statement strings,
  #             or an object responding to #read (its #readlines is used).
  #
  # Blank lines and lines starting with '#' are skipped. Returns an Array of
  # the distinct subject Resources, in order of first appearance.
  # Raises ArgumentError for any other kind of input.
  def self.parse(resources)
    assertions =
      if resources.is_a?(String)
        resources.split("\n")
      elsif resources.is_a?(Array)
        resources
      elsif resources.respond_to?(:read)
        resources.readlines
      else
        raise ArgumentError, "Cannot read N-Triples from #{resources.class}"
      end
    collection = []
    assertions.each do |assertion|
      next if skippable?(assertion)
      triple = self.new(assertion)
      resource = Resource.new(triple.subject)
      resource.assert(triple.predicate, triple.object)
      collection << resource
    end
    # uniq, not uniq!: the bang form returns nil when nothing was removed.
    collection.uniq
  end

  # True for blank lines and comment lines. Works on a binary copy so that
  # invalid byte sequences in the input cannot make the check raise.
  def self.skippable?(line)
    raw = line.dup
    raw.force_encoding("ASCII-8BIT") if raw.respond_to?(:force_encoding)
    raw = raw.strip
    raw.empty? || raw[0, 1] == "#"
  end
  private_class_method :skippable?

  private

  # Scans the next "<uri>" term plus its trailing separator and returns the
  # URI without the angle brackets.
  def scan_uri_term(scanner)
    scanner.skip(/[ \t]+/)
    term = scanner.scan_until(/>[ \t]+/)
    malformed! if term.nil?
    term.sub(/^</, '').sub(/>[ \t]+\z/, '')
  end

  # Object is a URI reference: everything up to the closing "> ." terminator.
  def parse_resource_object(scanner)
    terminator = />\s?\.\s*\n?$/
    uri = scanner.scan_until(terminator)
    malformed! if uri.nil?
    @object = Resource.new(uri.sub(/^</, '').sub(terminator, ''))
  end

  # Object is a quoted literal, optionally followed by @lang or ^^<datatype>.
  def parse_literal_object(scanner)
    @literal = true
    scanner.getch # opening quote
    scanned = scanner.scan_until(/("\s?\.\s*\n?$)|("@[A-Za-z])|("\^\^)/)
    malformed! if scanned.nil?
    # Drop exactly the matched terminator from the value and leave the
    # scanner just after the closing quote, whatever the terminator's length
    # (handles '".', trailing spaces and CR as well as '" .').
    value = scanned[0, scanned.length - scanner.matched_size]
    scanner.pos = scanner.pos - scanner.matched_size + 1
    # NOTE(review): backslash escapes in the literal are only decoded on the
    # UTF8Parser path below -- confirm whether the other path should too.
    if value.respond_to?(:force_encoding)
      value.force_encoding('utf-8').chomp!
    else
      value = UTF8Parser.new(value).parse_string.chomp
    end
    if scanner.match?(/@/)
      scanner.getch
      @language = scanner.scan(/[A-Za-z0-9\-]+/)
    elsif scanner.match?(/\^\^/)
      scanner.skip_until(/</)
      datatype = scanner.scan_until(/>/)
      malformed! if datatype.nil?
      @data_type = datatype.sub(/>$/, '')
    end
    @object = Literal.new(value, {:data_type=>@data_type, :language=>@language})
  end

  def malformed!
    raise ArgumentError, "Malformed N-Triple: #{@ntriple.inspect}"
  end
end
130
+
131
class XMLParser
  #
  # A very unsophisticated RDF/XML Parser -- currently only parses RDF/XML that conforms to
  # the SimpleRdfXml convention: http://esw.w3.org/topic/SimpleRdfXml. This is a pragmatic
  # rather than dogmatic decision. If it is not working with your RDF/XML let me know and we
  # can probably fix it.
  #
  RDF_NS            = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  RSS_NS            = "http://purl.org/rss/1.0/"
  SPARQL_RESULTS_NS = "http://www.w3.org/2005/sparql-results#"
  XML_NS            = "http://www.w3.org/XML/1998/namespace"

  # Parses a parsed XML document (+doc+ must respond to #namespaces, returning
  # a prefix => URI Hash, and #root) and returns the distinct Resources found.
  #
  # Documents declaring the RSS 1.0 namespace are read item by item; anything
  # else is read as RDF/XML. Raises a RuntimeError for SPARQL result documents.
  def self.parse(doc)
    # Compare against the Hash's values directly (Hash#index no longer exists
    # in current Ruby versions).
    declared = doc.namespaces.values
    collection =
      if declared.include?(RSS_NS)
        parse_rss10(doc)
      elsif declared.include?(SPARQL_RESULTS_NS)
        raise "Sorry, SPARQL not yet supported"
      else
        parse_rdfxml(doc)
      end
    collection.uniq
  end

  # Turns one node element carrying rdf:about into a Resource, asserts its
  # properties, recurses into nested rdf:about nodes and appends every
  # Resource it touches to +collection+. Returns +collection+.
  def self.parse_resource_node(resource_node, collection)
    resource = Resource.new(resource_node.attribute_with_ns('about', RDF_NS).value)
    node_ns = resource_node.namespace.href
    # rdf:Description and rss:item are generic containers; any other element
    # name is a typed node and doubles as an rdf:type assertion.
    generic = (resource_node.name == "Description" && node_ns == RDF_NS) ||
              (resource_node.name == "item" && node_ns == RSS_NS)
    resource.assert("[rdf:type]", "#{node_ns}#{resource_node.name}") unless generic

    resource_node.children.each do |child|
      # Only namespaced elements can be properties; text, comments and
      # processing instructions have no namespace to build a predicate from.
      next unless child.element?
      next if child.namespace.nil?
      predicate = "#{child.namespace.href}#{child.name}"
      # Bind the rdf prefix explicitly so the query does not depend on the
      # prefix the document happens to use for the RDF namespace.
      nested = child.xpath("./*[@rdf:about]", "rdf" => RDF_NS)
      if object_uri = child.attribute_with_ns("resource", RDF_NS)
        obj_resource = Resource.new(object_uri.value)
        resource.assert(predicate, obj_resource)
        collection << obj_resource
      elsif nested.empty?
        # A property whose value is a nested node is asserted below as a
        # resource; only properties without one are literals.
        opts = {}
        if lang = child.attribute_with_ns("lang", XML_NS)
          opts[:language] = lang.value
        end
        if datatype = child.attribute_with_ns("datatype", RDF_NS)
          opts[:data_type] = datatype.value
        end
        resource.assert(predicate, Literal.new(child.content, opts))
      end
      nested.each do |grandchild|
        gc_resource = Resource.new(grandchild.attribute_with_ns('about', RDF_NS).value)
        resource.assert(predicate, gc_resource)
        collection << gc_resource
        parse_resource_node(grandchild, collection)
      end
    end
    collection << resource
  end

  # Reads every child of the root element that has an rdf:about attribute.
  def self.parse_rdfxml(doc)
    collection = []
    doc.root.xpath("./*[@rdf:about]", "rdf" => RDF_NS).each do |resource_node|
      parse_resource_node(resource_node, collection)
    end
    collection
  end

  # Reads every rss:item child of the root element.
  def self.parse_rss10(doc)
    collection = []
    doc.root.xpath("./rss:item", "rss" => RSS_NS).each do |resource_node|
      parse_resource_node(resource_node, collection)
    end
    collection
  end
end
200
+
201
# Converts an RDFa document to RDF/XML with the bundled XSLT stylesheet and
# hands the result to XMLParser.
class RDFAParser
  # doc - the parsed (X)HTML document to transform.
  #
  # Returns whatever XMLParser.parse returns for the transformed document.
  def self.parse(doc)
    stylesheet_path = File.dirname(__FILE__) + '/../xsl/RDFa2RDFXML.xsl'
    # Block form so the stylesheet's file handle is closed once it is loaded.
    xslt = File.open(stylesheet_path) { |stylesheet| Nokogiri::XSLT(stylesheet) }
    rdf_doc = xslt.apply_to(doc)
    XMLParser.parse(Nokogiri.parse(rdf_doc))
  end
end
208
+
209
# Builds Resources from an already-decoded RDF/JSON structure:
#   { subject => { predicate => [ { 'type' => ..., 'value' => ... }, ... ] } }
class JSONParser
  # json - Hash keyed by subject URI, as described above.
  #
  # Objects of type 'literal' are asserted via Literal.new (with 'lang' and
  # 'datatype' passed as :language and :data_type when present); 'uri' and
  # 'bnode' objects are asserted as Resources. Other types are ignored.
  #
  # Returns the distinct subject and object Resources, in order of appearance.
  # Raises ArgumentError when +json+ is not hash-like.
  def self.parse(json)
    unless json.respond_to?(:each_pair)
      raise ArgumentError, "RDF/JSON must be an object keyed by subject, got #{json.class}"
    end
    collection = []
    json.each_pair do |subject, assertions|
      resource = Resource.new(subject)
      collection << resource
      assertions.each_pair do |predicate, objects|
        objects.each do |object|
          case object['type']
          when 'literal'
            opts = {}
            opts[:language] = object['lang'] if object['lang']
            opts[:data_type] = object['datatype'] if object['datatype']
            resource.assert(predicate, Literal.new(object['value'], opts))
          when 'uri', 'bnode'
            # For now, a blank node is treated like a URI resource.
            target = Resource.new(object['value'])
            resource.assert(predicate, target)
            collection << target
          end
        end
      end
    end
    collection.uniq
  end
end
242
+
243
class Parser
  # Choose the best format parser from an admittedly small group of choices.
  #
  # rdf - a String or an object responding to #read (and #rewind).
  #
  # The input is first parsed as XML: documents declaring the XHTML namespace
  # go to RDFAParser, other XML to XMLParser. If that raises
  # Nokogiri::XML::SyntaxError the input is tried as JSON (JSONParser), and
  # if JSON parsing fails, as N-Triples (NTriplesParser).
  #
  # Returns the collection produced by the parser that handled the input.
  # Raises a RuntimeError for an XML document that declares no namespace.
  def self.parse(rdf)
    parse_markup(rdf)
  rescue Nokogiri::XML::SyntaxError
    parse_json_or_ntriples(rdf)
  end

  # Check if the format is XML or RDFa and dispatch accordingly.
  def self.parse_markup(rdf)
    doc = Nokogiri::XML.parse(rdf, nil, nil, Nokogiri::XML::ParseOptions::PEDANTIC)
    namespaces = doc.root && doc.root.namespaces
    # A Hash is always truthy, so the emptiness has to be tested explicitly
    # for the "no namespace declared" error to ever be raised.
    if namespaces.nil? || namespaces.empty?
      raise "Unable to parse XML/HTML document -- no namespace declared"
    end
    if namespaces.values.include?("http://www.w3.org/1999/xhtml")
      RDFAParser.parse(doc)
    else
      XMLParser.parse(doc)
    end
  end

  # Fallback for non-XML input: JSON first, then N-Triples. Readable inputs
  # are rewound before each attempt because the previous one consumed them.
  def self.parse_json_or_ntriples(rdf)
    readable = rdf.respond_to?(:read)
    rdf.rewind if readable
    json = JSON.parse(readable ? rdf.read : rdf)
    JSONParser.parse(json)
  rescue JSON::ParserError
    rdf.rewind if rdf.respond_to?(:read)
    NTriplesParser.parse(rdf)
  end

  private_class_method :parse_markup, :parse_json_or_ntriples
end
274
+ end