rsinger-rdfobjects 0.1.3 → 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,62 @@
1
+ require 'rubygems'
2
+ require 'curies'
3
# Additions to the Curie class for translating between fully qualified URIs
# and prefix/reference pairs.
#
# NOTE(review): @@mappings, Curie.new(prefix, reference), Curie.add_prefixes!
# and String#could_be_a_safe_curie? / String#curie_parts are not defined in
# this file; presumably they come from the 'curies' gem -- confirm.
class Curie
  # Counter used to mint "n0", "n1", ... prefixes for namespaces that have
  # no registered prefix yet.
  @@namespace_counter = 0

  # Returns a Curie object for a fully qualified URI, provided the URI
  # starts with one of the registered namespaces.
  #
  # The namespace is compared literally (not as a regular expression), so
  # characters such as "." or "?" in a namespace URI only match themselves.
  #
  # uri_string - the fully qualified URI (String).
  #
  # Returns a Curie, or false when no registered namespace matches.
  def self.curie_from_uri(uri_string)
    @@mappings.each do |prefix, uri|
      namespace = uri.to_s
      next unless uri_string.start_with?(namespace)
      return new(prefix, uri_string[namespace.length..-1])
    end
    false
  end

  # Returns the registered prefix whose namespace the URI starts with, or
  # false when there is none.
  def self.prefix_for(uri_string)
    @@mappings.each do |prefix, uri|
      return prefix if uri_string.start_with?(uri.to_s)
    end
    false
  end

  # Automatically tries to build a safe curie from a uri string.
  # Assumes an RDF Schema and a flat hierarchy.
  #
  # When the URI is not covered by a registered namespace, the namespace is
  # derived from the URI itself (everything up to and including "#" when
  # there is a fragment, otherwise everything before the last path segment)
  # and registered under +prefix+, or under a generated "n<counter>" prefix.
  #
  # uri_string - the fully qualified URI (String).
  # prefix     - optional prefix to register the derived namespace under.
  #
  # Returns whatever curie_from_uri returns after registration.
  def self.create_from_uri(uri_string, prefix=nil)
    known = curie_from_uri(uri_string)
    return known if known

    uri = URI.parse(uri_string)
    if uri.fragment
      # Everything before the first "#", plus the "#" itself.
      ns = uri.to_s[/\A[^#]*/] + '#'
    else
      elem = uri.path.to_s.split('/').last.to_s
      # The local name is escaped so that names like "a+b" or "v1.0" are
      # stripped literally rather than interpreted as a pattern.
      ns = uri.to_s.sub(/#{Regexp.escape(elem)}\z/, '')
    end
    unless prefix
      prefix = "n#{@@namespace_counter}"
      @@namespace_counter += 1
    end
    Curie.add_prefixes! prefix.to_s => ns
    curie_from_uri(uri_string)
  end

  # Returns the prefix => namespace mapping table.
  def self.get_mappings
    @@mappings
  end

  # Return a Curie object from a safe curie string.
  #
  # Raises RuntimeError ("not a real curie") when the string does not look
  # like a safe curie.
  def self.new_from_curie(curie_string)
    unless curie_string.could_be_a_safe_curie?
      raise "not a real curie"
    end
    prefix, resource = curie_string.curie_parts
    Curie.new(prefix, resource)
  end
end
@@ -0,0 +1,30 @@
1
module RDFObject
  # Mixin that lets a literal value carry its RDF datatype URI and language
  # tag. It is attached per object with Object#extend.
  module Modifiers
    attr_reader :data_type
    attr_accessor :language

    # Records the datatype URI for the extended object.
    def set_data_type(uri)
      @data_type = uri
    end
  end

  # Factory for literal values. Literal.new does not return a Literal
  # instance: it returns a native Ruby value converted according to the
  # datatype and, where possible, extended with Modifiers.
  class Literal
    # Builds a Ruby value for an RDF literal.
    #
    # value   - the lexical form of the literal.
    # options - Hash with optional :data_type (datatype URI String) and
    #           :language (language tag) entries.
    #
    # Conversions: xsd:dateTime -> DateTime, xsd:date -> Date,
    # xsd:int -> Integer, xsd:string -> String, xsd:boolean -> true/false
    # ("true" in any case, or "1", is true). Any other datatype leaves the
    # value untouched.
    #
    # Returns the converted value. Strings, Dates and DateTimes respond to
    # #data_type and #language. Integers, booleans and nil are returned
    # bare: Integers cannot be extended at all (TypeError), and extending
    # true/false/nil would alter the single shared object for the whole
    # process.
    def self.new(value, options={})
      obj = case options[:data_type]
      when 'http://www.w3.org/2001/XMLSchema#dateTime' then DateTime.parse(value)
      when 'http://www.w3.org/2001/XMLSchema#date' then Date.parse(value)
      when 'http://www.w3.org/2001/XMLSchema#int' then value.to_i
      when 'http://www.w3.org/2001/XMLSchema#string' then value.to_s
      when 'http://www.w3.org/2001/XMLSchema#boolean'
        value.downcase == 'true' || value == '1'
      else
        value
      end

      case obj
      when Integer, TrueClass, FalseClass, NilClass
        return obj
      end

      # A frozen object cannot be extended or given instance variables;
      # work on a copy so the caller's object is left alone.
      obj = obj.dup if obj.frozen?
      obj.extend(RDFObject::Modifiers)
      obj.set_data_type(options[:data_type])
      obj.language = options[:language]
      obj
    end
  end
end
@@ -0,0 +1,54 @@
1
+ require 'net/http'
2
+ require 'uri'
3
+ require 'cgi'
4
+
5
# Small HTTP helper that retrieves RDF/XML for a URI, optionally routing the
# request through registered proxy objects.
class HTTPClient
  # Pattern => proxy object. A proxy must respond to
  # proxy_uri(uri, formats) and return the URI to request instead.
  @@proxies = {}

  # Fetches +uri+ and returns the response body.
  #
  # Every registered proxy whose key matches the (possibly already
  # rewritten) URI gets to rewrite it before the request is made. The
  # request asks for application/rdf+xml. https URIs are requested over
  # TLS; previously they were sent as plain HTTP to the https port.
  #
  # uri - the URI to fetch (String).
  #
  # Returns the response body String.
  # Raises RuntimeError with the HTTP status message for any response code
  # other than "200" (redirects are not followed).
  def self.fetch(uri)
    @@proxies.each do |pattern, proxy|
      uri = proxy.proxy_uri(uri, ['ntriples','rdf']) if uri.match(pattern)
    end
    target = URI.parse(uri)
    request = Net::HTTP::Get.new(target.request_uri)
    request['accept'] = 'application/rdf+xml'
    http = Net::HTTP.new(target.host, target.port)
    if target.scheme == 'https'
      # Loaded lazily: older Rubies only define use_ssl= in net/https.
      require 'net/https'
      http.use_ssl = true
    end
    response = http.start { |connection| connection.request(request) }
    raise response.message if response.code != "200"
    response.body
  end

  # Registers +proxy+ for URIs matching +uri+ (a String or Regexp that is
  # handed to String#match).
  #
  # Returns the proxy.
  def self.register_proxy(uri,proxy)
    @@proxies[uri] = proxy
  end
end
30
+
31
+
32
# Rewrites resource URIs into requests against a Talis Platform store's
# "meta" service.
class TalisPlatformProxy
  attr_reader :store

  # Output formats this proxy is able to request.
  @@formats = ['rdf','ntriples','turtle','json']

  # store_name - name of the store, used in the request path.
  def initialize(store_name)
    @store = store_name
  end

  # Builds the store URI that describes +uri+.
  #
  # uri    - the resource URI to look up (String); it is CGI-escaped.
  # format - preferred output formats, most preferred first. A single
  #          format name is accepted as well as an Array.
  #
  # Returns the request URI String using the first requested format that
  # is supported.
  # Raises RuntimeError when none of the requested formats is supported
  # (the previous loop never terminated in that case).
  def proxy_uri(uri, format=['rdf'])
    best_format = Array(format).find { |candidate| @@formats.include?(candidate) }
    raise "No compatible response format!" unless best_format
    "http://api.talis.com/stores/#{@store}/meta?about=#{CGI.escape(uri)}&output=#{best_format}"
  end
end
@@ -0,0 +1,173 @@
1
+ $KCODE = 'u'
2
+ require 'rubygems'
3
+ require 'strscan'
4
+ require 'iconv'
5
+ require 'jcode'
6
+ require 'uri'
7
+ require 'json'
8
+ require 'nokogiri'
9
+ require 'cgi'
10
+
11
# StringScanner that decodes backslash escapes in a string, converting
# \uXXXX sequences (read as UTF-16BE code units) to UTF-8 through Iconv.
#
# NOTE(review): this relies on Iconv and on /n regexps, so it is tied to the
# Ruby versions that still ship Iconv -- confirm before running elsewhere.
class UTF8Parser < StringScanner
  # The two byte-range alternatives together cover every byte value, so this
  # matches any run of bytes, including an empty one.
  STRING = /(([\x0-\x1f]|[\\\/bfnrt]|\\u[0-9a-fA-F]{4}|[\x20-\xff])*)/nx
  # Sentinel returned by parse_string when STRING does not match.
  UNPARSED = Object.new
  # Escaped character => replacement. Unknown characters map to themselves;
  # "u" maps to nil so that \uXXXX is handled separately.
  UNESCAPE_MAP = Hash.new { |h, k| h[k] = k.chr }
  UNESCAPE_MAP.update({
    ?" => '"',
    ?\\ => '\\',
    ?/ => '/',
    ?b => "\b",
    ?f => "\f",
    ?n => "\n",
    ?r => "\r",
    ?t => "\t",
    ?u => nil,
  })
  UTF16toUTF8 = Iconv.new('utf-8', 'utf-16be')

  # str - the escaped text to decode.
  def initialize(str)
    super(str)
    @string = str
  end

  # Scans the string and returns it with escapes decoded.
  #
  # Returns the decoded String ('' for empty input, tagged as UTF-8 where
  # String#force_encoding exists), or UNPARSED when STRING does not match.
  # Raises JSON::GeneratorError when Iconv cannot convert a \u sequence.
  def parse_string
    if scan(STRING)
      return '' if self[1].empty?
      string = self[1].gsub(%r((?:\\[\\bfnrt"/]|(?:\\u(?:[A-Fa-f\d]{4}))+|\\[\x20-\xff]))n) do |c|
        if u = UNESCAPE_MAP[$&[1]]
          u
        else # \uXXXX
          # Collect the raw UTF-16BE bytes of a run of consecutive \uXXXX
          # escapes (6 characters each) and convert them in one go.
          bytes = ''
          i = 0
          while c[6 * i] == ?\\ && c[6 * i + 1] == ?u
            bytes << c[6 * i + 2, 2].to_i(16) << c[6 * i + 4, 2].to_i(16)
            i += 1
          end
          UTF16toUTF8.iconv(bytes)
        end
      end
      if string.respond_to?(:force_encoding)
        string.force_encoding(Encoding::UTF_8)
      end
      string
    else
      UNPARSED
    end
  rescue Iconv::Failure => e
    # The bare constant GeneratorError is not defined by anything visible in
    # this file, so raising it would itself fail with NameError. The json
    # library (required by this file) provides JSON::GeneratorError.
    raise JSON::GeneratorError, "Caught #{e.class}: #{e}"
  end
end
58
+ module RDFObject
59
# Parses a single N-Triples statement into subject, predicate and object,
# and (via NTriplesParser.parse) a whole set of statements into resources.
#
# Subjects and predicates must be <uri> terms; objects may be <uri> terms or
# quoted literals with an optional @language or ^^<datatype> suffix.
class NTriplesParser
  attr_reader :ntriple, :subject, :predicate, :data_type, :language, :literal
  attr_accessor :object

  # line - one N-Triples statement (String). It is parsed immediately.
  #
  # Raises ArgumentError when the statement cannot be parsed.
  def initialize(line)
    @ntriple = line
    parse_ntriple
  end

  # Splits the statement into @subject, @predicate and @object. For literal
  # objects @literal is set to true and @language / @data_type are filled in
  # when present.
  def parse_ntriple
    # Surrounding whitespace (including the line terminator kept by
    # IO#readlines) is dropped so it cannot leak into the parsed terms.
    scanner = StringScanner.new(@ntriple.strip)
    @subject = scan_uri_term(scanner)
    @predicate = scan_uri_term(scanner)
    if scanner.match?(/</)
      target = scan_required(scanner, />\s?\.\s*\n?$/)
      @object = Resource.new(target.sub(/\A</, '').sub(/>\s?\.\s*\n?$/, ''))
    else
      @object = parse_literal(scanner)
    end
  end

  # Parses a collection of statements.
  #
  # resources - a String (one statement per line), an Array of statement
  #             Strings, or an object responding to #read and #readlines.
  #             Blank lines and lines starting with "#" are ignored.
  #
  # Returns an Array with each subject Resource once, in order of first
  # appearance (an empty Array when there are no statements).
  # Raises ArgumentError for any other kind of input.
  def self.parse(resources)
    assertions =
      if resources.is_a?(String)
        resources.split("\n")
      elsif resources.is_a?(Array)
        resources
      elsif resources.respond_to?(:read)
        resources.readlines
      else
        raise ArgumentError, "Unable to read N-Triples from #{resources.class}"
      end

    collection = []
    assertions.each do |assertion|
      next if assertion.strip.empty? || assertion =~ /\A\s*#/
      triple = new(assertion)
      resource = Resource.new(triple.subject)
      resource.assert(triple.predicate, triple.object)
      collection << resource
    end
    # Array#uniq! returns nil when nothing was removed, which made this
    # method return nil for input without repeated subjects.
    collection.uniq
  end

  private

  # Consumes a "<uri> " term and returns the URI without the brackets.
  def scan_uri_term(scanner)
    term = scan_required(scanner, /> /)
    term.sub(/\A</, '').sub(/> \z/, '')
  end

  # Consumes a quoted literal plus its optional language tag or datatype and
  # returns the resulting Literal value.
  def parse_literal(scanner)
    @literal = true
    malformed_triple! unless scanner.scan(/"/)
    # Read up to the closing quote, recognised by what follows it: the
    # statement terminator, a language tag or a datatype marker.
    scanned = scan_required(scanner, /"(?:\s?\.\n?$|@[A-Za-z]|\^\^)/)
    terminator = scanner.matched
    # Step back so the scanner sits directly after the closing quote.
    scanner.pos -= terminator.length - 1
    value = scanned[0, scanned.length - terminator.length]
    value = UTF8Parser.new(value).parse_string
    if scanner.match?(/@/)
      scanner.getch
      @language = scan_required(scanner, /\s?\.\n?$/).sub(/\s?\.\n?$/, '')
    elsif scanner.match?(/\^\^/)
      scanner.skip_until(/</)
      @data_type = scan_required(scanner, />/).sub(/>\z/, '')
    end
    Literal.new(value, {:data_type=>@data_type, :language=>@language})
  end

  # scan_until that raises ArgumentError when the pattern is not found.
  def scan_required(scanner, pattern)
    scanner.scan_until(pattern) || malformed_triple!
  end

  def malformed_triple!
    raise ArgumentError, "Unable to parse N-Triple: #{@ntriple.inspect}"
  end
end
119
+
120
# Parses RDF/XML by transforming it to N-Triples with the bundled
# rdf2nt.xsl stylesheet and handing the result to NTriplesParser.
class XMLParser
  # doc - the parsed RDF/XML document to transform.
  #
  # Returns whatever NTriplesParser.parse returns for the generated
  # statements.
  def self.parse(doc)
    stylesheet_path = File.dirname(__FILE__) + '/../xsl/rdf2nt.xsl'
    # The block form closes the stylesheet file once it has been parsed;
    # the handle was previously left open on every call.
    xslt = File.open(stylesheet_path) { |stylesheet| Nokogiri::XSLT(stylesheet) }
    ntriples = xslt.apply_to(doc).split("\n")
    # Drop the XML declaration the transform may emit as its first line.
    ntriples.delete_at(0) if ntriples[0] =~ /^\<\?xml/
    NTriplesParser.parse(ntriples)
  end
end
131
+
132
# Parses RDFa by transforming the document to RDF/XML with the bundled
# RDFa2RDFXML.xsl stylesheet and passing the result on to XMLParser.
class RDFAParser
  # doc - the parsed (X)HTML document carrying RDFa.
  #
  # Returns whatever XMLParser.parse returns for the extracted RDF/XML.
  def self.parse(doc)
    stylesheet_path = File.dirname(__FILE__) + '/../xsl/RDFa2RDFXML.xsl'
    # The block form closes the stylesheet file once it has been parsed;
    # the handle was previously left open on every call.
    xslt = File.open(stylesheet_path) { |stylesheet| Nokogiri::XSLT(stylesheet) }
    rdf_doc = xslt.apply_to(doc)
    XMLParser.parse(Nokogiri.parse(rdf_doc))
  end
end
139
+
140
# Placeholder for a JSON-based RDF parser; it defines no methods yet.
# NOTE(review): Parser.parse in this file calls JSONParser.parse(json),
# which this class does not provide -- confirm whether that is intended.
class JSONParser; end
142
+
143
# Front door for parsing: picks a format-specific parser for the input.
class Parser
  # Choose the best format parser from an admittedly small group of choices.
  #
  # The input is first tried as XML: documents declaring the XHTML
  # namespace on the root element go to RDFAParser, other XML documents to
  # XMLParser. Input that is not XML (a syntax error, or no root element
  # after parsing) is tried as JSON and finally as N-Triples.
  #
  # rdf - a String, or an IO-like object responding to #read.
  #
  # Returns the collection produced by the chosen parser.
  def self.parse(rdf)
    begin
      # Check if the format is XML or RDFa
      doc = Nokogiri::XML.parse(rdf, nil, nil, Nokogiri::XML::ParseOptions::PEDANTIC)
      # A recovering parser can hand back a document without a root element
      # instead of raising; treat that as "not XML" as well.
      if doc.root
        raise "Unable to parse XML/HTML document -- no namespace declared" unless doc.root.namespaces
        if doc.root.namespaces.values.index("http://www.w3.org/1999/xhtml")
          return RDFAParser.parse(doc)
        else
          return XMLParser.parse(doc)
        end
      end
    rescue Nokogiri::XML::SyntaxError
      # Not XML; fall through to the text formats below.
    end
    parse_text_formats(rdf)
  end

  # Tries JSON first and falls back to N-Triples when the text is not JSON.
  def self.parse_text_formats(rdf)
    if rdf.respond_to?(:read)
      # The XML attempt has already consumed the stream; without rewinding,
      # #read returns an empty string and JSON input can never be parsed.
      rdf.rewind if rdf.respond_to?(:rewind)
      text = rdf.read
    else
      text = rdf
    end
    begin
      JSONParser.parse(JSON.parse(text))
    rescue JSON::ParserError
      NTriplesParser.parse(text)
    end
  end
  private_class_method :parse_text_formats
end
173
+ end
@@ -0,0 +1,216 @@
1
+ require 'uri'
2
+ require 'builder'
3
+ require 'date'
4
+ require 'curies'
5
+
6
+ module RDFObject
7
# An RDF resource identified by a URI. Assertions about the resource are
# kept per vocabulary: each vocabulary prefix becomes an OpenStruct member
# holding a Hash of reference => value (or Array of values).
#
# The class keeps an identity map, so Resource.new returns the already
# existing instance for a URI that has been seen before.
class Resource < OpenStruct
  class << self
    # URI => Resource map of every registered instance.
    def instances
      @instances ||= {}
    end

    # Forgets all registered instances.
    def reset!
      @instances = {}
    end

    # Adds +resource+ to the identity map under its URI.
    def register(resource)
      instances[resource.uri] = resource
    end

    # Removes +resource+ from the identity map.
    def remove(resource)
      instances.delete(resource.uri)
    end

    # True when a resource is registered for +uri+.
    def exists?(uri)
      instances.has_key?(uri)
    end

    # Returns the registered resource for +uri+, creating it when needed.
    # A safe curie is expanded before the lookup, because instances are
    # registered under their expanded URI.
    def new(uri)
      uri = Curie.parse(uri) if uri.could_be_a_safe_curie?
      return instances[uri] if exists?(uri)
      super(uri)
    end
  end

  # uri - the resource URI, or a safe curie that is expanded to one.
  def initialize(uri)
    if uri.could_be_a_safe_curie?
      uri = Curie.parse uri
    end
    super(:uri=>uri)
    self.class.register(self)
  end

  # Records that this resource has +object+ as a value of +predicate+.
  # Asserting the same value twice has no effect; a second distinct value
  # turns the stored value into an Array.
  #
  # predicate - predicate URI or safe curie (String).
  # object    - the value: a Resource or a literal value.
  def assert(predicate, object)
    # Curie.create_from_uri looks for a registered namespace before it
    # creates a new one, so no separate lookup is needed here.
    curied_predicate =
      if predicate.could_be_a_safe_curie?
        Curie.new_from_curie(predicate)
      else
        Curie.create_from_uri(predicate)
      end
    register_vocabulary(curied_predicate.prefix)
    vocabulary = send(curied_predicate.prefix.to_sym)
    return if assertion_exists?(predicate, object)
    reference = curied_predicate.reference
    # Key presence is checked (not truthiness) so that a stored false is
    # not mistaken for "no value yet".
    if vocabulary.has_key?(reference)
      unless vocabulary[reference].is_a?(Array)
        vocabulary[reference] = [vocabulary[reference]]
      end
      vocabulary[reference] << object
    else
      vocabulary[reference] = object
    end
  end

  # True when +object+ is already recorded as a value of +predicate+.
  def assertion_exists?(predicate, object)
    existing = self[predicate]
    return false if existing.nil?
    if existing.is_a?(Array)
      existing.include?(object)
    else
      existing == object
    end
  end

  # Looks up the value(s) asserted for a predicate.
  #
  # uri - predicate URI or safe curie. When the reference part is empty
  #       the whole vocabulary Hash is returned.
  #
  # Returns the value, an Array of values, or nil when the namespace is not
  # registered or nothing has been asserted.
  def [](uri)
    curie =
      if uri.could_be_a_safe_curie?
        Curie.new_from_curie(uri)
      else
        Curie.curie_from_uri(uri)
      end
    return nil unless curie
    vocab = send(curie.prefix.to_sym)
    return nil unless vocab
    return vocab if curie.reference.empty?
    vocab[curie.reference]
  end

  # Returns the registered prefix for +uri+ (see Curie.prefix_for).
  def prefix_for(uri)
    Curie.prefix_for(uri)
  end

  # Makes sure this resource has a member named after the vocabulary
  # prefix, initialised to an empty Hash.
  def register_vocabulary(name)
    return if respond_to?(name.to_sym)
    # Assigning through the writer lets OpenStruct create the member
    # itself; the internal new_ostruct_member helper called here before is
    # not available on current OpenStruct implementations.
    send("#{name}=".to_sym, {})
  end

  # Asserts that +resource+ (a Resource, or a URI that is turned into one)
  # is a value of +predicate+.
  def relate(predicate, resource)
    unless resource.is_a?(self.class)
      resource = self.class.new(resource)
    end
    assert(predicate, resource)
  end

  # Fetches this resource's URI over HTTP and parses the response.
  def describe
    rdf = HTTPClient.fetch(self.uri)
    Parser.parse(rdf)
  end

  # True when no vocabulary has been registered on this resource.
  def empty_graph?
    Curie.get_mappings.each do |prefix, _namespace|
      return false if respond_to?(prefix.to_sym)
    end
    true
  end

  # Serialises the resource as an rdf:Description element.
  #
  # FIXME(review): this depends on legacy_xmlns / write_legacy_properties
  # below, which read state that nothing in this class sets.
  def to_xml
    doc = Builder::XmlMarkup.new
    doc.rdf :Description,legacy_xmlns.merge({:about=>uri}) do | rdf |
      write_legacy_properties(rdf)
    end
    doc.target!
  end

  # Serialises the resource as an RSS 1.0 item inside an rdf:RDF element.
  #
  # FIXME(review): same dependency on unset legacy state as to_xml.
  def to_rss
    doc = Builder::XmlMarkup.new
    xmlns = legacy_xmlns
    xmlns["xmlns:rss"] = "http://purl.org/rss/1.0/"
    doc.rdf :RDF, xmlns do | rdf |
      rdf.item :about=>uri do | item |
        write_legacy_properties(item)
      end
    end
    doc.target!
  end

  private

  # Builds the xmlns:nN declarations shared by to_xml and to_rss.
  #
  # FIXME(review): @namespaces is never assigned in this class, so this
  # raises NoMethodError on nil as written.
  def legacy_xmlns
    xmlns = {}
    i = 1
    @namespaces.each do | ns |
      next if ns == 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
      xmlns["xmlns:n#{i}"] = ns
      i += 1
    end
    xmlns
  end

  # Writes one element per value found in instance variables named like
  # "@n<digits>_<tag>".
  #
  # FIXME(review): assertions are stored in per-vocabulary Hashes (see
  # #assert), not in such instance variables; @modifiers is never assigned
  # and RDFResource is not defined in the visible source.
  def write_legacy_properties(builder)
    instance_variables.each do | ivar |
      ivar = ivar.to_s
      next unless ivar =~ /^@n[0-9]*_/
      prefix, tag = ivar.split('_',2)
      prefix = prefix.sub(/^@/,'')
      prefix = 'rdf' if prefix == 'n0'
      values = instance_variable_get(ivar)
      values = [values] unless values.is_a?(Array)
      values.each do | val |
        # Attributes are rebuilt for every value so that one value's
        # rdf:resource / language / datatype cannot leak onto the next.
        attrs = {}
        attrs['rdf:resource'] = val.uri if val.is_a?(RDFResource)
        if @modifiers[val.object_id]
          if @modifiers[val.object_id][:language]
            attrs['xml:lang'] = @modifiers[val.object_id][:language]
          end
          if @modifiers[val.object_id][:type]
            attrs['rdf:datatype'] = @modifiers[val.object_id][:type]
          end
        end
        if attrs['rdf:resource']
          builder.tag!("#{prefix}:#{tag}", attrs)
        else
          builder.tag!("#{prefix}:#{tag}", attrs, val)
        end
      end
    end
  end
end
216
+ end
@@ -0,0 +1,7 @@
1
+ require 'nokogiri'
2
module RDFObject
  # Stub for an XML serializer; nothing is written yet.
  class XMLSerializer
    class << self
      # Accepts +resources+ and currently does nothing with them.
      # Always returns nil.
      def write(resources); end
    end
  end
end
@@ -0,0 +1,11 @@
1
# Entry point: loads the library's dependencies and its component files in
# order, then removes the :http prefix from the Curie mappings.
module RDFObject
  require 'rubygems'
  require 'ostruct'
  require 'curies'
  # Component files, loaded in this order.
  %w[parsers rdf_resource curies data_types http_client].each do |component|
    require File.dirname(__FILE__) + "/rdf_objects/#{component}"
  end
  Curie.remove_prefixes!(:http)
end