rsinger-rdfobjects 0.1.3 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,62 @@
1
+ require 'rubygems'
2
+ require 'curies'
3
# Extensions to the Curie class (reopened here) that map between fully
# qualified URIs and registered namespace prefixes.
class Curie
  # Counter used to mint automatic prefixes ("n0", "n1", ...).
  @@namespace_counter = 0

  # Returns a Curie object from a fully qualified uri (assuming it is registered).
  #
  # Each registered namespace URI is compared literally against the start of
  # +uri_string+; it is escaped before being placed in the pattern so that
  # characters such as ".", "?" and "+" are not treated as regex operators.
  #
  # Returns false when no registered namespace is a prefix of +uri_string+.
  def self.curie_from_uri(uri_string)
    @@mappings.each do |prefix, uri|
      if m = uri_string.match(/\A#{Regexp.escape(uri.to_s)}(.*)/)
        return self.new(prefix, m[1])
      end
    end
    false
  end

  # Returns the Curie prefix for a URI, or false when no registered
  # namespace is a (literal) prefix of +uri_string+.
  def self.prefix_for(uri_string)
    @@mappings.each do |prefix, uri|
      return prefix if uri_string.match(/\A#{Regexp.escape(uri.to_s)}/)
    end
    false
  end

  # Automatically tries to build a safe curie from a uri string.
  # Assumes an RDF Schema and a flat hierarchy.
  #
  # If the URI is already covered by a registered namespace that Curie is
  # returned. Otherwise the namespace is taken to be everything up to and
  # including "#" (when the URI has a fragment) or everything before the
  # last path segment, and it is registered under +prefix+ (or under an
  # automatically generated "n<counter>" prefix when +prefix+ is nil).
  def self.create_from_uri(uri_string, prefix=nil)
    if curie = self.curie_from_uri(uri_string)
      return curie
    end
    uri = URI.parse(uri_string)
    if uri.fragment
      ns = uri.to_s.split('#', 2).first.to_s + '#'
    else
      # path can be nil (opaque URIs) or have no segments ("/"); in both
      # cases the whole URI is used as the namespace.
      elem = uri.path.to_s.split('/').last
      ns = elem ? uri.to_s.sub(/#{Regexp.escape(elem)}\z/, '') : uri.to_s
    end
    unless prefix
      prefix = "n#{@@namespace_counter}"
      @@namespace_counter += 1
    end
    Curie.add_prefixes! prefix.to_s => ns
    self.curie_from_uri(uri_string)
  end

  # Exposes the prefix => namespace URI table.
  def self.get_mappings
    return @@mappings
  end

  # Return a Curie object from a safe curie string.
  # Raises a RuntimeError when the string does not look like a safe curie.
  def self.new_from_curie(curie_string)
    unless curie_string.could_be_a_safe_curie?
      raise "not a real curie"
    end
    prefix, resource = curie_string.curie_parts
    return Curie.new(prefix, resource)
  end
end
@@ -0,0 +1,30 @@
1
# Mixin that lets a literal value carry its RDF datatype URI and language tag.
# RDFObject::Literal.new extends the values it builds with this module.
module RDFObject::Modifiers
  # Language tag of the literal (readable and writable).
  attr_accessor(:language)
  # Datatype URI of the literal (read-only; assigned via #set_data_type).
  attr_reader(:data_type)

  # Stores +uri+ as the datatype of the extended object.
  def set_data_type(uri)
    @data_type = uri
  end
end
8
+
9
# Factory for literal values. Literal.new does not return a Literal
# instance: it returns a native Ruby object chosen from the XSD datatype.
class RDFObject::Literal
  # Builds the Ruby value for an RDF literal.
  #
  # value   - the lexical form of the literal.
  # options - Hash with optional :data_type (datatype URI) and :language.
  #
  # Mapping by :data_type:
  #   xsd:dateTime -> DateTime.parse(value)
  #   xsd:date     -> Date.parse(value)
  #   xsd:int      -> value.to_i
  #   xsd:string   -> value.to_s
  #   xsd:boolean  -> true for "true" (any case) or "1", false otherwise
  #   anything else (including nil) -> value itself
  #
  # The result is extended with RDFObject::Modifiers and given the datatype
  # and language when possible. Integers cannot be extended (Object#extend
  # raises TypeError for them) and frozen objects cannot store the
  # modifier state, so those are returned undecorated instead of raising.
  def self.new(value, options={})
    obj = case options[:data_type]
    when 'http://www.w3.org/2001/XMLSchema#dateTime' then DateTime.parse(value)
    when 'http://www.w3.org/2001/XMLSchema#date' then Date.parse(value)
    when 'http://www.w3.org/2001/XMLSchema#int' then value.to_i
    when 'http://www.w3.org/2001/XMLSchema#string' then value.to_s
    when 'http://www.w3.org/2001/XMLSchema#boolean'
      lexical = value.to_s
      lexical.downcase == 'true' || lexical == '1'
    else
      value
    end
    # Undecoratable results are handed back as plain values.
    return obj if obj.is_a?(Integer) || obj.frozen?
    obj.extend(RDFObject::Modifiers)
    obj.set_data_type(options[:data_type])
    obj.language = options[:language]
    obj
  end
end
@@ -0,0 +1,54 @@
1
+ require 'net/http'
2
+ require 'uri'
3
+ require 'cgi'
4
+
5
# Small HTTP helper that retrieves the RDF/XML description of a URI,
# optionally rewriting the request through registered proxies.
class HTTPClient
  # pattern => proxy object (anything responding to #proxy_uri).
  @@proxies = {}

  # Registers +proxy+ for every URI matching +uri+ (used as a match pattern).
  def self.register_proxy(uri,proxy)
    @@proxies[uri] = proxy
  end

  # Performs a GET on +uri+ and returns the response body.
  #
  # Every registered proxy whose pattern matches the (possibly already
  # rewritten) URI gets to rewrite it before the request is made.
  # Raises a RuntimeError carrying the HTTP status message unless the
  # response code is "200".
  def self.fetch(uri)
    @@proxies.each_pair do |pattern, proxy|
      uri = proxy.proxy_uri(uri, ['ntriples','rdf']) if uri.match(pattern)
    end
    target = URI.parse(uri)
    get = Net::HTTP::Get.new(target.request_uri)
    # Clear any default Accept header before asking for RDF/XML.
    get['accept'] = nil
    get['accept'] = ['application/rdf+xml']
    response = Net::HTTP.start(target.host, target.port) { |http| http.request(get) }
    raise response.message unless response.code == "200"
    response.body
  end
end
30
+
31
+
32
# Rewrites resource URIs into "describe" requests against a Talis Platform
# store's metabox.
class TalisPlatformProxy
  # Name of the Talis Platform store the requests are sent to.
  attr_reader :store
  # Output formats this proxy knows how to request.
  @@formats = ['rdf','ntriples','turtle','json']

  # store_name - name of the Talis Platform store.
  def initialize(store_name)
    @store = store_name
  end

  # Builds the metabox URL describing +uri+.
  #
  # uri    - the resource URI to describe (CGI-escaped into the query).
  # format - preferred output formats in priority order; a single format
  #          name is accepted as well. The first supported one is used.
  #
  # Raises a RuntimeError when none of the requested formats is supported
  # (previously this case looped forever and the raise was unreachable).
  def proxy_uri(uri, format=['rdf'])
    best_format = Array(format).find { |fmt| @@formats.include?(fmt) }
    raise "No compatible response format!" unless best_format
    "http://api.talis.com/stores/#{@store}/meta?about=#{CGI.escape(uri)}&output=#{best_format}"
  end
end
@@ -0,0 +1,173 @@
1
+ $KCODE = 'u'
2
+ require 'rubygems'
3
+ require 'strscan'
4
+ require 'iconv'
5
+ require 'jcode'
6
+ require 'uri'
7
+ require 'json'
8
+ require 'nokogiri'
9
+ require 'cgi'
10
+
11
# StringScanner that decodes backslash escapes (\n, \t, \", \uXXXX, ...)
# in a string and returns the result as UTF-8.
class UTF8Parser < StringScanner
  # Matches the whole run of characters making up the string body.
  STRING = /(([\x0-\x1f]|[\\\/bfnrt]|\\u[0-9a-fA-F]{4}|[\x20-\xff])*)/nx
  # Sentinel returned by #parse_string when STRING does not match.
  UNPARSED = Object.new
  # Escape character => replacement. Unknown escape characters map to
  # themselves; "u" maps to nil so that \uXXXX takes the decoding branch.
  UNESCAPE_MAP = Hash.new { |h, k| h[k] = k.chr }
  UNESCAPE_MAP.update({
    ?" => '"',
    ?\\ => '\\',
    ?/ => '/',
    ?b => "\b",
    ?f => "\f",
    ?n => "\n",
    ?r => "\r",
    ?t => "\t",
    ?u => nil,
  })
  # Converter used to turn the UTF-16BE bytes of \uXXXX escapes into UTF-8.
  UTF16toUTF8 = Iconv.new('utf-8', 'utf-16be')

  def initialize(str)
    super(str)
    @string = str
  end

  # Scans the string and returns it with all escapes decoded, '' for an
  # empty string, or UNPARSED when the input does not match STRING.
  #
  # Raises JSON::GeneratorError when the \uXXXX bytes cannot be converted.
  # (The constant is written fully qualified because a bare GeneratorError
  # cannot be resolved from this top-level class.)
  def parse_string
    if scan(STRING)
      return '' if self[1].empty?
      string = self[1].gsub(%r((?:\\[\\bfnrt"/]|(?:\\u(?:[A-Fa-f\d]{4}))+|\\[\x20-\xff]))n) do |c|
        if u = UNESCAPE_MAP[$&[1]]
          u
        else # \uXXXX
          # Collect the two bytes of every consecutive \uXXXX escape.
          bytes = ''
          i = 0
          while c[6 * i] == ?\\ && c[6 * i + 1] == ?u
            bytes << c[6 * i + 2, 2].to_i(16) << c[6 * i + 4, 2].to_i(16)
            i += 1
          end
          UTF16toUTF8.iconv(bytes)
        end
      end
      if string.respond_to?(:force_encoding)
        string.force_encoding(Encoding::UTF_8)
      end
      string
    else
      UNPARSED
    end
  rescue Iconv::Failure => e
    raise JSON::GeneratorError, "Caught #{e.class}: #{e}"
  end
end
58
+ module RDFObject
59
# Parses a single N-Triples statement into subject, predicate and object,
# and (via NTriplesParser.parse) turns a set of statements into Resources.
class NTriplesParser
  # ntriple   - the raw statement passed to the constructor.
  # subject   - subject URI (angle brackets removed).
  # predicate - predicate URI (angle brackets removed).
  # data_type - datatype URI of a typed literal, otherwise nil.
  # language  - language tag of a tagged literal, otherwise nil.
  # literal   - true when the object is a literal.
  attr_reader :ntriple, :subject, :predicate, :data_type, :language, :literal
  # The parsed object: a Resource for URI objects, otherwise whatever
  # Literal.new returns.
  attr_accessor :object

  # line - one N-Triples statement; it is parsed immediately.
  def initialize(line)
    @ntriple = line
    parse_ntriple
  end

  # Splits @ntriple into its parts and populates the readers above.
  def parse_ntriple
    scanner = StringScanner.new(@ntriple)
    @subject = scanner.scan_until(/> /)
    @subject.sub!(/^</,'')
    @subject.sub!(/> $/,'')
    @predicate = scanner.scan_until(/> /)
    @predicate.sub!(/^</,'')
    @predicate.sub!(/> $/,'')
    if scanner.match?(/</)
      object = scanner.scan_until(/>\s?\.\s*\n?$/)
      object.sub!(/^</,'')
      object.sub!(/>\s?\.\s*\n?$/,'')
      @object = Resource.new(object)
    else
      @literal = true
      scanner.getch # opening quote
      # Read up to the closing quote, recognised by what follows it: the
      # statement terminator, a language tag or a datatype marker.
      object = scanner.scan_until(/("\s?\.\n?$)|("@[A-Za-z])|("\^\^)/)
      # Step back so the scanner sits on the "@" / "^^" again.
      scanner.pos=(scanner.pos-2)
      # Drop the closing quote and whatever terminator text was consumed
      # with it (including a trailing newline).
      object.sub!(/"[^"]*\z/,'')
      uscan = UTF8Parser.new(object)
      object = uscan.parse_string
      if scanner.match?(/@/)
        scanner.getch
        @language = scanner.scan_until(/\s?\.\n?$/)
        @language.sub!(/\s?\.\n?$/,'')
      elsif scanner.match?(/\^\^/)
        scanner.skip_until(/</)
        @data_type = scanner.scan_until(/>/)
        @data_type.sub!(/>$/,'')
      end
      @object = Literal.new(object,{:data_type=>@data_type,:language=>@language})
    end
  end

  # Parses N-Triples statements and returns the Resources they describe.
  #
  # resources - a String (one statement per line), an Array of statements,
  #             or an object responding to #read (its lines are used).
  #
  # Blank lines and "#" comment lines are ignored. Each subject appears
  # once in the returned Array. Raises ArgumentError for any other kind
  # of input.
  def self.parse(resources)
    if resources.is_a?(String)
      assertions = resources.split("\n")
    elsif resources.is_a?(Array)
      assertions = resources
    elsif resources.respond_to?(:read)
      assertions = resources.readlines
    else
      raise ArgumentError, "cannot read N-Triples from #{resources.class}"
    end
    collection = []
    assertions.each do |assertion|
      next if assertion.strip.empty? || assertion =~ /\A\s*#/
      triple = self.new(assertion)
      resource = Resource.new(triple.subject)
      resource.assert(triple.predicate, triple.object)
      collection << resource
    end
    # uniq (not uniq!) so an Array is returned even when nothing repeats.
    collection.uniq
  end
end
119
+
120
# Parses RDF/XML by transforming it to N-Triples with the bundled
# rdf2nt.xsl stylesheet and handing the result to NTriplesParser.
class XMLParser
  # doc - the parsed XML document to transform (passed to the stylesheet's
  #       apply_to).
  #
  # Returns whatever NTriplesParser.parse returns for the generated lines.
  def self.parse(doc)
    stylesheet = File.dirname(__FILE__) + '/../xsl/rdf2nt.xsl'
    # Block form so the stylesheet file handle is closed after loading.
    xslt = File.open(stylesheet) { |io| Nokogiri::XSLT(io) }
    xformed_doc = xslt.apply_to(doc)
    ntriples = xformed_doc.split("\n")
    # Drop the XML declaration if the transform emitted one.
    ntriples.delete_at(0) if ntriples[0] =~ /^\<\?xml/
    return NTriplesParser.parse(ntriples)
  end
end
131
+
132
# Parses RDFa by transforming the document to RDF/XML with the bundled
# RDFa2RDFXML.xsl stylesheet and delegating to XMLParser.
class RDFAParser
  # doc - the parsed (X)HTML document to transform.
  #
  # Returns whatever XMLParser.parse returns for the generated RDF/XML.
  def self.parse(doc)
    stylesheet = File.dirname(__FILE__) + '/../xsl/RDFa2RDFXML.xsl'
    # Block form so the stylesheet file handle is closed after loading.
    xslt = File.open(stylesheet) { |io| Nokogiri::XSLT(io) }
    rdf_doc = xslt.apply_to(doc)
    XMLParser.parse(Nokogiri.parse(rdf_doc))
  end
end
139
+
140
# Builds Resources from an already-decoded JSON structure.
#
# NOTE(review): the expected layout is assumed to be the resource-centric
# RDF/JSON form ({ subject => { predicate => [ { "type", "value", "lang",
# "datatype" }, ... ] } }) — confirm against the services this gem talks to.
class JSONParser
  # json - Hash produced by JSON.parse.
  #
  # Objects whose "type" is "literal" become Literals (with their "lang"
  # and "datatype"); every other object value is treated as a Resource
  # identifier. Returns an Array with one Resource per subject.
  # Raises ArgumentError when +json+ is not a Hash.
  def self.parse(json)
    unless json.is_a?(Hash)
      raise ArgumentError, "expected a Hash of subjects, got #{json.class}"
    end
    collection = []
    json.each_pair do |subject, predicates|
      resource = Resource.new(subject)
      predicates.each_pair do |predicate, objects|
        objects.each do |object|
          if object['type'] == 'literal'
            value = Literal.new(object['value'], {:data_type=>object['datatype'], :language=>object['lang']})
          else
            value = Resource.new(object['value'])
          end
          resource.assert(predicate, value)
        end
      end
      collection << resource
    end
    collection.uniq
  end
end
142
+
143
# Front door for parsing: works out which concrete parser to use.
class Parser
  # Choose the best format parser from an admittedly small group of choices.
  #
  # rdf - a String or an IO-like object holding the serialized graph.
  #
  # The input is first tried as XML: documents declaring the XHTML
  # namespace on their root go to RDFAParser, other XML to XMLParser.
  # When the XML parse fails with a syntax error the input is tried as
  # JSON, and finally as N-Triples.
  #
  # Raises a RuntimeError for XML without a root element or without any
  # namespace declaration on the root.
  def self.parse(rdf)
    begin
      # Check if the format is XML or RDFa
      doc = Nokogiri::XML.parse(rdf, nil, nil, Nokogiri::XML::ParseOptions::PEDANTIC)
      root = doc.root
      if root.nil? || root.namespaces.empty?
        raise "Unable to parse XML/HTML document -- no namespace declared"
      end
      if root.namespaces.values.index("http://www.w3.org/1999/xhtml")
        collection = RDFAParser.parse(doc)
      else
        collection = XMLParser.parse(doc)
      end
    rescue Nokogiri::XML::SyntaxError
      begin
        # The XML attempt may already have consumed the stream.
        rdf.rewind if rdf.respond_to?(:rewind)
        if rdf.respond_to?(:read)
          json = JSON.parse(rdf.read)
        else
          json = JSON.parse(rdf)
        end
        collection = JSONParser.parse(json)
      rescue JSON::ParserError
        rdf.rewind if rdf.respond_to?(:rewind)
        collection = NTriplesParser.parse(rdf)
      end
    end
    collection
  end
end
173
+ end
@@ -0,0 +1,216 @@
1
+ require 'uri'
2
+ require 'builder'
3
+ require 'date'
4
+ require 'curies'
5
+
6
+ module RDFObject
7
# An RDF resource identified by a URI.
#
# Assertions are stored per vocabulary: for every namespace prefix used by
# a predicate the resource gets an OpenStruct member named after the prefix
# holding a Hash of { reference => object } (or an Array of objects when a
# predicate has several values). Instances are kept in a per-class
# identity map so that each URI is represented by a single object.
class Resource < OpenStruct
  class << self
    # The identity map: uri => Resource.
    def instances
      @instances ||= {}
    end

    # Forgets every registered resource.
    def reset!
      @instances = {}
    end

    # Adds +resource+ to the identity map under its uri.
    def register(resource)
      instances[resource.uri] = resource
    end

    # Removes +resource+ from the identity map.
    def remove(resource)
      instances.delete(resource.uri)
    end

    # True when a resource with this exact uri is registered.
    def exists?(uri)
      instances.has_key?(uri)
    end

    # Returns the registered resource for +uri+, creating it if needed.
    # Safe curies are expanded first so that the lookup uses the same key
    # the instance is registered under.
    def new(uri)
      uri = Curie.parse(uri) if uri.could_be_a_safe_curie?
      instances[uri] || super(uri)
    end
  end

  # uri - a full URI or a safe curie (which is expanded via Curie.parse).
  def initialize(uri)
    if uri.could_be_a_safe_curie?
      uri = Curie.parse uri
    end
    super({:uri=>uri})
    self.class.register(self)
  end

  # Records the statement (self, predicate, object).
  #
  # predicate - a full URI or a safe curie; unknown namespaces are
  #             registered automatically through Curie.create_from_uri.
  # object    - the value to store. Asserting the same pair twice is a no-op.
  def assert(predicate, object)
    curied_predicate = if predicate.could_be_a_safe_curie?
      Curie.new_from_curie(predicate)
    else
      Curie.curie_from_uri(predicate) || Curie.create_from_uri(predicate)
    end
    self.register_vocabulary(curied_predicate.prefix)
    pred_attr = self.send(curied_predicate.prefix.to_sym)
    return if assertion_exists?(predicate, object)
    reference = curied_predicate.reference
    if pred_attr[reference]
      # A second value turns the slot into an Array.
      pred_attr[reference] = [pred_attr[reference]] unless pred_attr[reference].is_a?(Array)
      pred_attr[reference] << object
    else
      pred_attr[reference] = object
    end
  end

  # True when +object+ is already stored for +predicate+.
  def assertion_exists?(predicate, object)
    current = self[predicate]
    return false unless current
    if current.is_a?(Array)
      return true if current.index(object)
    else
      return true if current == object
    end
    return false
  end

  # Looks up the value(s) stored for a predicate.
  #
  # uri - a full URI or a safe curie.
  #
  # Returns nil when the namespace is not registered or the resource has
  # no such vocabulary, the whole vocabulary Hash when the reference part
  # is empty, and otherwise the stored object or Array of objects.
  def [](uri)
    curie = if uri.could_be_a_safe_curie?
      Curie.new_from_curie(uri)
    else
      Curie.curie_from_uri(uri)
    end
    return nil unless curie
    vocab = self.send(curie.prefix.to_sym)
    return nil unless vocab
    return vocab if curie.reference.empty?
    return vocab[curie.reference]
  end

  # The registered prefix for +uri+ (delegates to Curie.prefix_for).
  def prefix_for(uri)
    Curie.prefix_for(uri)
  end

  # Ensures the resource has a member called +name+ holding a Hash.
  # Assigning through the generated writer is enough for OpenStruct to
  # create the member, so no internal OpenStruct method is called.
  def register_vocabulary(name)
    return if self.respond_to?(name.to_sym)
    self.send("#{name}=".to_sym, {})
  end

  # Asserts +predicate+ with another resource as the object; +resource+
  # may be given as a URI, in which case it is looked up or created.
  def relate(predicate, resource)
    unless resource.is_a?(self.class)
      resource = self.class.new(resource)
    end
    self.assert(predicate, resource)
  end

  # Fetches this resource's uri over HTTP and parses the response.
  def describe
    rdf = HTTPClient.fetch(self.uri)
    Parser.parse(rdf)
  end

  # True when the resource has no member for any registered prefix.
  def empty_graph?
    Curie.get_mappings.each do |prefix, uri|
      return false if self.respond_to?(prefix.to_sym)
    end
    return true
  end

  # Serializes the resource as a single rdf:Description element.
  #
  # NOTE(review): the earlier implementation read @namespaces/@modifiers,
  # which this class never sets, so it could not run. This version reads
  # the vocabulary members instead; confirm the exact markup wanted.
  def to_xml
    doc = Builder::XmlMarkup.new
    doc.rdf :Description, xml_namespaces.merge({'rdf:about'=>uri}) do |rdf|
      write_assertions(rdf)
    end
    doc.target!
  end

  # Serializes the resource as an RSS 1.0 item wrapped in rdf:RDF.
  #
  # NOTE(review): same caveat as #to_xml regarding the exact markup.
  def to_rss
    doc = Builder::XmlMarkup.new
    xmlns = xml_namespaces
    xmlns["xmlns:rss"] = "http://purl.org/rss/1.0/"
    doc.rdf :RDF, xmlns do |rdf|
      rdf.rss :item, {'rdf:about'=>uri} do |item|
        write_assertions(item)
      end
    end
    doc.target!
  end

  private

  # Yields prefix, namespace URI and vocabulary Hash for every registered
  # prefix this resource has assertions under.
  def each_vocabulary
    Curie.get_mappings.each do |prefix, namespace|
      next unless self.respond_to?(prefix.to_sym)
      vocab = self.send(prefix.to_sym)
      yield prefix.to_s, namespace, vocab if vocab.is_a?(Hash)
    end
  end

  # xmlns declarations for the rdf namespace plus every vocabulary in use.
  def xml_namespaces
    xmlns = {'xmlns:rdf'=>'http://www.w3.org/1999/02/22-rdf-syntax-ns#'}
    each_vocabulary { |prefix, namespace, vocab| xmlns["xmlns:#{prefix}"] = namespace }
    xmlns
  end

  # Writes one child element per stored value: resources become
  # rdf:resource references, anything else becomes element text with
  # xml:lang / rdf:datatype taken from the value when it carries them.
  def write_assertions(xml)
    each_vocabulary do |prefix, namespace, vocab|
      vocab.each do |reference, values|
        values = [values] unless values.is_a?(Array)
        values.each do |val|
          tag = "#{prefix}:#{reference}"
          if val.is_a?(Resource)
            xml.tag!(tag, {'rdf:resource'=>val.uri})
          else
            attrs = {}
            attrs['xml:lang'] = val.language if val.respond_to?(:language) && val.language
            attrs['rdf:datatype'] = val.data_type if val.respond_to?(:data_type) && val.data_type
            xml.tag!(tag, attrs, val.to_s)
          end
        end
      end
    end
  end
end
216
+ end
@@ -0,0 +1,7 @@
1
+ require 'nokogiri'
2
module RDFObject
  # Placeholder for an XML serializer; nothing is implemented yet.
  class XMLSerializer
    class << self
      # Accepts +resources+ and currently does nothing (returns nil).
      def write(resources)
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
# Entry point of the library: loads the external dependencies and every
# component file under rdf_objects/, then adjusts the Curie prefix table.
module RDFObject
  require 'rubygems'
  require 'ostruct'
  require 'curies'
  # Component files, loaded in this order.
  %w(parsers rdf_resource curies data_types http_client).each do |component|
    require File.dirname(__FILE__) + "/rdf_objects/#{component}"
  end
  # Unregister the :http prefix from the Curie mappings.
  Curie.remove_prefixes!(:http)
end