pius-rena 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,182 @@
1
+ require 'rena/uriref'
2
+ require 'rena/graph'
3
+ require 'rena/literal'
4
+ require 'rena/exceptions/uri_relative_exception'
5
+ require 'rena/exceptions/about_each_exception'
6
+ require 'rena/rexml_hacks'
7
+
8
##
# Parses an RDF/XML document with REXML and loads the triples it describes
# into a Graph.
#
# Parsing happens eagerly in the constructor. Afterwards the triples are
# available through #graph (left nil when the document does not declare the
# RDF namespace) and the parsed REXML document through #xml.
class RdfXmlParser
  # Namespace URI shared by every rdf:* term handled by this parser.
  RDF_NS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"

  attr_accessor :xml, :graph

  ##
  # @param [String] xml_str the RDF/XML source text
  # @param [String, nil] uri URI of the document; used to build subjects for
  #   rdf:ID attributes when no xml:base is in scope
  def initialize(xml_str, uri = nil)
    # rdf:* attributes that identify a node; they never become property triples.
    @excl = %w[resource nodeID about ID].map { |term| RDF_NS + term }
    @uri = Addressable::URI.parse(uri) unless uri.nil?
    @xml = REXML::Document.new(xml_str)
    return unless is_rdf?

    @graph = Graph.new
    @xml.root.each_element { |e| parse_element(e) }
  end

  ##
  # @return [Boolean] true when a top-level element of the document has the
  #   RDF namespace among its in-scope namespaces
  def is_rdf?
    found = false
    @xml.each_element do |e|
      found = true if e.namespaces.has_value?(RDF_NS)
    end
    found
  end

  protected

  ##
  # Works out which node an element refers to by inspecting its rdf:*
  # attributes.
  #
  # @param [REXML::Element] element the element whose attributes are inspected
  # @param [Boolean] aboutmode when false (the default) rdf:resource names the
  #   node; any other value makes rdf:about name it
  # @return [URIRef, BNode, String] the node; a fresh BNode when no attribute
  #   identifies one, a String for rdf:ID
  # @raise [AboutEachException] for rdf:aboutEach and rdf:aboutEachPrefix
  # @raise [UriRelativeException] when the reference stays relative even after
  #   being combined with the base in scope
  # @raise [RuntimeError] for rdf:bagID, or for rdf:ID with neither xml:base
  #   nor a document URI available
  def get_uri_from_atts(element, aboutmode = false)
    resourceuri = RDF_NS + (aboutmode == false ? "resource" : "about")
    subject = nil

    element.attributes.each_attribute do |att|
      uri = att.namespace + att.name
      value = att.to_s

      case uri
      when RDF_NS + "aboutEach"
        raise AboutEachException, "Failed as per RDFMS-AboutEach-Error001.rdf test from 2004 test suite"
      when RDF_NS + "aboutEachPrefix"
        raise AboutEachException, "Failed as per RDFMS-AboutEach-Error002.rdf test from 2004 test suite"
      when RDF_NS + "bagID"
        # Previously a bare `raise` followed by unreachable validation code
        # that referenced an undefined local. Still a RuntimeError, now with
        # a message that says why.
        raise "rdf:bagID is not supported"
      when resourceuri
        begin
          subject = URIRef.new(value)
        rescue UriRelativeException
          # NOTE(review): relative references are treated as fragments of the
          # base here; confirm this is intended for path-like references.
          value = "#" + value unless value[0, 1] == "#"
          # to_s keeps a missing xml:base from surfacing as NoMethodError; the
          # reference then fails as a UriRelativeException instead.
          subject = URIRef.new(element.base.to_s + value)
        else
          # An absolute URI settles the matter; ignore remaining attributes.
          break
        end
      when RDF_NS + "nodeID"
        # Only syntactically acceptable identifiers are honoured.
        if value =~ /^[a-zA-Z_][a-zA-Z0-9]*$/
          # Reuse the graph's node when the identifier is already known.
          if @graph.has_bnode_identifier?(value)
            subject = @graph.get_bnode_by_identifier(value)
          else
            subject = BNode.new(value)
          end
        end
      when RDF_NS + "ID"
        if att.element.base?
          subject = id_to_uri(att.element.base, value)
        elsif !@uri.nil?
          subject = id_to_uri(@uri, value)
        else
          raise "Needs to have an ID"
        end
      end

      # add other subject detection subroutines here
    end

    subject.nil? ? BNode.new : subject
  end

  ##
  # Builds the URI string for an rdf:ID value: the base without any fragment
  # of its own, followed by "#" and the identifier.
  #
  # @param [#to_s] base the xml:base in scope or the document URI
  # @param [String] id the rdf:ID attribute value
  # @return [String]
  def id_to_uri(base, id)
    base.to_s.sub(/#.*\z/, "") + "#" + id
  end

  ##
  # Adds the triples described by a node element to the graph: an rdf:type
  # triple (when requested), one literal triple per ordinary attribute, and
  # whatever the child property elements produce.
  #
  # @param [REXML::Element] element the node element
  # @param [URIRef, BNode, String, nil] subject the node already chosen for
  #   this element; derived from the element's attributes when nil
  # @param [Boolean] resource when true the element name is emitted as the
  #   subject's rdf:type (unless it is rdf:Description)
  def parse_element(element, subject = nil, resource = false)
    # nil? rather than `== nil`, so the check never dispatches to a custom
    # #== on the subject object.
    subject = get_uri_from_atts(element, true) if subject.nil?

    # type parsing
    if resource == true
      type = URIRef.new(element.namespace + element.name)
      unless type.to_s == RDF_NS + "Description"
        @graph.add_triple(subject, URIRef.new(RDF_NS + "type"), type)
      end
    end

    # attribute parsing
    element.attributes.each_attribute do |att|
      uri = att.namespace + att.name
      value = att.to_s
      next if @excl.member?(uri)

      @graph.add_triple(subject, URIRef.new(uri), Literal.new(value))
    end

    # element parsing
    element.each_element { |e| parse_resource_element(e, subject) }
  end

  ##
  # Adds the triple(s) for one property element of +subject+.
  #
  # @param [REXML::Element] e the property element
  # @param [URIRef, BNode, String] subject the node the property belongs to
  def parse_resource_element(e, subject)
    uri = e.namespace + e.name
    parse_type = e.attributes.get_attribute_ns(RDF_NS, "parseType").to_s

    if parse_type == "Literal"
      @graph.add_triple(subject, URIRef.new(uri), TypedLiteral.new(e.children.to_s.strip, RDF_NS + "XMLLiteral"))
    elsif e.has_elements?
      # subparsing
      e.each_element do |se| # se = 'striped element'
        if parse_type == "Resource"
          object = BNode.new
        else
          object = get_uri_from_atts(se, true)
        end
        @graph.add_triple(subject, URIRef.new(uri), object)
        parse_element(se, object, true)
      end
    elsif (datatype = e.attributes.get_attribute_ns(RDF_NS, "datatype"))
      @graph.add_triple(subject, URIRef.new(uri), TypedLiteral.new(e.text, datatype.to_s.strip))
    elsif e.has_attributes?
      # get object out
      object = get_uri_from_atts(e)
      @graph.add_triple(subject, URIRef.new(uri), object)
    elsif e.has_text?
      if e.lang?
        @graph.add_triple(subject, URIRef.new(uri), Literal.new(e.text, e.lang))
      else
        @graph.add_triple(subject, URIRef.new(uri), Literal.new(e.text))
      end
    end
  end
end
@@ -0,0 +1,70 @@
1
+ require 'rexml/document'
2
+
3
+ # def subdocument_writer(el)
4
+ # el.prefixes.each { |ns|
5
+ # el.add_attribute('xmlns:' + ns, el.namespaces[ns].to_s)
6
+ # }
7
+ # return el.to_s
8
+ # end
9
+
10
# Extensions to REXML::Element: lookup of the inherited xml:lang / xml:base
# values and a serializer that re-declares in-scope namespaces.
class REXML::Element
  public

  # @return [Boolean] whether an xml:lang value is in scope for this element
  def lang?
    !lang.nil?
  end

  # @return [String, nil] the xml:lang value of this element or, failing
  #   that, of its nearest ancestor that carries one
  def lang
    own = attributes['xml:lang']
    return attributes['xml:lang'].to_s if own
    return nil if parent.nil?

    parent.lang
  end

  # @return [Boolean] whether an xml:base value is in scope for this element
  def base?
    !base.nil?
  end

  # @return [String, nil] the xml:base value of this element or, failing
  #   that, of its nearest ancestor that carries one
  def base
    own = attributes['xml:base']
    return attributes['xml:base'].to_s if own
    return nil if parent.nil?

    parent.base
  end

  # Serializes the element after adding xmlns declarations for the prefixes
  # in scope, so the returned markup carries its own namespace declarations.
  # The declarations are added to the element itself (and to descendants).
  #
  # @param [Array] excl namespace URIs that must not be re-declared on this element
  # @return [String] the element as XML text
  def write(excl=[])
    # TODO: add optional list argument of excluded namespaces
    prefixes.each do |pfx|
      next if excl.include?(namespaces[pfx])

      add_attribute('xmlns:' + pfx, namespaces[pfx].to_s)
    end
    support_write_recursive(namespaces, self)
    to_s
  end

  protected

  # Walks the descendants of +el+ and declares on each the namespace it uses
  # whenever that prefix/namespace pair is not covered by +array+.
  #
  # @param [Hash] array prefix => namespace URI mapping of the serialized root
  # @param [REXML::Element] el the element whose children are visited
  def support_write_recursive(array, el)
    el.each_element do |child|
      covered = array.has_key?(child.prefix) && array.has_value?(child.namespace)
      unless covered
        if child.prefix != ""
          child.add_attribute('xmlns:' + child.prefix, child.namespace)
        else
          child.add_attribute('xmlns', child.namespace)
        end
      end
      support_write_recursive(array, child)
    end
  end
end
@@ -0,0 +1,72 @@
1
##
# A single RDF statement: subject, predicate and object.
class Triple
  attr_accessor :subject, :object, :predicate

  ##
  # Creates a new triple directly from the intended subject, predicate, and object.
  #
  # ==== Example
  #   Triple.new(BNode.new, URIRef.new("http://xmlns.com/foaf/0.1/knows"), BNode.new) # => results in the creation of a new triple and returns it
  #
  # @param [URIRef, BNode, String] subject the subject of the triple; a String
  #   becomes a URIRef when it looks like a URI and a BNode otherwise
  # @param [URIRef, String] predicate the predicate of the triple; a String
  #   must look like a URI
  # @param [URIRef, BNode, Literal, TypedLiteral, String, Integer, Float] object
  #   the object of the triple; Strings and numbers are wrapped in a Literal
  #
  # ==== Returns
  #
  # @return [Triple] the new triple
  #
  # @raise [RuntimeError] Checks parameter types and raises if they are incorrect.
  # @author Tom Morris
  def initialize(subject, predicate, object)
    check_subject(subject)
    check_predicate(predicate)
    check_object(object)
  end

  ##
  # @return [String] the triple as one N-Triples statement, without a
  #   trailing newline
  def to_ntriples
    @subject.to_ntriples + " " + @predicate.to_ntriples + " " + @object.to_ntriples + " ."
  end

  protected

  # Stores the subject, coercing Strings to a URIRef or a BNode.
  def check_subject(subject)
    case subject
    when BNode, URIRef
      @subject = subject
    when String
      if subject =~ /\S+\/\/\S+/ # does it smell like a URI?
        @subject = URIRef.new(subject)
      else
        @subject = BNode.new(subject)
      end
    else
      raise "Subject is not of a known class"
    end
  end

  # Stores the predicate, coercing URI-like Strings to a URIRef.
  def check_predicate(predicate)
    case predicate
    when URIRef
      @predicate = predicate
    when BNode
      raise "BNode is not allowed as a predicate"
    when String
      if predicate =~ /\S+\/\/\S+/ # URI smell check again
        @predicate = URIRef.new(predicate)
      else
        raise "String literals are not acceptable as predicates"
      end
    else
      raise "Predicate should be a uriref"
    end
  end

  # Stores the object, wrapping plain Strings and numbers in a Literal.
  def check_object(object)
    case object
    when URIRef, BNode, Literal, TypedLiteral
      # RDF terms are checked first so they are always stored untouched.
      @object = object
    when String, Integer, Float
      # Integer covers every integer class; the Fixnum constant the old
      # check referenced no longer exists on current Rubies.
      @object = Literal.new(object.to_s)
    else
      raise "Object expects valid class"
    end
  end
end
@@ -0,0 +1,41 @@
1
+ require 'rubygems'
2
+ require 'addressable/uri'
3
+ require 'rena/exceptions/uri_relative_exception'
4
+
5
##
# An absolute URI reference used as an RDF node or predicate.
class URIRef
  attr_accessor :uri

  ##
  # @param [String] string the URI text
  # @raise [UriRelativeException] when the parsed URI is relative
  # @raise [RuntimeError] when the text contains control characters or starts
  #   with "javascript"
  def initialize(string)
    test_string(string)
    @uri = Addressable::URI.parse(string)
    raise UriRelativeException, "<" + @uri.to_s + ">" if @uri.relative?
    raise "Javascript pseudo-URIs are not acceptable" unless @uri.to_s.match(/^javascript/).nil?
  end

  ##
  # Two URIRefs are equal when their URIs are equal.
  #
  # @param [Object] other the object to compare with
  # @return [Boolean] always true or false; anything that is not a URIRef
  #   (nil included) compares as false instead of raising
  def ==(other)
    other.is_a?(URIRef) && @uri == other.uri
  end

  # @return [String] the URI text
  def to_s
    @uri.to_s
  end

  # @return [String] the URI wrapped in angle brackets, as N-Triples expects
  def to_ntriples
    "<" + @uri.to_s + ">"
  end

  ##
  # Rejects text containing bytes in the range 0..31.
  #
  # @param [#to_s] string the candidate URI text
  # @raise [RuntimeError] when such a byte is present
  def test_string(string)
    string.to_s.each_byte do |b|
      raise "URI must not contain control characters" if b <= 31
    end
  end
end
data/rena.gemspec ADDED
@@ -0,0 +1,16 @@
1
# Gem specification for rena: package metadata, the files shipped in the gem
# and its single runtime dependency (addressable).
Gem::Specification.new do |s|
  s.name = "rena"
  s.version = "0.0.1"
  s.date = "2008-07-13"
  s.summary = "Ruby RDF library."
  s.email = "tom@tommorris.org"
  s.homepage = "http://github.com/tommorris/rena"
  s.description = "Rena is a Ruby library for manipulating RDF files."
  # has_rdoc is a deprecated setter; only call it where the running RubyGems
  # still provides it so the spec keeps loading elsewhere.
  s.has_rdoc = true if s.respond_to?(:has_rdoc=)
  s.authors = ['Tom Morris', 'Pius Uzamere']
  s.files = [
    "README.txt",
    "Rakefile",
    "rena.gemspec",
    "lib/rena.rb",
    "lib/rena/bnode.rb",
    "lib/rena/graph.rb",
    "lib/rena/literal.rb",
    "lib/rena/namespace.rb",
    "lib/rena/rdfxmlparser.rb",
    "lib/rena/rexml_hacks.rb",
    "lib/rena/triple.rb",
    "lib/rena/uriref.rb",
    "lib/rena/exceptions/about_each_exception.rb",
    "lib/rena/exceptions/uri_relative_exception.rb"
  ]
  s.test_files = [
    "test/test_uris.rb",
    "test/xml.rdf",
    "test/spec/bnode.spec.rb",
    "test/spec/graph.spec.rb",
    "test/spec/literal.spec.rb",
    "test/spec/namespaces.spec.rb",
    "test/spec/parser.spec.rb",
    "test/spec/rexml_hacks.spec.rb",
    "test/spec/triple.spec.rb",
    "test/spec/uriref.spec.rb"
  ]
  #s.rdoc_options = ["--main", "README.txt"]
  #s.extra_rdoc_files = ["History.txt", "Manifest.txt", "README.txt"]
  s.add_dependency("addressable", ["> 0.0.1"])
end
@@ -0,0 +1,25 @@
1
+ require 'lib/rena'
2
# Examples covering BNode identifiers, serialization and equality.
describe "Blank nodes" do
  it "should accept a custom identifier" do
    node = BNode.new('foo')
    node.identifier.should == "foo"
    node.to_s.should == "foo"
  end

  it "should reject custom identifiers if they are not acceptable" do
    # An identifier starting with a digit must not be kept verbatim.
    node = BNode.new("4cake")
    node.identifier.should_not == "4cake"
  end

  it "should be expressible in N3 and NT syntax" do
    node = BNode.new('test')
    node.to_n3.should == "_:test"
    node.to_ntriples.should == node.to_n3
  end

  it "should be able to determine equality" do
    first = BNode.new('a')
    second = BNode.new('a')
    first.eql?(second).should == true
  end
end
@@ -0,0 +1,108 @@
1
+ require 'lib/rena'
2
+
3
# Examples covering Graph: adding triples, size, N-Triples output, iteration,
# blank-node lookup and namespace binding.
describe "Graphs" do
  it "should allow you to add one or more triples" do
    lambda {
      graph = Graph.new
      graph.add_triple(BNode.new, URIRef.new("http://xmlns.com/foaf/0.1/knows"), BNode.new)
    }.should_not raise_error
  end

  it "should tell you how large the graph is" do
    graph = Graph.new
    5.times do
      graph.add_triple(BNode.new, URIRef.new("http://xmlns.com/foaf/0.1/knows"), BNode.new)
    end
    graph.size.should == 5
  end

  it "should support << as an alias for add_triple" do
    lambda {
      graph = Graph.new
      graph << Triple.new(BNode.new, URIRef.new("http://xmlns.com/foaf/0.1/knows"), BNode.new)
    }.should_not raise_error
  end

  it "should output NTriple" do
    graph = Graph.new
    ex = Namespace.new("http://example.org/", "ex")
    foaf = Namespace.new("http://xmlns.com/foaf/0.1/", "foaf")
    graph << Triple.new(ex.john, foaf.knows, ex.jane)
    graph << Triple.new(ex.jane, foaf.knows, ex.rick)
    graph << Triple.new(ex.rick, foaf.knows, ex.john)
    expected = "<http://example.org/john> <http://xmlns.com/foaf/0.1/knows> <http://example.org/jane> .\n<http://example.org/jane> <http://xmlns.com/foaf/0.1/knows> <http://example.org/rick> .\n<http://example.org/rick> <http://xmlns.com/foaf/0.1/knows> <http://example.org/john> .\n"
    graph.to_ntriples.should == expected
  end

  it "should allow iteration" do
    graph = Graph.new
    ex = Namespace.new("http://example.org/", "ex")
    foaf = Namespace.new("http://xmlns.com/foaf/0.1/", "foaf")
    graph << Triple.new(ex.john, foaf.knows, ex.jane)
    graph << Triple.new(ex.jane, foaf.knows, ex.rick)
    graph << Triple.new(ex.rick, foaf.knows, ex.john)
    seen = 0
    graph.each do |triple|
      seen += 1
      triple.class.should == Triple
    end
    seen.should == 3
  end

  it "should allow iteration over a particular subject" do
    graph = Graph.new
    ex = Namespace.new("http://example.org/", "ex")
    foaf = Namespace.new("http://xmlns.com/foaf/0.1/", "foaf")
    graph << Triple.new(ex.john, foaf.knows, ex.jane)
    graph << Triple.new(ex.jane, foaf.knows, ex.rick)
    graph << Triple.new(ex.rick, foaf.knows, ex.john)
    seen = 0
    graph.each_with_subject(ex.john) do |triple|
      seen += 1
      triple.class.should == Triple
    end
    seen.should == 1
  end

  it "should be able to determine whether or not it has existing BNodes" do
    graph = Graph.new
    foaf = Namespace.new("http://xmlns.com/foaf/0.1/", "foaf")
    graph << Triple.new(BNode.new('john'), foaf.knows, BNode.new('jane'))
    graph.has_bnode_identifier?('john').should == true
    graph.has_bnode_identifier?('jane').should == true
    graph.has_bnode_identifier?('jack').should == false
  end

  it "should be able to return BNodes on demand" do
    graph = Graph.new
    john = BNode.new('john')
    jane = BNode.new('jane')
    foaf = Namespace.new("http://xmlns.com/foaf/0.1/", "foaf")
    graph << Triple.new(john, foaf.knows, jane)
    graph.get_bnode_by_identifier('john').should == john
    graph.get_bnode_by_identifier('jane').should == jane
  end

  it "should allow you to create and bind Namespace objects on-the-fly" do
    graph = Graph.new
    graph.namespace("http://xmlns.com/foaf/0.1/", "foaf")
    graph.nsbinding["foaf"].uri.should == "http://xmlns.com/foaf/0.1/"
  end

  it "should not allow you to bind things other than namespaces" do
    lambda {
      graph = Graph.new
      graph.bind(false)
    }.should raise_error
  end

  it "should have an error log for parsing errors" do
    pending "TODO: implement an error log at the graph level"
  end

  it "should follow the specification as to output identical triples" do
    pending
  end

  # No body yet: left as a block-less example, as in the original.
  it "should be able to integrate another graph"
end