tommorris-rena 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,74 +1,76 @@
1
1
  require 'rena/uriref'
2
2
  require 'rena/graph'
3
3
 
4
- class Namespace
5
- attr_accessor :short, :uri, :fragment
6
-
7
- ##
8
- # Creates a new namespace given a URI and the short name.
9
- #
10
- # ==== Example
11
- # Namespace.new("http://xmlns.com/foaf/0.1/", "foaf") # => returns a new Foaf namespace
12
- #
13
- # @param [String] uri the URI of the namespace
14
- # @param [String] short the short name of the namespace
15
- # @param [Boolean] fragment are the identifiers on this resource fragment identifiers? (e.g. '#') Defaults to false.
16
- #
17
- # ==== Returns
18
- # @return [Namespace] The newly created namespace.
19
- #
20
- # @raise [Error] Checks validity of the desired shortname and raises if it is incorrect.
21
- # @author Tom Morris, Pius Uzamere
22
-
23
- def initialize(uri, short, fragment = false)
24
- @uri = uri
25
- @fragment = fragment
26
- if shortname_valid?(short)
27
- @short = short
28
- else
29
- raise
4
+ module Rena
5
+ class Namespace
6
+ attr_accessor :short, :uri, :fragment
7
+
8
+ ##
9
+ # Creates a new namespace given a URI and the short name.
10
+ #
11
+ # ==== Example
12
+ # Namespace.new("http://xmlns.com/foaf/0.1/", "foaf") # => returns a new Foaf namespace
13
+ #
14
+ # @param [String] uri the URI of the namespace
15
+ # @param [String] short the short name of the namespace
16
+ # @param [Boolean] fragment are the identifiers on this resource fragment identifiers? (e.g. '#') Defaults to false.
17
+ #
18
+ # ==== Returns
19
+ # @return [Namespace] The newly created namespace.
20
+ #
21
+ # @raise [Error] Checks validity of the desired shortname and raises if it is incorrect.
22
+ # @author Tom Morris, Pius Uzamere
23
+
24
+ def initialize(uri, short, fragment = false)
25
+ @uri = uri
26
+ @fragment = fragment
27
+ if shortname_valid?(short)
28
+ @short = short
29
+ else
30
+ raise
31
+ end
30
32
  end
31
- end
32
-
33
- ##
34
- # Allows the construction of arbitrary URIs on the namespace.
35
- #
36
- # ==== Example
37
- # foaf = Namespace.new("http://xmlns.com/foaf/0.1/", "foaf"); foaf.knows # => returns a new URIRef with URI "http://xmlns.com/foaf/0.1/knows"
38
- # foaf = Namespace.new("http://xmlns.com/foaf/0.1/", "foaf", true); foaf.knows # => returns a new URIRef with URI "http://xmlns.com/foaf/0.1/#knows"
39
- #
40
- # @param [String] uri the URI of the namespace
41
- # @param [String] short the short name of the namespace
42
- # @param [Boolean] fragment are the identifiers on this resource fragment identifiers? (e.g. '#') Defaults to false.
43
- #
44
- # ==== Returns
45
- # @return [URIRef] The newly created URIReference.
46
- #
47
- # @raise [Error] Checks validity of the desired shortname and raises if it is incorrect.
48
- # @author Tom Morris, Pius Uzamere
49
-
50
- def method_missing(methodname, *args)
51
- unless fragment
52
- URIRef.new(@uri + methodname.to_s)
53
- else
54
- URIRef.new(@uri + '#' + methodname.to_s)
33
+
34
+ ##
35
+ # Allows the construction of arbitrary URIs on the namespace.
36
+ #
37
+ # ==== Example
38
+ # foaf = Namespace.new("http://xmlns.com/foaf/0.1/", "foaf"); foaf.knows # => returns a new URIRef with URI "http://xmlns.com/foaf/0.1/knows"
39
+ # foaf = Namespace.new("http://xmlns.com/foaf/0.1/", "foaf", true); foaf.knows # => returns a new URIRef with URI "http://xmlns.com/foaf/0.1/#knows"
40
+ #
41
+ # @param [String] uri the URI of the namespace
42
+ # @param [String] short the short name of the namespace
43
+ # @param [Boolean] fragment are the identifiers on this resource fragment identifiers? (e.g. '#') Defaults to false.
44
+ #
45
+ # ==== Returns
46
+ # @return [URIRef] The newly created URIReference.
47
+ #
48
+ # @raise [Error] Checks validity of the desired shortname and raises if it is incorrect.
49
+ # @author Tom Morris, Pius Uzamere
50
+
51
+ def method_missing(methodname, *args)
52
+ unless fragment
53
+ URIRef.new(@uri + methodname.to_s)
54
+ else
55
+ URIRef.new(@uri + '#' + methodname.to_s)
56
+ end
55
57
  end
56
- end
57
-
58
- def bind(graph)
59
- if graph.class == Graph
60
- graph.bind(self)
61
- else
62
- raise
58
+
59
+ def bind(graph)
60
+ if graph.class == Graph
61
+ graph.bind(self)
62
+ else
63
+ raise
64
+ end
63
65
  end
64
- end
65
-
66
- private
67
- def shortname_valid?(shortname)
68
- if shortname =~ /[a-zA-Z_][a-zA-Z0-9_]+/
69
- return true
70
- else
71
- return false
66
+
67
+ private
68
+ def shortname_valid?(shortname)
69
+ if shortname =~ /[a-zA-Z_][a-zA-Z0-9_]+/
70
+ return true
71
+ else
72
+ return false
73
+ end
72
74
  end
73
75
  end
74
- end
76
+ end
@@ -5,178 +5,184 @@ require 'rena/exceptions/uri_relative_exception'
5
5
  require 'rena/exceptions/about_each_exception'
6
6
  require 'rena/rexml_hacks'
7
7
 
8
- class RdfXmlParser
9
- attr_accessor :xml, :graph
10
- def initialize (xml_str, uri = nil)
11
- @excl = ["http://www.w3.org/1999/02/22-rdf-syntax-ns#resource", "http://www.w3.org/1999/02/22-rdf-syntax-ns#nodeID", "http://www.w3.org/1999/02/22-rdf-syntax-ns#about", "http://www.w3.org/1999/02/22-rdf-syntax-ns#ID"]
12
- if uri != nil
13
- @uri = Addressable::URI.parse(uri)
14
- end
15
- @xml = REXML::Document.new(xml_str)
16
- # self.iterator @xml.root.children
17
- if self.is_rdf?
18
- @graph = Graph.new
19
- @xml.root.each_element { |e|
20
- self.parse_element e
21
- }
22
- # puts @graph.size
23
- end
24
- end
25
-
26
- def is_rdf?
27
- trigger = false
28
- @xml.each_element do |e|
29
- if e.namespaces.has_value? "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
30
- trigger = true
8
+
9
+ module Rena
10
+ class RdfXmlParser
11
+ SYNTAX_BASE = "http://www.w3.org/1999/02/22-rdf-syntax-ns"
12
+ RDF_TYPE = SYNTAX_BASE + "#type"
13
+ RDF_DESCRIPTION = SYNTAX_BASE + "#Description"
14
+
15
+ attr_accessor :xml, :graph
16
+ def initialize(xml_str, uri = nil)
17
+ @excl = ["http://www.w3.org/1999/02/22-rdf-syntax-ns#resource",
18
+ "http://www.w3.org/1999/02/22-rdf-syntax-ns#nodeID",
19
+ "http://www.w3.org/1999/02/22-rdf-syntax-ns#about",
20
+ "http://www.w3.org/1999/02/22-rdf-syntax-ns#ID"]
21
+ if uri != nil
22
+ @uri = Addressable::URI.parse(uri)
23
+ end
24
+ @xml = REXML::Document.new(xml_str)
25
+ # self.iterator @xml.root.children
26
+ if self.is_rdf?
27
+ @graph = Graph.new
28
+
29
+ @xml.root.each_element { |e|
30
+ self.parse_element e
31
+ }
32
+ # puts @graph.size
31
33
  end
32
34
  end
33
- return trigger
34
- end
35
-
36
- protected
37
- def get_uri_from_atts (element, aboutmode = false)
38
- if aboutmode == false
39
- resourceuri = "http://www.w3.org/1999/02/22-rdf-syntax-ns#resource"
40
- else
41
- resourceuri = "http://www.w3.org/1999/02/22-rdf-syntax-ns#about"
42
- end
43
-
44
- subject = nil
45
- element.attributes.each_attribute { |att|
46
- uri = att.namespace + att.name
47
- value = att.to_s
48
- if uri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEach"
49
- raise AboutEachException, "Failed as per RDFMS-AboutEach-Error001.rdf test from 2004 test suite"
35
+
36
+ def is_rdf?
37
+ @xml.each_element do |e|
38
+ if e.namespaces.has_value? "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
39
+ return true
40
+ end
50
41
  end
51
- if uri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEachPrefix"
52
- raise AboutEachException, "Failed as per RDFMS-AboutEach-Error002.rdf test from 2004 test suite"
42
+ return false
43
+ end
44
+
45
+ protected
46
+ def get_uri_from_atts (element, aboutmode = false)
47
+ if aboutmode == false
48
+ resourceuri = "http://www.w3.org/1999/02/22-rdf-syntax-ns#resource"
49
+ else
50
+ resourceuri = "http://www.w3.org/1999/02/22-rdf-syntax-ns#about"
53
51
  end
54
- if uri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#bagID"
55
- raise
56
- if name =~ /^[a-zA-Z_][a-zA-Z0-9]*$/
57
- # TODO: do something intelligent with the bagID
58
- else
59
- raise
52
+
53
+ subject = nil
54
+ element.attributes.each_attribute { |att|
55
+ uri = att.namespace + att.name
56
+ value = att.to_s
57
+ if uri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEach"
58
+ raise AboutEachException, "Failed as per RDFMS-AboutEach-Error001.rdf test from 2004 test suite"
60
59
  end
61
- end
62
-
63
- if uri == resourceuri #specified resource
64
- begin
65
- possible_subject = URIRef.new(value)
66
- rescue UriRelativeException
67
- if value[0..0].to_s != "#"
68
- value = "#" + value
69
- end
70
- begin
71
- value = URIRef.new(element.base + value)
72
- rescue UriRelativeException
73
- # still not a URI
74
- raise
60
+ if uri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEachPrefix"
61
+ raise AboutEachException, "Failed as per RDFMS-AboutEach-Error002.rdf test from 2004 test suite"
62
+ end
63
+ if uri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#bagID"
64
+ raise
65
+ if name =~ /^[a-zA-Z_][a-zA-Z0-9]*$/
66
+ # TODO: do something intelligent with the bagID
75
67
  else
76
- subject = value
68
+ raise
77
69
  end
78
- else
79
- subject = possible_subject
80
- break
81
70
  end
82
- end
83
-
84
- if uri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#nodeID" #BNode with ID
85
- # we have a BNode with an identifier. First, we need to do syntax checking.
86
- if value =~ /^[a-zA-Z_][a-zA-Z0-9]*$/
87
- # now we check to see if the graph has the value
88
- if @graph.has_bnode_identifier?(value)
89
- # if so, pull it in - no need to recreate objects.
90
- subject = @graph.get_bnode_by_identifier(value)
91
- else
92
- # if not, create a new one.
93
- subject = BNode.new(value)
71
+
72
+ if uri == resourceuri #specified resource
73
+ element_uri = Addressable::URI.parse(value)
74
+ if (element_uri.relative?)
75
+ # we have an element with a relative URI
76
+ if (element.base?)
77
+ # the element has a base URI, use that to build the URI
78
+ value = "##{value}" if (value[0..0].to_s != "#")
79
+ value = "#{element.base}#{value}"
80
+ elsif (!@uri.nil?)
81
+ # we can use the document URI to build the URI for the element
82
+ value = @uri + element_uri
83
+ end
94
84
  end
85
+ subject = URIRef.new(value)
95
86
  end
96
- end
97
-
98
- if uri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#ID"
99
- begin
100
- # check for base
101
- if att.element.base?
102
- subject = att.element.base.to_s + value
103
- elsif @uri != nil
104
- compound = @uri.to_s + "#" + value
105
- subject = compound.to_s
106
- else
107
- raise "Needs to have an ID"
87
+
88
+ if uri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#nodeID" #BNode with ID
89
+ # we have a BNode with an identifier. First, we need to do syntax checking.
90
+ if value =~ /^[a-zA-Z_][a-zA-Z0-9]*$/
91
+ # now we check to see if the graph has the value
92
+ if @graph.has_bnode_identifier?(value)
93
+ # if so, pull it in - no need to recreate objects.
94
+ subject = @graph.get_bnode_by_identifier(value)
95
+ else
96
+ # if not, create a new one.
97
+ subject = BNode.new(value)
98
+ end
99
+ end
100
+ end
101
+
102
+ if uri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#ID"
103
+ begin
104
+ # check for base
105
+ if att.element.base?
106
+ subject = att.element.base.to_s + value
107
+ elsif @uri != nil
108
+ compound = @uri.to_s + "#" + value
109
+ subject = compound.to_s
110
+ else
111
+ raise "Needs to have an ID"
112
+ end
113
+ # rescue UriRelativeException
108
114
  end
109
- # rescue UriRelativeException
110
115
  end
111
- end
112
116
 
113
- # add other subject detection subroutines here
114
- }
115
- if subject.class == NilClass
116
- subject = BNode.new
117
- end
118
- return subject
119
- end
120
-
121
- protected
122
- def parse_element (element, subject = nil, resource = false)
123
- if subject == nil
124
- # figure out subject
125
- subject = self.get_uri_from_atts(element, true)
117
+ # add other subject detection subroutines here
118
+ }
119
+ if subject.class == NilClass
120
+ subject = BNode.new
121
+ end
122
+ return subject
126
123
  end
127
124
 
128
- # type parsing
129
- if resource == true
130
- type = URIRef.new(element.namespace + element.name)
131
- unless type.to_s == "http://www.w3.org/1999/02/22-rdf-syntax-ns#Description"
132
- @graph.add_triple(subject, URIRef.new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), type)
125
+ protected
126
+
127
+ def parse_element (element, subject = nil, resource = false)
128
+ if subject == nil
129
+ # figure out subject
130
+ subject = self.get_uri_from_atts(element, true)
133
131
  end
134
- end
135
-
136
- # attribute parsing
137
- element.attributes.each_attribute { |att|
138
- uri = att.namespace + att.name
139
- value = att.to_s
140
132
 
141
- unless @excl.member? uri
142
- @graph.add_triple(subject, URIRef.new(uri), Literal.new(value))
133
+ # type parsing
134
+ if (resource == true or element.attributes.has_key? 'about')
135
+ type = URIRef.new(element.namespace + element.name)
136
+ unless type.to_s == RDF_TYPE
137
+ @graph.add_triple(subject, RDF_DESCRIPTION, type)
138
+ end
143
139
  end
144
- }
140
+
141
+ # attribute parsing
142
+ element.attributes.each_attribute { |att|
143
+ uri = att.namespace + att.name
144
+ value = att.to_s
145
+
146
+ unless @excl.member? uri
147
+ @graph.add_triple(subject, uri, Literal.untyped(value))
148
+ end
149
+ }
145
150
 
146
- # element parsing
147
- element.each_element { |e|
148
- self.parse_resource_element e, subject
149
- }
150
- end
151
-
152
- def parse_resource_element e, subject
153
- uri = e.namespace + e.name
154
- if e.attributes.get_attribute_ns("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "parseType").to_s == "Literal"
155
- @graph.add_triple(subject, URIRef.new(uri), TypedLiteral.new(e.children.to_s.strip, "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral"))
156
- elsif e.has_elements?
157
- # subparsing
158
- e.each_element { |se| #se = 'striped element'
159
- if e.attributes.get_attribute_ns("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "parseType").to_s == "Resource"
160
- object = BNode.new
151
+ # element parsing
152
+ element.each_element { |e|
153
+ self.parse_resource_element e, subject
154
+ }
155
+ end
156
+
157
+ def parse_resource_element e, subject
158
+ uri = e.namespace + e.name
159
+ if e.attributes.get_attribute_ns("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "parseType").to_s == "Literal"
160
+ @graph.add_triple(subject, uri, Literal.typed(e.children.to_s.strip, "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral"))
161
+ elsif e.has_elements?
162
+ # subparsing
163
+ e.each_element { |se| #se = 'striped element'
164
+ if e.attributes.get_attribute_ns("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "parseType").to_s == "Resource"
165
+ object = BNode.new
166
+ else
167
+ object = self.get_uri_from_atts(se, true)
168
+ end
169
+ @graph.add_triple(subject, uri, object)
170
+ self.parse_element(se, object, true)
171
+ }
172
+ elsif e.attributes.get_attribute_ns("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "datatype")
173
+ @graph.add_triple(subject, uri, Literal.typed(e.text, e.attributes.get_attribute_ns("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "datatype").to_s.strip))
174
+ elsif e.has_attributes?
175
+ # get object out
176
+ object = self.get_uri_from_atts(e)
177
+ @graph.add_triple(subject, uri, object)
178
+ elsif e.has_text?
179
+ if e.lang?
180
+ @graph.add_triple(subject, uri, Literal.untyped(e.text, e.lang))
161
181
  else
162
- object = self.get_uri_from_atts(se, true)
182
+ @graph.add_triple(subject, uri, Literal.untyped(e.text))
163
183
  end
164
- @graph.add_triple(subject, URIRef.new(uri), object)
165
- self.parse_element(se, object, true)
166
- }
167
- elsif e.attributes.get_attribute_ns("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "datatype")
168
- @graph.add_triple(subject, URIRef.new(uri), TypedLiteral.new(e.text, e.attributes.get_attribute_ns("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "datatype").to_s.strip))
169
- elsif e.has_attributes?
170
- # get object out
171
- object = self.get_uri_from_atts(e)
172
- @graph.add_triple(subject, URIRef.new(uri), object)
173
- elsif e.has_text?
174
- if e.lang?
175
- @graph.add_triple(subject, URIRef.new(uri), Literal.new(e.text, e.lang))
176
- else
177
- @graph.add_triple(subject, URIRef.new(uri), Literal.new(e.text))
178
184
  end
179
185
  end
186
+
180
187
  end
181
-
182
188
  end