tommorris-rena 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +13 -9
- data/lib/rena/bnode.rb +65 -59
- data/lib/rena/graph.rb +173 -135
- data/lib/rena/literal.rb +198 -80
- data/lib/rena/n3_grammar.treetop +129 -0
- data/lib/rena/n3parser.rb +145 -0
- data/lib/rena/namespace.rb +68 -66
- data/lib/rena/rdfxmlparser.rb +156 -150
- data/lib/rena/rexml_hacks.rb +36 -9
- data/lib/rena/triple.rb +80 -63
- data/lib/rena/uriref.rb +41 -27
- data/rena.gemspec +8 -7
- data/{test/spec/bnode.spec.rb → spec/bnode_spec.rb} +5 -1
- data/{test/spec/graph.spec.rb → spec/graph_spec.rb} +26 -7
- data/spec/literal_spec.rb +136 -0
- data/{test/spec/namespaces.spec.rb → spec/namespaces_spec.rb} +0 -0
- data/{test/spec/parser.spec.rb → spec/parser_spec.rb} +27 -1
- data/{test/spec/rexml_hacks.spec.rb → spec/rexml_hacks_spec.rb} +3 -4
- data/spec/triple_spec.rb +100 -0
- data/{test/spec/uriref.spec.rb → spec/uriref_spec.rb} +14 -1
- metadata +24 -12
- data/test/spec/literal.spec.rb +0 -112
- data/test/spec/triple.spec.rb +0 -32
data/lib/rena/namespace.rb
CHANGED
@@ -1,74 +1,76 @@
|
|
1
1
|
require 'rena/uriref'
|
2
2
|
require 'rena/graph'
|
3
3
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
4
|
+
module Rena
|
5
|
+
class Namespace
|
6
|
+
attr_accessor :short, :uri, :fragment
|
7
|
+
|
8
|
+
##
|
9
|
+
# Creates a new namespace given a URI and the short name.
|
10
|
+
#
|
11
|
+
# ==== Example
|
12
|
+
# Namespace.new("http://xmlns.com/foaf/0.1/", "foaf") # => returns a new Foaf namespace
|
13
|
+
#
|
14
|
+
# @param [String] uri the URI of the namespace
|
15
|
+
# @param [String] short the short name of the namespace
|
16
|
+
# @param [Boolean] fragment whether identifiers in this namespace are fragment identifiers (i.e. appended to the URI after a '#'). Defaults to false.
|
17
|
+
#
|
18
|
+
# ==== Returns
|
19
|
+
# @return [Namespace] The newly created namespace.
|
20
|
+
#
|
21
|
+
# @raise [RuntimeError] Checks validity of the desired shortname and raises if it is incorrect.
|
22
|
+
# @author Tom Morris, Pius Uzamere
|
23
|
+
|
24
|
+
def initialize(uri, short, fragment = false)
|
25
|
+
@uri = uri
|
26
|
+
@fragment = fragment
|
27
|
+
if shortname_valid?(short)
|
28
|
+
@short = short
|
29
|
+
else
|
30
|
+
raise
|
31
|
+
end
|
30
32
|
end
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
33
|
+
|
34
|
+
##
|
35
|
+
# Allows the construction of arbitrary URIs on the namespace.
|
36
|
+
#
|
37
|
+
# ==== Example
|
38
|
+
# foaf = Namespace.new("http://xmlns.com/foaf/0.1/", "foaf"); foaf.knows # => returns a new URIRef with URI "http://xmlns.com/foaf/0.1/knows"
|
39
|
+
# foaf = Namespace.new("http://xmlns.com/foaf/0.1/", "foaf", true); foaf.knows # => returns a new URIRef with URI "http://xmlns.com/foaf/0.1/#knows"
|
40
|
+
#
|
41
|
+
# @param [String] uri the URI of the namespace
|
42
|
+
# @param [String] short the short name of the namespace
|
43
|
+
# @param [Boolean] fragment are the identifiers on this resource fragment identifiers? (e.g. '#') Defaults to false.
|
44
|
+
#
|
45
|
+
# ==== Returns
|
46
|
+
# @return [URIRef] The newly created URIReference.
|
47
|
+
#
|
48
|
+
# @raise [Error] Checks validity of the desired shortname and raises if it is incorrect.
|
49
|
+
# @author Tom Morris, Pius Uzamere
|
50
|
+
|
51
|
+
def method_missing(methodname, *args)
|
52
|
+
unless fragment
|
53
|
+
URIRef.new(@uri + methodname.to_s)
|
54
|
+
else
|
55
|
+
URIRef.new(@uri + '#' + methodname.to_s)
|
56
|
+
end
|
55
57
|
end
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
58
|
+
|
59
|
+
def bind(graph)
|
60
|
+
if graph.class == Graph
|
61
|
+
graph.bind(self)
|
62
|
+
else
|
63
|
+
raise
|
64
|
+
end
|
63
65
|
end
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
66
|
+
|
67
|
+
private
|
68
|
+
def shortname_valid?(shortname)
|
69
|
+
if shortname =~ /[a-zA-Z_][a-zA-Z0-9_]+/
|
70
|
+
return true
|
71
|
+
else
|
72
|
+
return false
|
73
|
+
end
|
72
74
|
end
|
73
75
|
end
|
74
|
-
end
|
76
|
+
end
|
data/lib/rena/rdfxmlparser.rb
CHANGED
@@ -5,178 +5,184 @@ require 'rena/exceptions/uri_relative_exception'
|
|
5
5
|
require 'rena/exceptions/about_each_exception'
|
6
6
|
require 'rena/rexml_hacks'
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
8
|
+
|
9
|
+
module Rena
|
10
|
+
class RdfXmlParser
|
11
|
+
SYNTAX_BASE = "http://www.w3.org/1999/02/22-rdf-syntax-ns"
|
12
|
+
RDF_TYPE = SYNTAX_BASE + "#type"
|
13
|
+
RDF_DESCRIPTION = SYNTAX_BASE + "#Description"
|
14
|
+
|
15
|
+
attr_accessor :xml, :graph
|
16
|
+
def initialize(xml_str, uri = nil)
|
17
|
+
@excl = ["http://www.w3.org/1999/02/22-rdf-syntax-ns#resource",
|
18
|
+
"http://www.w3.org/1999/02/22-rdf-syntax-ns#nodeID",
|
19
|
+
"http://www.w3.org/1999/02/22-rdf-syntax-ns#about",
|
20
|
+
"http://www.w3.org/1999/02/22-rdf-syntax-ns#ID"]
|
21
|
+
if uri != nil
|
22
|
+
@uri = Addressable::URI.parse(uri)
|
23
|
+
end
|
24
|
+
@xml = REXML::Document.new(xml_str)
|
25
|
+
# self.iterator @xml.root.children
|
26
|
+
if self.is_rdf?
|
27
|
+
@graph = Graph.new
|
28
|
+
|
29
|
+
@xml.root.each_element { |e|
|
30
|
+
self.parse_element e
|
31
|
+
}
|
32
|
+
# puts @graph.size
|
31
33
|
end
|
32
34
|
end
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
resourceuri = "http://www.w3.org/1999/02/22-rdf-syntax-ns#resource"
|
40
|
-
else
|
41
|
-
resourceuri = "http://www.w3.org/1999/02/22-rdf-syntax-ns#about"
|
42
|
-
end
|
43
|
-
|
44
|
-
subject = nil
|
45
|
-
element.attributes.each_attribute { |att|
|
46
|
-
uri = att.namespace + att.name
|
47
|
-
value = att.to_s
|
48
|
-
if uri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEach"
|
49
|
-
raise AboutEachException, "Failed as per RDFMS-AboutEach-Error001.rdf test from 2004 test suite"
|
35
|
+
|
36
|
+
def is_rdf?
|
37
|
+
@xml.each_element do |e|
|
38
|
+
if e.namespaces.has_value? "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
39
|
+
return true
|
40
|
+
end
|
50
41
|
end
|
51
|
-
|
52
|
-
|
42
|
+
return false
|
43
|
+
end
|
44
|
+
|
45
|
+
protected
|
46
|
+
def get_uri_from_atts (element, aboutmode = false)
|
47
|
+
if aboutmode == false
|
48
|
+
resourceuri = "http://www.w3.org/1999/02/22-rdf-syntax-ns#resource"
|
49
|
+
else
|
50
|
+
resourceuri = "http://www.w3.org/1999/02/22-rdf-syntax-ns#about"
|
53
51
|
end
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
52
|
+
|
53
|
+
subject = nil
|
54
|
+
element.attributes.each_attribute { |att|
|
55
|
+
uri = att.namespace + att.name
|
56
|
+
value = att.to_s
|
57
|
+
if uri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEach"
|
58
|
+
raise AboutEachException, "Failed as per RDFMS-AboutEach-Error001.rdf test from 2004 test suite"
|
60
59
|
end
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
value = "#" + value
|
69
|
-
end
|
70
|
-
begin
|
71
|
-
value = URIRef.new(element.base + value)
|
72
|
-
rescue UriRelativeException
|
73
|
-
# still not a URI
|
74
|
-
raise
|
60
|
+
if uri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEachPrefix"
|
61
|
+
raise AboutEachException, "Failed as per RDFMS-AboutEach-Error002.rdf test from 2004 test suite"
|
62
|
+
end
|
63
|
+
if uri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#bagID"
|
64
|
+
raise
|
65
|
+
if name =~ /^[a-zA-Z_][a-zA-Z0-9]*$/
|
66
|
+
# TODO: do something intelligent with the bagID
|
75
67
|
else
|
76
|
-
|
68
|
+
raise
|
77
69
|
end
|
78
|
-
else
|
79
|
-
subject = possible_subject
|
80
|
-
break
|
81
70
|
end
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
71
|
+
|
72
|
+
if uri == resourceuri #specified resource
|
73
|
+
element_uri = Addressable::URI.parse(value)
|
74
|
+
if (element_uri.relative?)
|
75
|
+
# we have an element with a relative URI
|
76
|
+
if (element.base?)
|
77
|
+
# the element has a base URI, use that to build the URI
|
78
|
+
value = "##{value}" if (value[0..0].to_s != "#")
|
79
|
+
value = "#{element.base}#{value}"
|
80
|
+
elsif (!@uri.nil?)
|
81
|
+
# we can use the document URI to build the URI for the element
|
82
|
+
value = @uri + element_uri
|
83
|
+
end
|
94
84
|
end
|
85
|
+
subject = URIRef.new(value)
|
95
86
|
end
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
87
|
+
|
88
|
+
if uri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#nodeID" #BNode with ID
|
89
|
+
# we have a BNode with an identifier. First, we need to do syntax checking.
|
90
|
+
if value =~ /^[a-zA-Z_][a-zA-Z0-9]*$/
|
91
|
+
# now we check to see if the graph has the value
|
92
|
+
if @graph.has_bnode_identifier?(value)
|
93
|
+
# if so, pull it in - no need to recreate objects.
|
94
|
+
subject = @graph.get_bnode_by_identifier(value)
|
95
|
+
else
|
96
|
+
# if not, create a new one.
|
97
|
+
subject = BNode.new(value)
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
if uri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#ID"
|
103
|
+
begin
|
104
|
+
# check for base
|
105
|
+
if att.element.base?
|
106
|
+
subject = att.element.base.to_s + value
|
107
|
+
elsif @uri != nil
|
108
|
+
compound = @uri.to_s + "#" + value
|
109
|
+
subject = compound.to_s
|
110
|
+
else
|
111
|
+
raise "Needs to have an ID"
|
112
|
+
end
|
113
|
+
# rescue UriRelativeException
|
108
114
|
end
|
109
|
-
# rescue UriRelativeException
|
110
115
|
end
|
111
|
-
end
|
112
116
|
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
end
|
120
|
-
|
121
|
-
protected
|
122
|
-
def parse_element (element, subject = nil, resource = false)
|
123
|
-
if subject == nil
|
124
|
-
# figure out subject
|
125
|
-
subject = self.get_uri_from_atts(element, true)
|
117
|
+
# add other subject detection subroutines here
|
118
|
+
}
|
119
|
+
if subject.class == NilClass
|
120
|
+
subject = BNode.new
|
121
|
+
end
|
122
|
+
return subject
|
126
123
|
end
|
127
124
|
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
125
|
+
protected
|
126
|
+
|
127
|
+
def parse_element (element, subject = nil, resource = false)
|
128
|
+
if subject == nil
|
129
|
+
# figure out subject
|
130
|
+
subject = self.get_uri_from_atts(element, true)
|
133
131
|
end
|
134
|
-
end
|
135
|
-
|
136
|
-
# attribute parsing
|
137
|
-
element.attributes.each_attribute { |att|
|
138
|
-
uri = att.namespace + att.name
|
139
|
-
value = att.to_s
|
140
132
|
|
141
|
-
|
142
|
-
|
133
|
+
# type parsing
|
134
|
+
if (resource == true or element.attributes.has_key? 'about')
|
135
|
+
type = URIRef.new(element.namespace + element.name)
|
136
|
+
unless type.to_s == RDF_TYPE
|
137
|
+
@graph.add_triple(subject, RDF_DESCRIPTION, type)
|
138
|
+
end
|
143
139
|
end
|
144
|
-
|
140
|
+
|
141
|
+
# attribute parsing
|
142
|
+
element.attributes.each_attribute { |att|
|
143
|
+
uri = att.namespace + att.name
|
144
|
+
value = att.to_s
|
145
|
+
|
146
|
+
unless @excl.member? uri
|
147
|
+
@graph.add_triple(subject, uri, Literal.untyped(value))
|
148
|
+
end
|
149
|
+
}
|
145
150
|
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
151
|
+
# element parsing
|
152
|
+
element.each_element { |e|
|
153
|
+
self.parse_resource_element e, subject
|
154
|
+
}
|
155
|
+
end
|
156
|
+
|
157
|
+
def parse_resource_element e, subject
|
158
|
+
uri = e.namespace + e.name
|
159
|
+
if e.attributes.get_attribute_ns("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "parseType").to_s == "Literal"
|
160
|
+
@graph.add_triple(subject, uri, Literal.typed(e.children.to_s.strip, "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral"))
|
161
|
+
elsif e.has_elements?
|
162
|
+
# subparsing
|
163
|
+
e.each_element { |se| #se = 'striped element'
|
164
|
+
if e.attributes.get_attribute_ns("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "parseType").to_s == "Resource"
|
165
|
+
object = BNode.new
|
166
|
+
else
|
167
|
+
object = self.get_uri_from_atts(se, true)
|
168
|
+
end
|
169
|
+
@graph.add_triple(subject, uri, object)
|
170
|
+
self.parse_element(se, object, true)
|
171
|
+
}
|
172
|
+
elsif e.attributes.get_attribute_ns("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "datatype")
|
173
|
+
@graph.add_triple(subject, uri, Literal.typed(e.text, e.attributes.get_attribute_ns("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "datatype").to_s.strip))
|
174
|
+
elsif e.has_attributes?
|
175
|
+
# get object out
|
176
|
+
object = self.get_uri_from_atts(e)
|
177
|
+
@graph.add_triple(subject, uri, object)
|
178
|
+
elsif e.has_text?
|
179
|
+
if e.lang?
|
180
|
+
@graph.add_triple(subject, uri, Literal.untyped(e.text, e.lang))
|
161
181
|
else
|
162
|
-
|
182
|
+
@graph.add_triple(subject, uri, Literal.untyped(e.text))
|
163
183
|
end
|
164
|
-
@graph.add_triple(subject, URIRef.new(uri), object)
|
165
|
-
self.parse_element(se, object, true)
|
166
|
-
}
|
167
|
-
elsif e.attributes.get_attribute_ns("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "datatype")
|
168
|
-
@graph.add_triple(subject, URIRef.new(uri), TypedLiteral.new(e.text, e.attributes.get_attribute_ns("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "datatype").to_s.strip))
|
169
|
-
elsif e.has_attributes?
|
170
|
-
# get object out
|
171
|
-
object = self.get_uri_from_atts(e)
|
172
|
-
@graph.add_triple(subject, URIRef.new(uri), object)
|
173
|
-
elsif e.has_text?
|
174
|
-
if e.lang?
|
175
|
-
@graph.add_triple(subject, URIRef.new(uri), Literal.new(e.text, e.lang))
|
176
|
-
else
|
177
|
-
@graph.add_triple(subject, URIRef.new(uri), Literal.new(e.text))
|
178
184
|
end
|
179
185
|
end
|
186
|
+
|
180
187
|
end
|
181
|
-
|
182
188
|
end
|