tommorris-rena 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +13 -9
- data/lib/rena/bnode.rb +65 -59
- data/lib/rena/graph.rb +173 -135
- data/lib/rena/literal.rb +198 -80
- data/lib/rena/n3_grammar.treetop +129 -0
- data/lib/rena/n3parser.rb +145 -0
- data/lib/rena/namespace.rb +68 -66
- data/lib/rena/rdfxmlparser.rb +156 -150
- data/lib/rena/rexml_hacks.rb +36 -9
- data/lib/rena/triple.rb +80 -63
- data/lib/rena/uriref.rb +41 -27
- data/rena.gemspec +8 -7
- data/{test/spec/bnode.spec.rb → spec/bnode_spec.rb} +5 -1
- data/{test/spec/graph.spec.rb → spec/graph_spec.rb} +26 -7
- data/spec/literal_spec.rb +136 -0
- data/{test/spec/namespaces.spec.rb → spec/namespaces_spec.rb} +0 -0
- data/{test/spec/parser.spec.rb → spec/parser_spec.rb} +27 -1
- data/{test/spec/rexml_hacks.spec.rb → spec/rexml_hacks_spec.rb} +3 -4
- data/spec/triple_spec.rb +100 -0
- data/{test/spec/uriref.spec.rb → spec/uriref_spec.rb} +14 -1
- metadata +24 -12
- data/test/spec/literal.spec.rb +0 -112
- data/test/spec/triple.spec.rb +0 -32
data/lib/rena/namespace.rb
CHANGED
@@ -1,74 +1,76 @@
|
|
1
1
|
require 'rena/uriref'
|
2
2
|
require 'rena/graph'
|
3
3
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
4
|
+
module Rena
|
5
|
+
class Namespace
|
6
|
+
attr_accessor :short, :uri, :fragment
|
7
|
+
|
8
|
+
##
|
9
|
+
# Creates a new namespace given a URI and the short name.
|
10
|
+
#
|
11
|
+
# ==== Example
|
12
|
+
# Namespace.new("http://xmlns.com/foaf/0.1/", "foaf") # => returns a new Foaf namespace
|
13
|
+
#
|
14
|
+
# @param [String] uri the URI of the namespace
|
15
|
+
# @param [String] short the short name of the namespace
|
16
|
+
# @param [Boolean] fragment are the identifiers on this resource fragment identifiers? (e.g. '#') Defaults to false.
|
17
|
+
#
|
18
|
+
# ==== Returns
|
19
|
+
# @return [Namespace] The newly created namespace.
|
20
|
+
#
|
21
|
+
# @raise [Error] Checks validity of the desired shortname and raises if it is incorrect.
|
22
|
+
# @author Tom Morris, Pius Uzamere
|
23
|
+
|
24
|
+
def initialize(uri, short, fragment = false)
|
25
|
+
@uri = uri
|
26
|
+
@fragment = fragment
|
27
|
+
if shortname_valid?(short)
|
28
|
+
@short = short
|
29
|
+
else
|
30
|
+
raise
|
31
|
+
end
|
30
32
|
end
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
33
|
+
|
34
|
+
##
|
35
|
+
# Allows the construction of arbitrary URIs on the namespace.
|
36
|
+
#
|
37
|
+
# ==== Example
|
38
|
+
# foaf = Namespace.new("http://xmlns.com/foaf/0.1/", "foaf"); foaf.knows # => returns a new URIRef with URI "http://xmlns.com/foaf/0.1/knows"
|
39
|
+
# foaf = Namespace.new("http://xmlns.com/foaf/0.1/", "foaf", true); foaf.knows # => returns a new URIRef with URI "http://xmlns.com/foaf/0.1/#knows"
|
40
|
+
#
|
41
|
+
# @param [String] uri the URI of the namespace
|
42
|
+
# @param [String] short the short name of the namespace
|
43
|
+
# @param [Boolean] fragment are the identifiers on this resource fragment identifiers? (e.g. '#') Defaults to false.
|
44
|
+
#
|
45
|
+
# ==== Returns
|
46
|
+
# @return [URIRef] The newly created URIRegerence.
|
47
|
+
#
|
48
|
+
# @raise [Error] Checks validity of the desired shortname and raises if it is incorrect.
|
49
|
+
# @author Tom Morris, Pius Uzamere
|
50
|
+
|
51
|
+
def method_missing(methodname, *args)
|
52
|
+
unless fragment
|
53
|
+
URIRef.new(@uri + methodname.to_s)
|
54
|
+
else
|
55
|
+
URIRef.new(@uri + '#' + methodname.to_s)
|
56
|
+
end
|
55
57
|
end
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
58
|
+
|
59
|
+
def bind(graph)
|
60
|
+
if graph.class == Graph
|
61
|
+
graph.bind(self)
|
62
|
+
else
|
63
|
+
raise
|
64
|
+
end
|
63
65
|
end
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
66
|
+
|
67
|
+
private
|
68
|
+
def shortname_valid?(shortname)
|
69
|
+
if shortname =~ /[a-zA-Z_][a-zA-Z0-9_]+/
|
70
|
+
return true
|
71
|
+
else
|
72
|
+
return false
|
73
|
+
end
|
72
74
|
end
|
73
75
|
end
|
74
|
-
end
|
76
|
+
end
|
data/lib/rena/rdfxmlparser.rb
CHANGED
@@ -5,178 +5,184 @@ require 'rena/exceptions/uri_relative_exception'
|
|
5
5
|
require 'rena/exceptions/about_each_exception'
|
6
6
|
require 'rena/rexml_hacks'
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
8
|
+
|
9
|
+
module Rena
|
10
|
+
class RdfXmlParser
|
11
|
+
SYNTAX_BASE = "http://www.w3.org/1999/02/22-rdf-syntax-ns"
|
12
|
+
RDF_TYPE = SYNTAX_BASE + "#type"
|
13
|
+
RDF_DESCRIPTION = SYNTAX_BASE + "#Description"
|
14
|
+
|
15
|
+
attr_accessor :xml, :graph
|
16
|
+
def initialize(xml_str, uri = nil)
|
17
|
+
@excl = ["http://www.w3.org/1999/02/22-rdf-syntax-ns#resource",
|
18
|
+
"http://www.w3.org/1999/02/22-rdf-syntax-ns#nodeID",
|
19
|
+
"http://www.w3.org/1999/02/22-rdf-syntax-ns#about",
|
20
|
+
"http://www.w3.org/1999/02/22-rdf-syntax-ns#ID"]
|
21
|
+
if uri != nil
|
22
|
+
@uri = Addressable::URI.parse(uri)
|
23
|
+
end
|
24
|
+
@xml = REXML::Document.new(xml_str)
|
25
|
+
# self.iterator @xml.root.children
|
26
|
+
if self.is_rdf?
|
27
|
+
@graph = Graph.new
|
28
|
+
|
29
|
+
@xml.root.each_element { |e|
|
30
|
+
self.parse_element e
|
31
|
+
}
|
32
|
+
# puts @graph.size
|
31
33
|
end
|
32
34
|
end
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
resourceuri = "http://www.w3.org/1999/02/22-rdf-syntax-ns#resource"
|
40
|
-
else
|
41
|
-
resourceuri = "http://www.w3.org/1999/02/22-rdf-syntax-ns#about"
|
42
|
-
end
|
43
|
-
|
44
|
-
subject = nil
|
45
|
-
element.attributes.each_attribute { |att|
|
46
|
-
uri = att.namespace + att.name
|
47
|
-
value = att.to_s
|
48
|
-
if uri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEach"
|
49
|
-
raise AboutEachException, "Failed as per RDFMS-AboutEach-Error001.rdf test from 2004 test suite"
|
35
|
+
|
36
|
+
def is_rdf?
|
37
|
+
@xml.each_element do |e|
|
38
|
+
if e.namespaces.has_value? "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
39
|
+
return true
|
40
|
+
end
|
50
41
|
end
|
51
|
-
|
52
|
-
|
42
|
+
return false
|
43
|
+
end
|
44
|
+
|
45
|
+
protected
|
46
|
+
def get_uri_from_atts (element, aboutmode = false)
|
47
|
+
if aboutmode == false
|
48
|
+
resourceuri = "http://www.w3.org/1999/02/22-rdf-syntax-ns#resource"
|
49
|
+
else
|
50
|
+
resourceuri = "http://www.w3.org/1999/02/22-rdf-syntax-ns#about"
|
53
51
|
end
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
52
|
+
|
53
|
+
subject = nil
|
54
|
+
element.attributes.each_attribute { |att|
|
55
|
+
uri = att.namespace + att.name
|
56
|
+
value = att.to_s
|
57
|
+
if uri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEach"
|
58
|
+
raise AboutEachException, "Failed as per RDFMS-AboutEach-Error001.rdf test from 2004 test suite"
|
60
59
|
end
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
value = "#" + value
|
69
|
-
end
|
70
|
-
begin
|
71
|
-
value = URIRef.new(element.base + value)
|
72
|
-
rescue UriRelativeException
|
73
|
-
# still not a URI
|
74
|
-
raise
|
60
|
+
if uri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEachPrefix"
|
61
|
+
raise AboutEachException, "Failed as per RDFMS-AboutEach-Error002.rdf test from 2004 test suite"
|
62
|
+
end
|
63
|
+
if uri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#bagID"
|
64
|
+
raise
|
65
|
+
if name =~ /^[a-zA-Z_][a-zA-Z0-9]*$/
|
66
|
+
# TODO: do something intelligent with the bagID
|
75
67
|
else
|
76
|
-
|
68
|
+
raise
|
77
69
|
end
|
78
|
-
else
|
79
|
-
subject = possible_subject
|
80
|
-
break
|
81
70
|
end
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
71
|
+
|
72
|
+
if uri == resourceuri #specified resource
|
73
|
+
element_uri = Addressable::URI.parse(value)
|
74
|
+
if (element_uri.relative?)
|
75
|
+
# we have an element with a relative URI
|
76
|
+
if (element.base?)
|
77
|
+
# the element has a base URI, use that to build the URI
|
78
|
+
value = "##{value}" if (value[0..0].to_s != "#")
|
79
|
+
value = "#{element.base}#{value}"
|
80
|
+
elsif (!@uri.nil?)
|
81
|
+
# we can use the document URI to build the URI for the element
|
82
|
+
value = @uri + element_uri
|
83
|
+
end
|
94
84
|
end
|
85
|
+
subject = URIRef.new(value)
|
95
86
|
end
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
87
|
+
|
88
|
+
if uri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#nodeID" #BNode with ID
|
89
|
+
# we have a BNode with an identifier. First, we need to do syntax checking.
|
90
|
+
if value =~ /^[a-zA-Z_][a-zA-Z0-9]*$/
|
91
|
+
# now we check to see if the graph has the value
|
92
|
+
if @graph.has_bnode_identifier?(value)
|
93
|
+
# if so, pull it in - no need to recreate objects.
|
94
|
+
subject = @graph.get_bnode_by_identifier(value)
|
95
|
+
else
|
96
|
+
# if not, create a new one.
|
97
|
+
subject = BNode.new(value)
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
if uri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#ID"
|
103
|
+
begin
|
104
|
+
# check for base
|
105
|
+
if att.element.base?
|
106
|
+
subject = att.element.base.to_s + value
|
107
|
+
elsif @uri != nil
|
108
|
+
compound = @uri.to_s + "#" + value
|
109
|
+
subject = compound.to_s
|
110
|
+
else
|
111
|
+
raise "Needs to have an ID"
|
112
|
+
end
|
113
|
+
# rescue UriRelativeException
|
108
114
|
end
|
109
|
-
# rescue UriRelativeException
|
110
115
|
end
|
111
|
-
end
|
112
116
|
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
end
|
120
|
-
|
121
|
-
protected
|
122
|
-
def parse_element (element, subject = nil, resource = false)
|
123
|
-
if subject == nil
|
124
|
-
# figure out subject
|
125
|
-
subject = self.get_uri_from_atts(element, true)
|
117
|
+
# add other subject detection subroutines here
|
118
|
+
}
|
119
|
+
if subject.class == NilClass
|
120
|
+
subject = BNode.new
|
121
|
+
end
|
122
|
+
return subject
|
126
123
|
end
|
127
124
|
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
125
|
+
protected
|
126
|
+
|
127
|
+
def parse_element (element, subject = nil, resource = false)
|
128
|
+
if subject == nil
|
129
|
+
# figure out subject
|
130
|
+
subject = self.get_uri_from_atts(element, true)
|
133
131
|
end
|
134
|
-
end
|
135
|
-
|
136
|
-
# attribute parsing
|
137
|
-
element.attributes.each_attribute { |att|
|
138
|
-
uri = att.namespace + att.name
|
139
|
-
value = att.to_s
|
140
132
|
|
141
|
-
|
142
|
-
|
133
|
+
# type parsing
|
134
|
+
if (resource == true or element.attributes.has_key? 'about')
|
135
|
+
type = URIRef.new(element.namespace + element.name)
|
136
|
+
unless type.to_s == RDF_TYPE
|
137
|
+
@graph.add_triple(subject, RDF_DESCRIPTION, type)
|
138
|
+
end
|
143
139
|
end
|
144
|
-
|
140
|
+
|
141
|
+
# attribute parsing
|
142
|
+
element.attributes.each_attribute { |att|
|
143
|
+
uri = att.namespace + att.name
|
144
|
+
value = att.to_s
|
145
|
+
|
146
|
+
unless @excl.member? uri
|
147
|
+
@graph.add_triple(subject, uri, Literal.untyped(value))
|
148
|
+
end
|
149
|
+
}
|
145
150
|
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
151
|
+
# element parsing
|
152
|
+
element.each_element { |e|
|
153
|
+
self.parse_resource_element e, subject
|
154
|
+
}
|
155
|
+
end
|
156
|
+
|
157
|
+
def parse_resource_element e, subject
|
158
|
+
uri = e.namespace + e.name
|
159
|
+
if e.attributes.get_attribute_ns("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "parseType").to_s == "Literal"
|
160
|
+
@graph.add_triple(subject, uri, Literal.typed(e.children.to_s.strip, "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral"))
|
161
|
+
elsif e.has_elements?
|
162
|
+
# subparsing
|
163
|
+
e.each_element { |se| #se = 'striped element'
|
164
|
+
if e.attributes.get_attribute_ns("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "parseType").to_s == "Resource"
|
165
|
+
object = BNode.new
|
166
|
+
else
|
167
|
+
object = self.get_uri_from_atts(se, true)
|
168
|
+
end
|
169
|
+
@graph.add_triple(subject, uri, object)
|
170
|
+
self.parse_element(se, object, true)
|
171
|
+
}
|
172
|
+
elsif e.attributes.get_attribute_ns("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "datatype")
|
173
|
+
@graph.add_triple(subject, uri, Literal.typed(e.text, e.attributes.get_attribute_ns("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "datatype").to_s.strip))
|
174
|
+
elsif e.has_attributes?
|
175
|
+
# get object out
|
176
|
+
object = self.get_uri_from_atts(e)
|
177
|
+
@graph.add_triple(subject, uri, object)
|
178
|
+
elsif e.has_text?
|
179
|
+
if e.lang?
|
180
|
+
@graph.add_triple(subject, uri, Literal.untyped(e.text, e.lang))
|
161
181
|
else
|
162
|
-
|
182
|
+
@graph.add_triple(subject, uri, Literal.untyped(e.text))
|
163
183
|
end
|
164
|
-
@graph.add_triple(subject, URIRef.new(uri), object)
|
165
|
-
self.parse_element(se, object, true)
|
166
|
-
}
|
167
|
-
elsif e.attributes.get_attribute_ns("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "datatype")
|
168
|
-
@graph.add_triple(subject, URIRef.new(uri), TypedLiteral.new(e.text, e.attributes.get_attribute_ns("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "datatype").to_s.strip))
|
169
|
-
elsif e.has_attributes?
|
170
|
-
# get object out
|
171
|
-
object = self.get_uri_from_atts(e)
|
172
|
-
@graph.add_triple(subject, URIRef.new(uri), object)
|
173
|
-
elsif e.has_text?
|
174
|
-
if e.lang?
|
175
|
-
@graph.add_triple(subject, URIRef.new(uri), Literal.new(e.text, e.lang))
|
176
|
-
else
|
177
|
-
@graph.add_triple(subject, URIRef.new(uri), Literal.new(e.text))
|
178
184
|
end
|
179
185
|
end
|
186
|
+
|
180
187
|
end
|
181
|
-
|
182
188
|
end
|