metade-rena 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,129 @@
1
grammar N3Grammer
  # Treetop PEG grammar for a subset of Notation3 / Turtle.
  #
  # NOTE(review): the rules `statementlist` and `nodelist` are referenced by
  # `anonnode` but are not defined anywhere in this grammar, so the "{ ... }"
  # and "( ... )" forms cannot be parsed as written.

  # Entry point: a document is a sequence of statements and directives.
  rule document
    statements
  end

  # Zero or more whitespace/comment runs, statements or directives, each
  # optionally terminated by '.'.
  rule statements
    (space / (statement / directive) space* ('.' space*)? )*
  end

  # A subject followed by one or more predicate/object groups.
  rule statement
    subject space+ property_list
  end

  rule subject
    node
  end

  rule verb
    ">-" prop "->" # has xxx of
    / "<-" prop "<-" # is xxx of
    # / # / operator # has operator:xxx of??? NOT IMPLEMENTED
    / prop # has xxx of -- shorthand
    # / "has" prop # has xxx of
    # / "is" prop "of" # is xxx of
    / "a" # has rdf:type of
    # / "=" # has daml:equivalent of
  end

  rule prop
    node
  end

  rule node
    uri_ref2 / anonnode / 'this'
  end

  # The inline block below attaches to the last alternative only.
  rule anonnode
    "[" space* property_list space* "]" # something which ...
    / "{" statementlist "}" # the statementlist itself as a resource
    / "(" nodelist ")" {
      def anonnode; true; end
    }
  end

  # PEG choice is ordered, so every longer alternative must precede the
  # shorter alternative that is its prefix; otherwise the longer one can
  # never match.
  rule property_list
    verb space+ object_list space* ";" space+ property_list
    / verb space+ object_list
    / ":-" anonnode ";" property_list
    / ":-" anonnode #to allow two anonymous forms to be given eg [ a :Truth; :- { :sky :color :blue } ] )
    / '.'
  end

  # Comma-separated objects sharing one subject and verb.
  rule object_list
    object "," space* object_list / object
  end

  # "@prefix name: <uri>" declaration; the prefix name may be empty.
  rule directive
    '@prefix' space+ nprefix:nprefix? ':' space+ uri_ref2:uri_ref2 {
      def directive; true; end
    }
  end

  rule uri_ref2
    qname / "<" uri:URI_Reference ">"
  end

  rule qname
    nprefix ":" localname / ':' localname
  end

  rule object
    subject / literal
  end

  # A quoted string with an optional "^^<datatype>" or "@lang" suffix.
  rule literal
    (string1 / string2) ("^^<" uri:URI_Reference ">" / "@" [a-z]+ )?
  end

  rule localname
    fragid
  end

  rule URI_Reference
    [^{}<>]*
  end

  rule nprefix
    ((alpha / "_") alphanumeric*)
  end

  rule fragid
    alpha alphanumeric*
  end

  rule alpha
    [a-zA-Z]
  end

  rule alphanumeric
    alpha / [0-9] / "_"
  end

  # Whitespace run or a '#' comment running to end of line.
  rule space
    [ \t\n\r]+ / comment
  end

  rule comment
    '#' (![\n\r] .)*
  end

  # " constant-value-with-escaping "
  rule string1
    '"' string1_char+ '"'
  end

  rule string1_char
    !["] .
  end

  # """ constant value with escaping including single or double occurrences of quotes and/or newlines """
  rule string2
    '"""' string2_char* '"""'
  end

  rule string2_char
    !'"""' . # something like this; need to think about it some more
  end

end
@@ -0,0 +1,145 @@
1
+ require 'rena/graph'
2
+ require 'treetop'
3
+
4
+ Treetop.load(File.join(File.dirname(__FILE__), "n3_grammar"))
5
+
6
+ module Rena
7
##
# Parses a Notation3 (or Turtle) string with the N3Grammer Treetop parser
# and loads the resulting triples into a Graph.
class N3Parser
  attr_accessor :graph

  ##
  # Creates a new parser for N3 (or Turtle).
  #
  # The string is parsed immediately. On success the triples are available
  # through #graph; on a syntax error the terminal failures are printed to
  # standard output and #graph is left unset.
  #
  # @param [String] n3_str the Notation3/Turtle string
  # @param [String] uri the URI of the document
  #
  # @author Patrick Sinclair (metade)
  def initialize(n3_str, uri=nil)
    @uri = Addressable::URI.parse(uri) unless uri.nil?
    parser = N3GrammerParser.new
    document = parser.parse(n3_str)
    if document
      @graph = Graph.new
      process_directives(document)
      process_statements(document)
    else
      parser.terminal_failures.each do |tf|
        puts "Expected #{tf.expected_string.inspect} (#{tf.index})- '#{n3_str[tf.index,10].inspect}'"
      end
    end
  end

  protected

  # Registers every @prefix directive of the document as a namespace.
  def process_directives(document)
    directives = document.elements.select { |e| e.elements.first.respond_to?(:directive) }
    directives.map! { |d| d.elements.first }
    directives.each { |d| namespace(d.uri_ref2.uri.text_value, d.nprefix.text_value) }
  end

  # Binds +uri+ under +short+ on the graph; the empty prefix is stored
  # under the reserved name '__local__'.
  def namespace(uri, short)
    short = '__local__' if short == ''
    @graph.namespace(uri, short)
  end

  # Emits one triple per subject / verb / object combination found in the
  # statements of the document.
  def process_statements(document)
    statements = document.elements.select { |e| e.elements.first.respond_to?(:subject) }
    statements.map! { |s| s.elements.first }
    statements.each do |s|
      subject = process_node(s.subject)
      process_properties(s.property_list).each do |p|
        predicate = process_verb(p.verb)
        process_objects(p.object_list).each { |object| triple(subject, predicate, object) }
      end
    end
  end

  # Adds a single triple to the graph.
  def triple(subject, predicate, object)
    @graph.add_triple(subject, predicate, object)
  end

  # Creates a fresh blank node for a "[ ... ]" form, adds its properties to
  # the graph and returns the blank node.
  def process_anonnode(anonnode)
    bnode = BNode.new
    process_properties(anonnode.property_list).each do |p|
      # Use process_verb (not process_node) so that the "a" keyword is
      # resolved to rdf:type here exactly as it is for top-level statements.
      predicate = process_verb(p.verb)
      process_objects(p.object_list).each { |object| triple(bnode, predicate, object) }
    end
    bnode
  end

  # Resolves a verb node; the keyword "a" is shorthand for rdf:type.
  def process_verb(verb)
    return URIRef.new('http://www.w3.org/1999/02/22-rdf-syntax-ns#type') if verb.text_value == 'a'
    process_node(verb)
  end

  # Resolves a node that is either an explicit <uri> or a prefix:localname.
  def process_node(node)
    if node.respond_to?(:uri)
      URIRef.new(node.uri.text_value)
    else
      prefix = node.respond_to?(:nprefix) ? node.nprefix.text_value : nil
      build_uri(prefix, node.localname.text_value)
    end
  end

  # Flattens the right-recursive property_list into an array of the nodes
  # that carry a verb.
  def process_properties(properties)
    result = []
    result << properties if properties.respond_to?(:verb)
    result << process_properties(properties.property_list) if properties.respond_to?(:property_list)
    result.flatten
  end

  # Flattens the right-recursive object_list into an array of resolved
  # objects.
  def process_objects(objects)
    result = []
    first = objects.respond_to?(:object) ? objects.object : objects
    result << process_object(first)
    result << process_objects(objects.object_list) if objects.respond_to?(:object_list)
    result.flatten
  end

  # Resolves one object: a named node, an anonymous "[ ... ]" node, or a
  # literal.
  def process_object(object)
    if object.respond_to?(:localname) || object.respond_to?(:uri)
      process_node(object)
    elsif object.respond_to?(:property_list)
      process_anonnode(object)
    else
      process_literal(object)
    end
  end

  # Builds a typed or untyped Literal from a literal node. The node's second
  # element is the optional "@lang" / "^^<datatype>" suffix.
  def process_literal(object)
    encoding, language = nil, nil
    string, type = object.elements

    unless type.elements.nil?
      if type.elements[0].text_value == '@'
        language = type.elements[1].text_value
      else
        encoding = type.elements[1].text_value
      end
    end

    contents = string.elements[1].text_value
    if encoding.nil?
      Literal.untyped(contents, language)
    else
      Literal.typed(contents, encoding)
    end
  end

  # Builds the resource for prefix:localname. The prefix "_" yields a named
  # blank node; a nil prefix uses the '__local__' namespace.
  def build_uri(prefix, localname)
    prefix = '__local__' if prefix.nil?
    if prefix == '_'
      BNode.new(localname)
    else
      # Invoke the namespace's method_missing hook directly. A plain
      # send(localname) would dispatch to any real method of that name first
      # (including private Kernel methods such as format, exit or sleep),
      # which breaks common terms like dc:format and lets document content
      # trigger arbitrary method calls.
      @graph.nsbinding[prefix].send(:method_missing, localname.to_sym)
    end
  end
end
145
+ end
@@ -0,0 +1,76 @@
1
+ require 'rena/uriref'
2
+ require 'rena/graph'
3
+
4
+ module Rena
5
##
# A URI namespace with a short name (prefix). Calling any undefined method on
# a namespace returns a URIRef for the term of that name.
class Namespace
  attr_accessor :short, :uri, :fragment

  ##
  # Creates a new namespace given a URI and the short name.
  #
  # ==== Example
  #   Namespace.new("http://xmlns.com/foaf/0.1/", "foaf") # => returns a new Foaf namespace
  #
  # @param [String] uri the URI of the namespace
  # @param [String] short the short name of the namespace
  # @param [Boolean] fragment are the identifiers on this resource fragment identifiers? (e.g. '#') Defaults to false.
  #
  # ==== Returns
  # @return [Namespace] The newly created namespace.
  #
  # @raise [RuntimeError] if the short name is not a letter or underscore
  #   followed by any number of letters, digits or underscores.
  # @author Tom Morris, Pius Uzamere
  def initialize(uri, short, fragment = false)
    @uri = uri
    @fragment = fragment
    raise "Invalid namespace shortname: #{short.inspect}" unless shortname_valid?(short)
    @short = short
  end

  ##
  # Allows the construction of arbitrary URIs on the namespace.
  #
  # ==== Example
  #   foaf = Namespace.new("http://xmlns.com/foaf/0.1/", "foaf"); foaf.knows # => returns a new URIRef with URI "http://xmlns.com/foaf/0.1/knows"
  #   foaf = Namespace.new("http://xmlns.com/foaf/0.1/", "foaf", true); foaf.knows # => returns a new URIRef with URI "http://xmlns.com/foaf/0.1/#knows"
  #
  # @param [Symbol] methodname the term to append to the namespace URI
  # @param [Array] args ignored
  #
  # ==== Returns
  # @return [URIRef] The newly created URIReference.
  #
  # @author Tom Morris, Pius Uzamere
  def method_missing(methodname, *args)
    if fragment
      URIRef.new(@uri + '#' + methodname.to_s)
    else
      URIRef.new(@uri + methodname.to_s)
    end
  end

  ##
  # Binds this namespace to the given graph.
  #
  # @param [Graph] graph the graph to bind to
  # @raise [RuntimeError] if +graph+ is not a Graph
  def bind(graph)
    raise "Cannot bind a namespace to #{graph.class}; a Graph is required" unless graph.is_a?(Graph)
    graph.bind(self)
  end

  private

  # True when +shortname+ consists, in full, of a letter or underscore
  # followed by zero or more letters, digits or underscores. The pattern is
  # anchored so that the whole name is checked, and a single character is
  # accepted.
  def shortname_valid?(shortname)
    (shortname =~ /\A[a-zA-Z_][a-zA-Z0-9_]*\z/) ? true : false
  end
end
76
+ end
@@ -0,0 +1,188 @@
1
+ require 'rena/uriref'
2
+ require 'rena/graph'
3
+ require 'rena/literal'
4
+ require 'rena/exceptions/uri_relative_exception'
5
+ require 'rena/exceptions/about_each_exception'
6
+ require 'rena/rexml_hacks'
7
+
8
+
9
+ module Rena
10
##
# Parses an RDF/XML document with REXML and loads the resulting triples into
# a Graph.
class RdfXmlParser
  SYNTAX_BASE = "http://www.w3.org/1999/02/22-rdf-syntax-ns"
  RDF_TYPE = SYNTAX_BASE + "#type"
  RDF_DESCRIPTION = SYNTAX_BASE + "#Description"

  attr_accessor :xml, :graph

  ##
  # Parses +xml_str+ immediately. When the document declares the RDF syntax
  # namespace, #graph holds the resulting triples; otherwise #graph is left
  # unset.
  #
  # @param [String] xml_str the RDF/XML source
  # @param [String] uri the URI of the document, used to resolve relative
  #   references and rdf:ID values when no base is set on the element
  def initialize(xml_str, uri = nil)
    # Attributes that identify a node rather than describe it; they are not
    # turned into property triples.
    @excl = ["http://www.w3.org/1999/02/22-rdf-syntax-ns#resource",
             "http://www.w3.org/1999/02/22-rdf-syntax-ns#nodeID",
             "http://www.w3.org/1999/02/22-rdf-syntax-ns#about",
             "http://www.w3.org/1999/02/22-rdf-syntax-ns#ID"]
    @uri = Addressable::URI.parse(uri) unless uri.nil?
    @xml = REXML::Document.new(xml_str)
    if is_rdf?
      @graph = Graph.new
      @xml.root.each_element { |e| parse_element(e) }
    end
  end

  ##
  # @return [Boolean] true when a top-level element declares the RDF syntax
  #   namespace
  def is_rdf?
    @xml.each_element do |e|
      return true if e.namespaces.has_value?("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
    end
    false
  end

  protected

  # Determines the resource an element refers to from its attributes.
  #
  # Looks at rdf:about (when +aboutmode+ is true) or rdf:resource (otherwise),
  # as well as rdf:nodeID and rdf:ID. Returns a new BNode when none of them
  # yields a subject.
  #
  # Raises AboutEachException for rdf:aboutEach / rdf:aboutEachPrefix and a
  # RuntimeError for rdf:bagID or for an rdf:ID that cannot be resolved.
  def get_uri_from_atts(element, aboutmode = false)
    resourceuri = if aboutmode == false
                    "http://www.w3.org/1999/02/22-rdf-syntax-ns#resource"
                  else
                    "http://www.w3.org/1999/02/22-rdf-syntax-ns#about"
                  end

    subject = nil
    element.attributes.each_attribute do |att|
      uri = att.namespace + att.name
      value = att.to_s

      if uri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEach"
        raise AboutEachException, "Failed as per RDFMS-AboutEach-Error001.rdf test from 2004 test suite"
      end
      if uri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEachPrefix"
        raise AboutEachException, "Failed as per RDFMS-AboutEach-Error002.rdf test from 2004 test suite"
      end
      if uri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#bagID"
        # TODO: do something intelligent with the bagID
        raise "rdf:bagID is not supported"
      end

      if uri == resourceuri # specified resource
        element_uri = Addressable::URI.parse(value)
        if element_uri.relative?
          # we have an element with a relative URI
          if element.base?
            # the element has a base URI, use that to build the URI
            value = "##{value}" if value[0..0].to_s != "#"
            value = "#{element.base}#{value}"
          elsif !@uri.nil?
            # we can use the document URI to build the URI for the element
            value = @uri + element_uri
          end
        end
        subject = URIRef.new(value)
      end

      if uri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#nodeID" # BNode with ID
        # we have a BNode with an identifier. First, we need to do syntax checking.
        if value =~ /^[a-zA-Z_][a-zA-Z0-9]*$/
          # reuse the graph's node for this identifier when it already has
          # one - no need to recreate objects.
          subject = if @graph.has_bnode_identifier?(value)
                      @graph.get_bnode_by_identifier(value)
                    else
                      BNode.new(value)
                    end
        end
      end

      if uri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#ID"
        # rdf:ID names a fragment of the base (or document) URI, so the two
        # parts are joined with '#'.
        if att.element.base?
          base = att.element.base.to_s
          subject = base.end_with?("#") ? base + value : base + "#" + value
        elsif @uri != nil
          subject = @uri.to_s + "#" + value
        else
          raise "Needs to have an ID"
        end
      end

      # add other subject detection subroutines here
    end

    subject = BNode.new if subject.nil?
    subject
  end

  # Parses a node element: works out its subject (unless one is given), adds
  # an rdf:type triple for typed nodes, turns the remaining attributes into
  # literal properties and descends into the child property elements.
  def parse_element(element, subject = nil, resource = false)
    # figure out subject
    subject = get_uri_from_atts(element, true) if subject == nil

    # type parsing: a node element other than rdf:Description states the
    # rdf:type of its subject.
    if resource == true || element.attributes.has_key?('about')
      type = URIRef.new(element.namespace + element.name)
      unless type.to_s == RDF_DESCRIPTION
        @graph.add_triple(subject, RDF_TYPE, type)
      end
    end

    # attribute parsing
    element.attributes.each_attribute do |att|
      uri = att.namespace + att.name
      value = att.to_s
      @graph.add_triple(subject, uri, Literal.untyped(value)) unless @excl.member?(uri)
    end

    # element parsing
    element.each_element { |e| parse_resource_element(e, subject) }
  end

  # Parses a property element of +subject+ and adds the triple(s) it states.
  def parse_resource_element(e, subject)
    rdf_ns = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
    uri = e.namespace + e.name
    parse_type = e.attributes.get_attribute_ns(rdf_ns, "parseType").to_s
    datatype = e.attributes.get_attribute_ns(rdf_ns, "datatype")

    if parse_type == "Literal"
      @graph.add_triple(subject, uri, Literal.typed(e.children.to_s.strip, "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral"))
    elsif e.has_elements?
      # subparsing
      e.each_element do |se| # se = 'striped element'
        object = parse_type == "Resource" ? BNode.new : get_uri_from_atts(se, true)
        @graph.add_triple(subject, uri, object)
        parse_element(se, object, true)
      end
    elsif datatype
      @graph.add_triple(subject, uri, Literal.typed(e.text, datatype.to_s.strip))
    elsif e.has_attributes?
      # get object out
      object = get_uri_from_atts(e)
      @graph.add_triple(subject, uri, object)
    elsif e.has_text?
      if e.lang?
        @graph.add_triple(subject, uri, Literal.untyped(e.text, e.lang))
      else
        @graph.add_triple(subject, uri, Literal.untyped(e.text))
      end
    end
  end
end
188
+ end