metade-rena 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,129 @@
1
grammar N3Grammer
  # Treetop (PEG) grammar for a subset of Notation3 / Turtle.
  # Alternatives separated by "/" are tried in order and the first one that
  # matches wins, so longer alternatives must be listed before their prefixes.

  # Entry point: a document is just a list of statements and directives.
  rule document
    statements
  end

  # Any number of items; each item is either whitespace/comment, or a
  # statement or directive optionally followed by a terminating '.'.
  rule statements
    (space / (statement / directive) space* ('.' space*)? )*
  end

  # A subject followed by the properties asserted about it.
  rule statement
    subject space+ property_list
  end

  rule subject
    node
  end

  rule verb
    ">-" prop "->"           # has xxx of
    / "<-" prop "<-"         # is xxx of
    # / operator             # has operator:xxx of??? NOT IMPLEMENTED
    / prop                   # has xxx of -- shorthand
    # / "has" prop           # has xxx of
    # / "is" prop "of"       # is xxx of
    / "a"                    # has rdf:type of
    # / "="                  # has daml:equivalent of
  end

  rule prop
    node
  end

  rule node
    uri_ref2 / anonnode / 'this'
  end

  # NOTE(review): `statementlist` and `nodelist` are referenced below but no
  # rule with either name is defined in this grammar, so the "{...}" and
  # "(...)" forms cannot currently be parsed.
  # The inline block applies only to the last ("(" nodelist ")") alternative.
  rule anonnode
    "[" space* property_list space* "]"   # something which ...
    / "{" statementlist "}"               # the statementlist itself as a resource
    / "(" nodelist ")" {
      def anonnode; true; end
    }
  end

  # The ":-" anonnode ";" property_list alternative is listed before the bare
  # ":-" anonnode form: with ordered choice the shorter form would otherwise
  # always match first and the longer one could never be reached.
  rule property_list
    verb space+ object_list space* ";" space+ property_list
    / verb space+ object_list
    / ":-" anonnode ";" property_list
    / ":-" anonnode   # to allow two anonymous forms to be given eg [ a :Truth; :- { :sky :color :blue } ] )
    / '.'
  end

  # One or more comma-separated objects.
  rule object_list
    object "," space* object_list / object
  end

  # "@prefix" declaration; the prefix name is optional (default namespace).
  rule directive
    '@prefix' space+ nprefix:nprefix? ':' space+ uri_ref2:uri_ref2 {
      def directive; true; end
    }
  end

  # Either a qualified name or a URI reference in angle brackets.
  rule uri_ref2
    qname / "<" uri:URI_Reference ">"
  end

  rule qname
    nprefix ":" localname / ':' localname
  end

  rule object
    subject / literal
  end

  # A quoted string with an optional "^^<datatype-uri>" or "@lang" suffix.
  rule literal
    (string1 / string2) ("^^<" uri:URI_Reference ">" / "@" [a-z]+ )?
  end

  rule localname
    fragid
  end

  rule URI_Reference
    [^{}<>]*
  end

  rule nprefix
    ((alpha / "_") alphanumeric*)
  end

  rule fragid
    alpha alphanumeric*
  end

  rule alpha
    [a-zA-Z]
  end

  rule alphanumeric
    alpha / [0-9] / "_"
  end

  # A run of whitespace characters, or a comment.
  rule space
    [ \t\n\r]+ / comment
  end

  # '#' up to (but not including) the end of the line.
  rule comment
    '#' (![\n\r] .)*
  end

  # " constant-value-with-escaping "
  # NOTE: requires at least one character, so the empty string "" is not matched.
  rule string1
    '"' string1_char+ '"'
  end

  rule string1_char
    !["] .
  end

  # """ constant value with escaping including single or double occurrences of quotes and/or newlines """
  rule string2
    '"""' string2_char* '"""'
  end

  rule string2_char
    !'"""' . # something like this; need to think about it some more
  end

end
@@ -0,0 +1,145 @@
1
+ require 'rena/graph'
2
+ require 'treetop'
3
+
4
+ Treetop.load(File.join(File.dirname(__FILE__), "n3_grammar"))
5
+
6
+ module Rena
7
class N3Parser
  attr_accessor :graph

  ##
  # Creates a new parser for N3 (or Turtle) and parses +n3_str+ immediately.
  #
  # On success the parsed triples are available through #graph. When the
  # input does not match the grammar, the parser's terminal failures are
  # reported on standard error and #graph is left unset (+nil+).
  #
  # @param [String] n3_str the Notation3/Turtle string
  # @param [String] uri the URI of the document
  #
  # @author Patrick Sinclair (metade)
  def initialize(n3_str, uri=nil)
    @uri = Addressable::URI.parse(uri) unless uri.nil?
    parser = N3GrammerParser.new
    document = parser.parse(n3_str)
    if document
      @graph = Graph.new
      process_directives(document)
      process_statements(document)
    else
      # Diagnostics go to stderr so a library consumer's stdout stays clean.
      parser.terminal_failures.each do |tf|
        warn "Expected #{tf.expected_string.inspect} (#{tf.index})- '#{n3_str[tf.index,10].inspect}'"
      end
    end
  end

  protected

  # Registers every "@prefix" directive of the document as a graph namespace.
  #
  # @param document the root syntax node returned by the grammar parser
  def process_directives(document)
    document.elements.each do |element|
      directive = element.elements.first
      next unless directive.respond_to?(:directive)
      namespace(directive.uri_ref2.uri.text_value, directive.nprefix.text_value)
    end
  end

  # Binds +short+ to +uri+ on the graph; the empty (default) prefix is stored
  # under the reserved name '__local__'.
  def namespace(uri, short)
    short = '__local__' if short == ''
    @graph.namespace(uri, short)
  end

  # Adds the triples of every statement in the document to the graph.
  #
  # @param document the root syntax node returned by the grammar parser
  def process_statements(document)
    document.elements.each do |element|
      statement = element.elements.first
      next unless statement.respond_to?(:subject)
      add_properties(process_node(statement.subject), statement.property_list)
    end
  end

  # Adds a single triple to the graph.
  def triple(subject, predicate, object)
    @graph.add_triple(subject, predicate, object)
  end

  # Creates a blank node for a "[ ... ]" form, adds its properties to the
  # graph and returns the blank node.
  def process_anonnode(anonnode)
    bnode = BNode.new
    add_properties(bnode, anonnode.property_list)
    bnode
  end

  # Adds one triple per (verb, object) pair of +property_list+ with +subject+
  # as the subject. Verbs go through #process_verb so that the "a" shorthand
  # is resolved to rdf:type for statements and blank nodes alike.
  def add_properties(subject, property_list)
    process_properties(property_list).each do |property|
      predicate = process_verb(property.verb)
      process_objects(property.object_list).each { |object| triple(subject, predicate, object) }
    end
  end

  # Resolves a verb node to a predicate; "a" is shorthand for rdf:type.
  def process_verb(verb)
    return URIRef.new('http://www.w3.org/1999/02/22-rdf-syntax-ns#type') if verb.text_value == 'a'
    process_node(verb)
  end

  # Converts a node into a URIRef: either from its explicit <uri> form or
  # from its (optional) prefix and local name.
  def process_node(node)
    return URIRef.new(node.uri.text_value) if node.respond_to?(:uri)

    prefix = node.respond_to?(:nprefix) ? node.nprefix.text_value : nil
    build_uri(prefix, node.localname.text_value)
  end

  # Flattens a (recursive) property_list node into an array of the nodes
  # that carry a verb.
  def process_properties(properties)
    result = []
    result << properties if properties.respond_to?(:verb)
    result << process_properties(properties.property_list) if properties.respond_to?(:property_list)
    result.flatten
  end

  # Flattens a (recursive) object_list node into an array of processed objects.
  def process_objects(objects)
    head = objects.respond_to?(:object) ? objects.object : objects
    result = [process_object(head)]
    result << process_objects(objects.object_list) if objects.respond_to?(:object_list)
    result.flatten
  end

  # Dispatches on the kind of object node: URI/qname, blank node or literal.
  def process_object(object)
    if object.respond_to?(:localname) || object.respond_to?(:uri)
      process_node(object)
    elsif object.respond_to?(:property_list)
      process_anonnode(object)
    else
      process_literal(object)
    end
  end

  # Builds a typed or untyped (optionally language-tagged) Literal from a
  # literal node, whose elements are the quoted string and the optional
  # "^^<datatype>" / "@lang" suffix.
  def process_literal(object)
    encoding, language = nil, nil
    string, type = object.elements

    # The suffix has no elements when it did not match anything.
    unless type.elements.nil?
      if type.elements[0].text_value == '@'
        language = type.elements[1].text_value
      else
        encoding = type.elements[1].text_value
      end
    end

    value = string.elements[1].text_value
    encoding.nil? ? Literal.untyped(value, language) : Literal.typed(value, encoding)
  end

  # Resolves a prefix/local name pair: the "_" prefix yields a named blank
  # node, anything else a URI in the namespace bound to the prefix (or to
  # '__local__' when there is no prefix).
  def build_uri(prefix, localname)
    prefix = '__local__' if prefix.nil?
    return BNode.new(localname) if prefix == '_'

    # The local name comes straight from the parsed document, so it must not
    # be dispatched with `send`: names such as "format", "class" or "exit"
    # would invoke existing (even private) methods instead of building a URI.
    # Assumes the bindings are Rena::Namespace objects, whose public
    # method_missing builds the URIRef.
    @graph.nsbinding[prefix].method_missing(localname)
  end
end
145
+ end
@@ -0,0 +1,76 @@
1
+ require 'rena/uriref'
2
+ require 'rena/graph'
3
+
4
+ module Rena
5
class Namespace
  attr_accessor :short, :uri, :fragment

  ##
  # Creates a new namespace given a URI and the short name.
  #
  # ==== Example
  #   Namespace.new("http://xmlns.com/foaf/0.1/", "foaf") # => returns a new Foaf namespace
  #
  # @param [String] uri the URI of the namespace
  # @param [String] short the short name of the namespace
  # @param [Boolean] fragment are the identifiers on this resource fragment identifiers? (e.g. '#') Defaults to false.
  #
  # ==== Returns
  # @return [Namespace] The newly created namespace.
  #
  # @raise [RuntimeError] if the short name is not a letter or underscore
  #   followed by any number of letters, digits or underscores.
  # @author Tom Morris, Pius Uzamere
  def initialize(uri, short, fragment = false)
    raise "Invalid namespace short name: #{short.inspect}" unless shortname_valid?(short)

    @uri = uri
    @fragment = fragment
    @short = short
  end

  ##
  # Allows the construction of arbitrary URIs on the namespace: any method
  # that is not otherwise defined on the object returns a URIRef made of the
  # namespace URI (plus '#' for fragment namespaces) and the method name.
  # Names of methods that already exist on the object (e.g. +class+, +hash+)
  # are dispatched normally and never reach this method.
  #
  # ==== Example
  #   foaf = Namespace.new("http://xmlns.com/foaf/0.1/", "foaf"); foaf.knows # => returns a new URIRef with URI "http://xmlns.com/foaf/0.1/knows"
  #   foaf = Namespace.new("http://xmlns.com/foaf/0.1/", "foaf", true); foaf.knows # => returns a new URIRef with URI "http://xmlns.com/foaf/0.1/#knows"
  #
  # @param [Symbol, String] methodname the local name to append to the namespace URI
  # @param args ignored
  #
  # ==== Returns
  # @return [URIRef] The newly created URIReference.
  #
  # @author Tom Morris, Pius Uzamere
  def method_missing(methodname, *args)
    separator = fragment ? '#' : ''
    URIRef.new(@uri + separator + methodname.to_s)
  end

  ##
  # Binds this namespace to the given graph.
  #
  # @param [Graph] graph the graph to bind to
  # @return whatever Graph#bind returns
  # @raise [RuntimeError] if +graph+ is not a Graph
  def bind(graph)
    raise "Can only bind a namespace to a Graph, got #{graph.class}" unless graph.is_a?(Graph)

    graph.bind(self)
  end

  private

  # A short name is valid when, in its entirety, it is a letter or underscore
  # followed by zero or more letters, digits or underscores. The pattern is
  # anchored so trailing garbage is rejected, and single-character names
  # such as "a" are accepted.
  def shortname_valid?(shortname)
    !!(shortname.to_s =~ /\A[a-zA-Z_][a-zA-Z0-9_]*\z/)
  end
end
76
+ end
@@ -0,0 +1,188 @@
1
+ require 'rena/uriref'
2
+ require 'rena/graph'
3
+ require 'rena/literal'
4
+ require 'rena/exceptions/uri_relative_exception'
5
+ require 'rena/exceptions/about_each_exception'
6
+ require 'rena/rexml_hacks'
7
+
8
+
9
+ module Rena
10
##
# Parses an RDF/XML document into a Graph.
class RdfXmlParser
  SYNTAX_BASE = "http://www.w3.org/1999/02/22-rdf-syntax-ns"
  RDF_TYPE = SYNTAX_BASE + "#type"
  RDF_DESCRIPTION = SYNTAX_BASE + "#Description"

  attr_accessor :xml, :graph

  ##
  # Parses +xml_str+ immediately. When the document declares the RDF syntax
  # namespace the resulting triples are available through #graph; otherwise
  # #graph is left unset (+nil+).
  #
  # @param [String] xml_str the RDF/XML document
  # @param [String] uri the URI of the document, used to resolve relative
  #   URIs and rdf:ID values
  def initialize(xml_str, uri = nil)
    # Attributes that identify a node and therefore never become properties.
    @excl = ["http://www.w3.org/1999/02/22-rdf-syntax-ns#resource",
             "http://www.w3.org/1999/02/22-rdf-syntax-ns#nodeID",
             "http://www.w3.org/1999/02/22-rdf-syntax-ns#about",
             "http://www.w3.org/1999/02/22-rdf-syntax-ns#ID"]
    @uri = Addressable::URI.parse(uri) unless uri.nil?
    @xml = REXML::Document.new(xml_str)
    return unless is_rdf?

    @graph = Graph.new
    @xml.root.each_element { |e| parse_element(e) }
  end

  ##
  # @return [Boolean] true when a top-level element of the document declares
  #   the RDF syntax namespace
  def is_rdf?
    @xml.each_element do |e|
      return true if e.namespaces.has_value?("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
    end
    false
  end

  protected

  ##
  # Determines the node an element refers to from its RDF attributes.
  #
  # @param element the XML element to inspect
  # @param [Boolean] aboutmode when false (the default) the node is read from
  #   rdf:resource, otherwise from rdf:about
  # @return the node named by rdf:resource/rdf:about (URIRef), rdf:nodeID
  #   (BNode) or rdf:ID (String), or a fresh BNode when none is present
  # @raise [AboutEachException] for rdf:aboutEach and rdf:aboutEachPrefix
  # @raise [RuntimeError] for rdf:bagID, and for rdf:ID when neither a base
  #   URI nor a document URI is known
  def get_uri_from_atts(element, aboutmode = false)
    resourceuri = SYNTAX_BASE + (aboutmode == false ? "#resource" : "#about")

    subject = nil
    element.attributes.each_attribute do |att|
      uri = att.namespace + att.name
      value = att.to_s

      case uri
      when SYNTAX_BASE + "#aboutEach"
        raise AboutEachException, "Failed as per RDFMS-AboutEach-Error001.rdf test from 2004 test suite"
      when SYNTAX_BASE + "#aboutEachPrefix"
        raise AboutEachException, "Failed as per RDFMS-AboutEach-Error002.rdf test from 2004 test suite"
      when SYNTAX_BASE + "#bagID"
        # TODO: do something intelligent with the bagID
        raise "rdf:bagID is not supported"
      when resourceuri # specified resource
        element_uri = Addressable::URI.parse(value)
        if element_uri.relative?
          # we have an element with a relative URI
          if element.base?
            # the element has a base URI, use that to build the URI
            value = "##{value}" if value[0..0].to_s != "#"
            value = "#{element.base}#{value}"
          elsif !@uri.nil?
            # we can use the document URI to build the URI for the element
            value = @uri + element_uri
          end
        end
        subject = URIRef.new(value)
      when SYNTAX_BASE + "#nodeID" # BNode with ID
        # Syntax check first; an identifier that fails it is ignored.
        if value =~ /^[a-zA-Z_][a-zA-Z0-9]*$/
          # Reuse the graph's node for this identifier if it already has one.
          subject = if @graph.has_bnode_identifier?(value)
                      @graph.get_bnode_by_identifier(value)
                    else
                      BNode.new(value)
                    end
        end
      when SYNTAX_BASE + "#ID"
        # rdf:ID names a fragment of the base (or document) URI, so the two
        # are joined with '#', as the rdf:resource branch above also does.
        if element.base?
          subject = "#{element.base}##{value}"
        elsif !@uri.nil?
          subject = "#{@uri}##{value}"
        else
          raise "Needs to have an ID"
        end
      end

      # add other subject detection subroutines here
    end

    subject.nil? ? BNode.new : subject
  end

  ##
  # Parses a node element: emits its rdf:type triple, its property
  # attributes and its property elements.
  #
  # @param element the node element
  # @param subject the node the element describes; derived from the
  #   element's attributes when nil
  # @param [Boolean] resource whether the element is known to be a typed
  #   node (emits the type triple even without an 'about' attribute)
  def parse_element(element, subject = nil, resource = false)
    # figure out subject
    subject = get_uri_from_atts(element, true) if subject.nil?

    # type parsing: the element name is the node's type, except for the
    # untyped rdf:Description element.
    if resource == true || element.attributes.has_key?('about')
      type = URIRef.new(element.namespace + element.name)
      @graph.add_triple(subject, RDF_TYPE, type) unless type.to_s == RDF_DESCRIPTION
    end

    # attribute parsing
    element.attributes.each_attribute do |att|
      uri = att.namespace + att.name
      @graph.add_triple(subject, uri, Literal.untyped(att.to_s)) unless @excl.member?(uri)
    end

    # element parsing
    element.each_element { |e| parse_resource_element(e, subject) }
  end

  ##
  # Parses a property element of +subject+ and adds the resulting triple(s).
  #
  # @param e the property element
  # @param subject the node the property belongs to
  def parse_resource_element(e, subject)
    uri = e.namespace + e.name
    rdf_ns = SYNTAX_BASE + "#"
    parse_type = e.attributes.get_attribute_ns(rdf_ns, "parseType").to_s
    datatype = e.attributes.get_attribute_ns(rdf_ns, "datatype")

    if parse_type == "Literal"
      # join (not Array#to_s, which is #inspect on Ruby 1.9+) so the literal
      # is the serialised XML content of the element
      @graph.add_triple(subject, uri, Literal.typed(e.children.join.strip, SYNTAX_BASE + "#XMLLiteral"))
    elsif e.has_elements?
      # subparsing
      e.each_element do |se| # se = 'striped element'
        object = parse_type == "Resource" ? BNode.new : get_uri_from_atts(se, true)
        @graph.add_triple(subject, uri, object)
        parse_element(se, object, true)
      end
    elsif datatype
      @graph.add_triple(subject, uri, Literal.typed(e.text, datatype.to_s.strip))
    elsif e.has_attributes?
      # get object out
      @graph.add_triple(subject, uri, get_uri_from_atts(e))
    elsif e.has_text?
      literal = e.lang? ? Literal.untyped(e.text, e.lang) : Literal.untyped(e.text)
      @graph.add_triple(subject, uri, literal)
    end
  end
end
188
+ end