reddy 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (69) hide show
  1. data/HACKNOTES +2 -0
  2. data/History.txt +3 -0
  3. data/Manifest.txt +80 -0
  4. data/README.rdoc +48 -0
  5. data/README.txt +62 -0
  6. data/Rakefile +67 -0
  7. data/lib/reddy.rb +8 -0
  8. data/lib/reddy/bnode.rb +70 -0
  9. data/lib/reddy/exceptions/about_each_exception.rb +2 -0
  10. data/lib/reddy/exceptions/uri_relative_exception.rb +2 -0
  11. data/lib/reddy/graph.rb +182 -0
  12. data/lib/reddy/libxml_hacks.rb +6 -0
  13. data/lib/reddy/literal.rb +211 -0
  14. data/lib/reddy/n3_grammar.treetop +129 -0
  15. data/lib/reddy/n3parser.rb +145 -0
  16. data/lib/reddy/namespace.rb +73 -0
  17. data/lib/reddy/rdfaparser.rb +63 -0
  18. data/lib/reddy/rdfxmlparser.rb +254 -0
  19. data/lib/reddy/rexml_hacks.rb +97 -0
  20. data/lib/reddy/triple.rb +95 -0
  21. data/lib/reddy/uriref.rb +66 -0
  22. data/reddy.gemspec +50 -0
  23. data/spec/bnode_spec.rb +29 -0
  24. data/spec/graph_spec.rb +138 -0
  25. data/spec/literal_spec.rb +142 -0
  26. data/spec/n3parser_spec.rb +86 -0
  27. data/spec/namespaces_spec.rb +44 -0
  28. data/spec/parser_spec.rb +391 -0
  29. data/spec/rdfa_parser_spec.rb +28 -0
  30. data/spec/rexml_hacks_spec.rb +99 -0
  31. data/spec/triple_spec.rb +108 -0
  32. data/spec/uriref_spec.rb +96 -0
  33. data/test/longtests_spec.rb +25 -0
  34. data/test/n3_tests/lcsh/sh85062913.n3 +41 -0
  35. data/test/n3_tests/lcsh/sh85062913.nt +21 -0
  36. data/test/n3_tests/lcsh/sh85082139.n3 +157 -0
  37. data/test/n3_tests/lcsh/sh85082139.nt +79 -0
  38. data/test/n3_tests/lcsh/sh85118553.n3 +123 -0
  39. data/test/n3_tests/lcsh/sh85118553.nt +63 -0
  40. data/test/n3_tests/misc/on_now-01.n3 +30 -0
  41. data/test/n3_tests/misc/on_now-01.nt +15 -0
  42. data/test/n3_tests/n3p/simple-01.n3 +1 -0
  43. data/test/n3_tests/n3p/simple-01.nt +0 -0
  44. data/test/n3_tests/n3p/simple-02.n3 +4 -0
  45. data/test/n3_tests/n3p/simple-02.nt +0 -0
  46. data/test/n3_tests/n3p/simple-03.n3 +5 -0
  47. data/test/n3_tests/n3p/simple-03.nt +1 -0
  48. data/test/n3_tests/n3p/simple-04.n3 +6 -0
  49. data/test/n3_tests/n3p/simple-04.nt +3 -0
  50. data/test/n3_tests/n3p/simple-05.n3 +7 -0
  51. data/test/n3_tests/n3p/simple-05.nt +2 -0
  52. data/test/n3_tests/n3p/simple-06.n3 +6 -0
  53. data/test/n3_tests/n3p/simple-06.nt +4 -0
  54. data/test/n3_tests/n3p/simple-07.n3 +7 -0
  55. data/test/n3_tests/n3p/simple-07.nt +6 -0
  56. data/test/perf_test/test.rb +11 -0
  57. data/test/perf_test/tommorris.rdf +2267 -0
  58. data/test/rdf_tests/cc197bad-dc9c-440d-a5b5-d52ba2e14234.nt +24 -0
  59. data/test/rdf_tests/cc197bad-dc9c-440d-a5b5-d52ba2e14234.rdf +46 -0
  60. data/test/rdf_tests/tm_001.nt +1 -0
  61. data/test/rdf_tests/tm_001.rdf +7 -0
  62. data/test/rdf_tests/xml-literal-mixed.nt +7 -0
  63. data/test/rdf_tests/xml-literal-mixed.rdf +15 -0
  64. data/test/ruby_fundamentals.spec.rb +17 -0
  65. data/test/test_helper.rb +2 -0
  66. data/test/test_reddy.rb +11 -0
  67. data/test/test_uris.rb +13 -0
  68. data/test/xml.rdf +6 -0
  69. metadata +198 -0
@@ -0,0 +1,73 @@
1
module Reddy
  ##
  # Pairs a namespace URI with a short prefix and builds URIRefs for terms in
  # that namespace through dynamic method calls (see #method_missing).
  class Namespace
    attr_accessor :short, :uri, :fragment

    ##
    # Creates a new namespace given a URI and the short name.
    #
    # ==== Example
    #   Namespace.new("http://xmlns.com/foaf/0.1/", "foaf") # => returns a new Foaf namespace
    #
    # @param [String] uri the URI of the namespace
    # @param [String] short the short name of the namespace
    # @param [Boolean] fragment are the identifiers on this resource fragment identifiers? (e.g. '#') Defaults to false.
    #
    # ==== Returns
    # @return [Namespace] The newly created namespace.
    #
    # @raise [RuntimeError] if the short name is not a valid prefix.
    # @author Tom Morris, Pius Uzamere
    def initialize(uri, short, fragment = false)
      raise "Invalid namespace short name: #{short.inspect}" unless shortname_valid?(short)

      @uri = uri
      @fragment = fragment
      @short = short
    end

    ##
    # Allows the construction of arbitrary URIs on the namespace: any method
    # that is not otherwise defined is turned into a term of the namespace.
    #
    # ==== Example
    #   foaf = Namespace.new("http://xmlns.com/foaf/0.1/", "foaf"); foaf.knows # => returns a new URIRef with URI "http://xmlns.com/foaf/0.1/knows"
    #   foaf = Namespace.new("http://xmlns.com/foaf/0.1/", "foaf", true); foaf.knows # => returns a new URIRef with URI "http://xmlns.com/foaf/0.1/#knows"
    #
    # @param [Symbol] methodname the term to append to the namespace URI
    # @param [Array] args ignored
    #
    # ==== Returns
    # @return [URIRef] The newly created URI reference.
    #
    # @author Tom Morris, Pius Uzamere
    def method_missing(methodname, *args)
      separator = fragment ? '#' : ''
      URIRef.new(@uri + separator + methodname.to_s)
    end

    ##
    # Registers this namespace with a graph by calling graph.bind(self).
    #
    # @param [Graph] graph the graph (or a subclass instance) to bind to
    # @raise [RuntimeError] if the argument is not a Graph.
    def bind(graph)
      raise "Cannot bind namespace to #{graph.class}: expected a Graph" unless graph.is_a?(Graph)

      graph.bind(self)
    end

    private

    # A short name must be a letter or underscore followed by any number of
    # letters, digits or underscores. The pattern is anchored to the whole
    # string so that stray characters around a valid run are rejected.
    def shortname_valid?(shortname)
      !(shortname =~ /\A[a-zA-Z_][a-zA-Z0-9_]*\z/).nil?
    end
  end
end
@@ -0,0 +1,63 @@
1
module Reddy
  ##
  # Minimal RDFa reader: turns one element's about/property/content
  # attributes into a single triple in #graph.
  class RdfaParser
    attr_accessor :xml, :uri, :graph

    ##
    # Parses the document and processes one fixed element of it.
    #
    # @param [String] str the XML source
    # @param [String] uri the document URI, used as the default subject
    def initialize(str, uri)
      @doc_string = str
      @xml = REXML::Document.new(str)
      @uri = uri
      @graph = Graph.new
      # Only one element is visited: the first child of the first child of
      # the root's second child element.
      iterate(@xml.root.elements[2].elements[1].elements[1])
    end

    ##
    # Expands a CURIE held in an attribute into a full URI string using the
    # namespaces in scope on the element.
    #
    # @param el the element carrying the attribute
    # @param [String] attname the attribute name
    # @return [String] namespace URI followed by the CURIE's reference part
    # @raise [RuntimeError] if the prefix has no namespace declaration
    def parse_ns_curie(el, attname)
      # Split on the first colon; Array#to_s on a scan result only yields the
      # bare capture on Ruby 1.8, so it is not used here.
      prefix, reference = el.attributes[attname].to_s.split(':', 2)
      namespace = el.namespaces[prefix]
      raise "Namespace used in CURIE but not declared" unless namespace

      namespace + reference.to_s
    end

    ##
    # Adds the triple described by an element to the graph. Elements without
    # a property attribute describe no statement and are skipped.
    #
    # @param el the element to read
    def iterate(el)
      about = el.attributes['about']
      subject =
        if about.nil?
          @uri
        elsif about =~ /\Ahttp/
          # deal with as absolute
          about.to_s
        else
          # Resolve against the xml:base in scope on the element, falling
          # back to the document URI.
          Addressable::URI.parse(el.base? ? el.base : @uri) + about
        end

      property = el.attributes['property']
      return if property.nil?

      property = parse_ns_curie(el, "property") unless property =~ /\Ahttp/

      value = el.attributes['content'] || el.text
      @graph.add_triple subject.to_s, URIRef.new(property), value
    end
  end
end
@@ -0,0 +1,254 @@
1
+ #require 'ruby-debug'
2
+ require 'xml'
3
+ include Reddy
4
+
5
+ module Reddy
6
+ include LibXML
7
+
8
+ class RdfXmlParser
9
+
10
+ attr_accessor :xml, :graph
11
##
# Parses an RDF/XML string and loads the statements it describes into a
# fresh graph.
#
# @param [String] xml_str the RDF/XML source
# @param [String] uri optional document URI, kept in normalised string form
def initialize(xml_str, uri = nil)
  @@syntax_base = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  @@rdf_type = @@syntax_base + "type"
  @excl = ["http://www.w3.org/1999/02/22-rdf-syntax-ns#resource",
           "http://www.w3.org/1999/02/22-rdf-syntax-ns#nodeID",
           "http://www.w3.org/1999/02/22-rdf-syntax-ns#about",
           "http://www.w3.org/1999/02/22-rdf-syntax-ns#ID"]
  @uri = Addressable::URI.parse(uri).to_s unless uri.nil?
  @graph = Reddy::Graph.new
  @xml = LibXML::XML::Parser.string(xml_str).parse
  @id_mapping = {}

  # Every child element of an rdf:RDF node is handed to parse_descriptions.
  describe_children = lambda do |rdf_node|
    rdf_node.each_element { |description| parse_descriptions(description) }
  end

  top = @xml.root
  if is_rdf_root?(top)
    describe_children.call(top)
  else
    # Otherwise look for rdf:RDF nodes one level below the document element.
    top.each_element do |candidate|
      describe_children.call(candidate) if is_rdf_root?(candidate)
    end
  end
end
37
+
38
+ private
39
##
# Tells whether a node is an rdf:RDF element: its name is "RDF" and its
# namespace is the RDF syntax namespace.
#
# @param node the XML node to test
# @return [Boolean] always true or false (never nil)
def is_rdf_root?(node)
  node.name == "RDF" &&
    !node.namespace.nil? &&
    node.namespace_node.href == @@syntax_base
end
49
+
50
##
# Emits the triples described by one node element and, recursively, by the
# node elements nested inside its property elements.
#
# @param el the node element to process
# @param subject the subject to use for +el+; when nil it is derived from
#   the element with parse_subject
# @raise [RuntimeError] if a property element carries an invalid rdf:ID
def parse_descriptions(el, subject=nil)
  subject = parse_subject(el) if subject.nil?

  # Typed node: anything other than rdf:Description contributes an rdf:type
  # triple. Containers get no special treatment here; rdf:li children are
  # numbered below instead.
  unless el.name == "Description" && el.namespace_node.href == @@syntax_base
    @graph.add_triple(subject, @@rdf_type, url_helper(el.name, el.namespace_node.href, el.base))
  end

  # Property attributes: every attribute outside the RDF syntax namespace.
  el.attributes.each do |att|
    next if att.ns.href == @@syntax_base

    @graph.add_triple(subject, url_helper(att.name, att.ns.href, el.base), att.value)
  end

  li_counter = 0 # this will increase for each li we iterate through
  el.each_element do |child|
    predicate = url_helper(child.name, child.namespace_node.href, child.base)
    if predicate.to_s == @@syntax_base + "li"
      # rdf:li becomes rdf:_1, rdf:_2, ... in document order.
      li_counter += 1
      predicate = Addressable::URI.parse(predicate.to_s)
      predicate.fragment = "_#{li_counter.to_s}"
      predicate = predicate.to_s
    end

    object = child.content

    # The attributes of the property element itself (not of the enclosing
    # node element) decide whether the object is a blank node or a resource.
    node_id = child.attributes.get_attribute_ns(@@syntax_base, "nodeID")
    resource = child.attributes.get_attribute_ns(@@syntax_base, "resource")
    if node_id
      object = forge_bnode_from_string(node_id.value)
      @graph.add_triple(subject, predicate, object)
    elsif resource
      object = URIRef.new(base_helper(resource.value, child.base).to_s)
      @graph.add_triple(subject, predicate, object)
    end

    # Plain literal content: one triple per non-blank text node.
    child.each do |contents|
      if contents.text? && !contents.content.strip.empty?
        object = contents.content
        @graph.add_triple(subject, predicate, object)
      end
    end

    parse_type = child.attributes.get_attribute_ns(@@syntax_base, "parseType")
    child.each_element do |cel|
      object = parse_subject(cel)
      if parse_type
        case parse_type.value
        when "XMLLiteral"
          object = Literal.typed(cel.namespaced_to_s, "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral")
          @graph.add_triple(subject, predicate, object)
        when "Literal"
          if smells_like_xml?(cel.namespaced_to_s)
            object = Literal.typed(cel.to_s, "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral")
          else
            object = cel.to_s
          end
          @graph.add_triple(subject, predicate, object)
        when "Resource"
          object = BNode.new
          @graph.add_triple(subject, predicate, object)
          parse_descriptions(cel, object)
        # "Collection" is not handled.
        end
      else
        @graph.add_triple(subject, predicate, object)
        # Reuse the node just linked as the nested subject; deriving it a
        # second time would mint a different blank node for anonymous nodes.
        parse_descriptions(cel, object)
      end
    end

    # reification
    statement_id = child.attributes.get_attribute_ns(@@syntax_base, "ID")
    next unless statement_id

    raise "Invalid rdf:ID: #{statement_id.value.inspect}" unless id_check?(statement_id.value)

    # NOTE(review): child.base is passed in url_helper's namespace position,
    # so the fragment is joined directly onto the element's base when set.
    rsubject = url_helper("#" + statement_id.value, child.base)
    @graph.add_triple(rsubject, URIRef.new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), URIRef.new("http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement"))
    @graph.add_triple(rsubject, URIRef.new("http://www.w3.org/1999/02/22-rdf-syntax-ns#subject"), subject)
    @graph.add_triple(rsubject, URIRef.new("http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate"), predicate)
    @graph.add_triple(rsubject, URIRef.new("http://www.w3.org/1999/02/22-rdf-syntax-ns#object"), object)
  end
end
140
+
141
+ private
142
##
# Rejects elements that use RDF attributes this parser does not accept.
#
# @param el the element to inspect
# @raise [Reddy::AboutEachException] if rdf:aboutEach or rdf:aboutEachPrefix is present
# @raise [RuntimeError] "Bad BagID" if rdf:bagID is present but is not a valid name
def fail_check(el)
  %w[aboutEach aboutEachPrefix].each do |forbidden|
    raise Reddy::AboutEachException if el.attributes.get_attribute_ns(@@syntax_base, forbidden)
  end

  bag_id = el.attributes.get_attribute_ns(@@syntax_base, "bagID")
  # Anchored to the whole value (not per line); later characters may be any
  # XML name character in the ASCII range.
  raise "Bad BagID" if bag_id && bag_id.value !~ /\A[a-zA-Z_][\w.\-]*\z/
end
153
+
154
##
# Works out the subject a node element describes.
#
# Precedence: rdf:about, then rdf:ID, then rdf:nodeID; an element with none
# of these gets a fresh blank node.
#
# @param el the node element
# @return a URIRef for rdf:about / rdf:ID, otherwise a BNode
# @raise [RuntimeError] if rdf:ID is present but invalid
# @raise whatever fail_check raises for unsupported attributes
def parse_subject(el)
  fail_check(el)

  about = el.attributes.get_attribute_ns(@@syntax_base, "about")
  return URIRef.new(base_helper(about.value, el.base).to_s) if about

  id = el.attributes.get_attribute_ns(@@syntax_base, "ID")
  if id
    raise "Invalid rdf:ID: #{id.value.inspect}" unless id_check?(id.value)

    return url_helper("#" + id.value, "", el.base)
  end

  node_id = el.attributes.get_attribute_ns(@@syntax_base, "nodeID")
  return BNode.new(node_id.value) if node_id

  BNode.new
end
200
+
201
##
# Returns the blank node for an identifier, reusing the one already held by
# the graph when there is one and creating a new BNode otherwise.
#
# @param [String] value the blank node identifier
# @return [BNode]
def forge_bnode_from_string(value)
  return @graph.get_bnode_by_identifier(value) if @graph.has_bnode_identifier?(value)

  BNode.new(value)
end
212
+
213
##
# Checks that an rdf:ID value is a usable XML name: a letter or underscore
# followed by letters, digits, underscores, hyphens or dots.
#
# The pattern is anchored to the whole string, so a value with a valid
# first line and arbitrary trailing lines is rejected.
#
# @param [String] id the candidate identifier
# @return [Boolean]
def id_check?(id)
  !(id =~ /\A[a-zA-Z_][\w.\-]*\z/).nil?
end
216
+
217
+ protected
218
+
219
##
# Crude test for XML markup: true when the text mentions "xmlns" anywhere.
#
# @param [String] str the text to test
# @return [Boolean]
def smells_like_xml?(str)
  (str =~ /xmlns/) ? true : false
end
222
+
223
##
# Resolves a URI reference to string form.
#
# Absolute references are returned as they are. A relative reference is
# joined onto +base+ when one is given, otherwise onto the parser's document
# URI (@uri) when that is set; with neither it is returned unresolved.
#
# @param [String] uri the reference to resolve
# @param [String] base the base URI in scope, if any
# @return [String]
def base_helper(uri, base = nil)
  reference = Addressable::URI.parse(uri)
  if reference.relative?
    context = base.nil? ? @uri : base
    # Join the reference onto its context; the context on its own is not the
    # resolved URI.
    reference = Addressable::URI.parse(context) + reference unless context.nil?
  end
  reference.to_s
end
235
+
236
##
# Builds a URIRef from a local name and a namespace URI.
#
# With a namespace, the name is joined onto it (as a fragment when the
# namespace ends in "#"). Without one, the name itself is taken as the URI.
# A result that is still relative is passed through base_helper.
#
# @param [String] name the local name (or a full/relative URI when ns is blank)
# @param [String] ns the namespace URI; may be nil or ""
# @param [String] base base URI handed to base_helper for relative results
# @return [URIRef]
def url_helper(name, ns, base = nil)
  if ns == "" || ns.nil?
    a = Addressable::URI.parse(name)
  else
    tail = ns.to_s[-1, 1] == "#" ? "#" + name : name
    a = Addressable::URI.parse(ns) + Addressable::URI.parse(tail)
  end

  a = base_helper(a.to_s, base) if a.relative?

  URIRef.new(a.to_s)
end
252
+
253
+ end
254
+ end
@@ -0,0 +1,97 @@
1
+ require 'rexml/document'
2
+
3
+ # @ignore
4
+ # def subdocument_writer(el)
5
+ # el.prefixes.each { |ns|
6
+ # el.add_attribute('xmlns:' + ns, el.namespaces[ns].to_s)
7
+ # }
8
+ # return el.to_s
9
+ # end
10
+
11
class REXML::Element
  public

  ##
  # Tells you whether or not an xml:lang is in scope for an element (set on
  # the element itself or on an ancestor).
  #
  # @author Tom Morris
  def lang?
    !lang.nil?
  end

  ##
  # Tells you what the xml:lang in scope for an element is.
  #
  # ==== Returns
  # @return [String] The value of the nearest xml:lang, or nil if none is set.
  #
  # @author Tom Morris
  def lang
    own = attributes['xml:lang']
    return own.to_s if own

    parent.nil? ? nil : parent.lang
  end

  ##
  # Tells you whether or not an xml:base is in scope for an element (set on
  # the element itself or on an ancestor).
  #
  # @author Tom Morris
  def base?
    !base.nil?
  end

  ##
  # Tells you what the xml:base in scope for an element is.
  #
  # ==== Returns
  # @return [String] The URI of the nearest xml:base, or nil if none is set.
  #
  # @author Tom Morris
  def base
    own = attributes['xml:base']
    return own.to_s if own

    parent.nil? ? nil : parent.base
  end

  ##
  # Allows you to write out an XML representation of a particular element and
  # its children, fixing namespace issues: the namespace declarations in scope
  # are copied onto the element so the output stands on its own. Note that the
  # declarations are added to the element (and its descendants) in place.
  #
  # @param [Array<String>] excl namespace URIs that should not be declared
  #   on the element
  #
  # ==== Returns
  # @return [String] The XML of the element and its children.
  #
  # @author Tom Morris
  def write_reddy(excl=[])
    prefixes.each do |ns|
      add_attribute('xmlns:' + ns, namespaces[ns].to_s) unless excl.include?(namespaces[ns])
    end
    support_write_recursive(namespaces, self)
    to_s
  end

  protected

  # Walks the descendants of +el+ and declares, on each one, the namespace it
  # uses unless +array+ (the prefix => URI map declared at the top) already
  # binds that very prefix to that very URI.
  def support_write_recursive(array, el)
    el.each_element do |e|
      unless array.has_key?(e.prefix) && array[e.prefix] == e.namespace
        if e.prefix != ""
          e.add_attribute('xmlns:' + e.prefix, e.namespace)
        else
          e.add_attribute('xmlns', e.namespace)
        end
      end
      support_write_recursive(array, e)
    end
  end
end