reddy 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. data/HACKNOTES +2 -0
  2. data/History.txt +3 -0
  3. data/Manifest.txt +80 -0
  4. data/README.rdoc +48 -0
  5. data/README.txt +62 -0
  6. data/Rakefile +67 -0
  7. data/lib/reddy.rb +8 -0
  8. data/lib/reddy/bnode.rb +70 -0
  9. data/lib/reddy/exceptions/about_each_exception.rb +2 -0
  10. data/lib/reddy/exceptions/uri_relative_exception.rb +2 -0
  11. data/lib/reddy/graph.rb +182 -0
  12. data/lib/reddy/libxml_hacks.rb +6 -0
  13. data/lib/reddy/literal.rb +211 -0
  14. data/lib/reddy/n3_grammar.treetop +129 -0
  15. data/lib/reddy/n3parser.rb +145 -0
  16. data/lib/reddy/namespace.rb +73 -0
  17. data/lib/reddy/rdfaparser.rb +63 -0
  18. data/lib/reddy/rdfxmlparser.rb +254 -0
  19. data/lib/reddy/rexml_hacks.rb +97 -0
  20. data/lib/reddy/triple.rb +95 -0
  21. data/lib/reddy/uriref.rb +66 -0
  22. data/reddy.gemspec +50 -0
  23. data/spec/bnode_spec.rb +29 -0
  24. data/spec/graph_spec.rb +138 -0
  25. data/spec/literal_spec.rb +142 -0
  26. data/spec/n3parser_spec.rb +86 -0
  27. data/spec/namespaces_spec.rb +44 -0
  28. data/spec/parser_spec.rb +391 -0
  29. data/spec/rdfa_parser_spec.rb +28 -0
  30. data/spec/rexml_hacks_spec.rb +99 -0
  31. data/spec/triple_spec.rb +108 -0
  32. data/spec/uriref_spec.rb +96 -0
  33. data/test/longtests_spec.rb +25 -0
  34. data/test/n3_tests/lcsh/sh85062913.n3 +41 -0
  35. data/test/n3_tests/lcsh/sh85062913.nt +21 -0
  36. data/test/n3_tests/lcsh/sh85082139.n3 +157 -0
  37. data/test/n3_tests/lcsh/sh85082139.nt +79 -0
  38. data/test/n3_tests/lcsh/sh85118553.n3 +123 -0
  39. data/test/n3_tests/lcsh/sh85118553.nt +63 -0
  40. data/test/n3_tests/misc/on_now-01.n3 +30 -0
  41. data/test/n3_tests/misc/on_now-01.nt +15 -0
  42. data/test/n3_tests/n3p/simple-01.n3 +1 -0
  43. data/test/n3_tests/n3p/simple-01.nt +0 -0
  44. data/test/n3_tests/n3p/simple-02.n3 +4 -0
  45. data/test/n3_tests/n3p/simple-02.nt +0 -0
  46. data/test/n3_tests/n3p/simple-03.n3 +5 -0
  47. data/test/n3_tests/n3p/simple-03.nt +1 -0
  48. data/test/n3_tests/n3p/simple-04.n3 +6 -0
  49. data/test/n3_tests/n3p/simple-04.nt +3 -0
  50. data/test/n3_tests/n3p/simple-05.n3 +7 -0
  51. data/test/n3_tests/n3p/simple-05.nt +2 -0
  52. data/test/n3_tests/n3p/simple-06.n3 +6 -0
  53. data/test/n3_tests/n3p/simple-06.nt +4 -0
  54. data/test/n3_tests/n3p/simple-07.n3 +7 -0
  55. data/test/n3_tests/n3p/simple-07.nt +6 -0
  56. data/test/perf_test/test.rb +11 -0
  57. data/test/perf_test/tommorris.rdf +2267 -0
  58. data/test/rdf_tests/cc197bad-dc9c-440d-a5b5-d52ba2e14234.nt +24 -0
  59. data/test/rdf_tests/cc197bad-dc9c-440d-a5b5-d52ba2e14234.rdf +46 -0
  60. data/test/rdf_tests/tm_001.nt +1 -0
  61. data/test/rdf_tests/tm_001.rdf +7 -0
  62. data/test/rdf_tests/xml-literal-mixed.nt +7 -0
  63. data/test/rdf_tests/xml-literal-mixed.rdf +15 -0
  64. data/test/ruby_fundamentals.spec.rb +17 -0
  65. data/test/test_helper.rb +2 -0
  66. data/test/test_reddy.rb +11 -0
  67. data/test/test_uris.rb +13 -0
  68. data/test/xml.rdf +6 -0
  69. metadata +198 -0
@@ -0,0 +1,73 @@
1
module Reddy
  ##
  # A namespace: a base URI paired with a short prefix. Any otherwise
  # undefined method called on a namespace is turned into a URIRef built
  # from the base URI and the method name (see #method_missing).
  class Namespace
    # Short names must look like an identifier: a letter or underscore
    # followed by letters, digits or underscores. Anchored with \A and \z so
    # the WHOLE name has to match, not just some substring of it.
    SHORTNAME_PATTERN = /\A[a-zA-Z_][a-zA-Z0-9_]*\z/

    attr_accessor :short, :uri, :fragment

    ##
    # Creates a new namespace given a URI and the short name.
    #
    # ==== Example
    #   Namespace.new("http://xmlns.com/foaf/0.1/", "foaf") # => returns a new Foaf namespace
    #
    # @param [String] uri the URI of the namespace
    # @param [String] short the short name of the namespace
    # @param [Boolean] fragment are the identifiers on this resource fragment identifiers? (e.g. '#') Defaults to false.
    #
    # ==== Returns
    # @return [Namespace] The newly created namespace.
    #
    # @raise [RuntimeError] if the short name is not a valid identifier.
    # @author Tom Morris, Pius Uzamere
    def initialize(uri, short, fragment = false)
      raise "Invalid namespace short name: #{short.inspect}" unless shortname_valid?(short)

      @uri = uri
      @fragment = fragment
      @short = short
    end

    ##
    # Allows the construction of arbitrary URIs on the namespace: the name of
    # the missing method is appended to the namespace URI (after a '#' when
    # the namespace was created with fragment = true).
    #
    # ==== Example
    #   foaf = Namespace.new("http://xmlns.com/foaf/0.1/", "foaf"); foaf.knows # => returns a new URIRef with URI "http://xmlns.com/foaf/0.1/knows"
    #   foaf = Namespace.new("http://xmlns.com/foaf/0.1/", "foaf", true); foaf.knows # => returns a new URIRef with URI "http://xmlns.com/foaf/0.1/#knows"
    #
    # @param [Symbol] methodname the local name to append to the namespace URI
    # @param [Array] args ignored
    #
    # ==== Returns
    # @return [URIRef] The newly created URI reference.
    #
    # @author Tom Morris, Pius Uzamere
    def method_missing(methodname, *args)
      separator = fragment ? '#' : ''
      URIRef.new(@uri + separator + methodname.to_s)
    end

    ##
    # Binds this namespace to a graph by calling graph.bind(self).
    #
    # @param [Graph] graph the graph to bind to
    # @return whatever Graph#bind returns
    # @raise [RuntimeError] if +graph+ is not a Graph.
    def bind(graph)
      raise "Can't bind namespace to #{graph.class}: expected a Graph" unless graph.is_a?(Graph)

      graph.bind(self)
    end

    private

    # True when +shortname+ is a syntactically valid prefix.
    # nil and other non-strings are converted with to_s first, so they are
    # simply reported as invalid.
    def shortname_valid?(shortname)
      shortname.to_s =~ SHORTNAME_PATTERN ? true : false
    end
  end
end
@@ -0,0 +1,63 @@
1
module Reddy
  ##
  # Very small RDFa reader: builds a Graph from the about / property /
  # content attributes of a single element of an XML document.
  class RdfaParser
    attr_accessor :xml, :uri, :graph

    ##
    # Parses +str+ with REXML and immediately processes one element of it.
    #
    # @param [String] str the XML source of the document
    # @param [String] uri the URI of the document, used as the default subject
    #   and as the base for relative +about+ values
    def initialize(str, uri)
      @doc_string = str
      @xml = REXML::Document.new(str)
      @uri = uri
      @graph = Graph.new
      # NOTE(review): only one element, at a fixed position in the document
      # (second child of the root, then first child, then first child), is
      # processed - this looks tailored to a specific test document.
      iterate(@xml.root.elements[2].elements[1].elements[1])
    end

    ##
    # Expands the CURIE (prefix:reference) held in attribute +attname+ of
    # +el+ into a full URI string, using the namespaces in scope on +el+.
    #
    # @param el an element responding to #attributes and #namespaces
    # @param [String] attname name of the attribute holding the CURIE
    # @return [String] namespace URI followed by the reference
    # @raise [RuntimeError] if the value has no prefix or the prefix is not declared
    def parse_ns_curie(el, attname)
      # Split on the first colon. The previous implementation called
      # Array#to_s on the result of String#scan, which only yields the bare
      # prefix on Ruby 1.8; on later Rubies it yields the inspected array and
      # the namespace lookup could never succeed.
      prefix, reference = el.attributes[attname].to_s.split(':', 2)
      namespace = el.namespaces[prefix] unless reference.nil?
      raise "Namespace used in CURIE but not declared" unless namespace

      namespace + reference
    end

    ##
    # Adds one triple to the graph for +el+:
    # * subject:   the +about+ attribute (resolved against xml:base or the
    #              document URI when relative), or the document URI if absent
    # * predicate: the +property+ attribute, expanded if it is a CURIE
    # * object:    the +content+ attribute, or the element text if absent
    #
    # @param el the element to process
    def iterate(el)
      about = el.attributes['about']
      subject =
        if about.nil?
          @uri
        elsif about =~ /\Ahttp/
          # deal with as absolute
          about.to_s
        else
          # Relative: resolve against the xml:base in scope on the element,
          # falling back to the document URI. (The base used to be looked up
          # on the document object, which never carries one, so the
          # xml:base branch could not be taken.)
          Addressable::URI.parse(el.base || @uri) + about
        end

      property = el.attributes['property']
      # anything that is not an absolute URI is treated as a CURIE
      property = parse_ns_curie(el, "property") if property && property !~ /\Ahttp/

      value = el.attributes['content'] || el.text

      # NOTE(review): when the element has no property attribute, property is
      # nil here and is handed to URIRef.new unchanged - confirm what
      # URIRef does in that case.
      @graph.add_triple subject.to_s, URIRef.new(property), value
    end
  end
end
@@ -0,0 +1,254 @@
1
+ #require 'ruby-debug'
2
+ require 'xml'
3
+ include Reddy
4
+
5
+ module Reddy
6
+ include LibXML
7
+
8
##
# Parses an RDF/XML document into a Graph. The whole document is processed
# by the constructor; the resulting triples are available through #graph.
class RdfXmlParser
  # Namespace URI of the RDF syntax vocabulary (rdf:about, rdf:ID, ...).
  SYNTAX_BASE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#".freeze
  # rdf:type, the predicate used for typed node elements.
  RDF_TYPE = (SYNTAX_BASE + "type").freeze

  attr_accessor :xml, :graph

  ##
  # @param [String] xml_str the RDF/XML source
  # @param [String, nil] uri the document URI, used to resolve relative
  #   references when no xml:base applies
  def initialize(xml_str, uri = nil)
    @excl = %w[resource nodeID about ID].map { |local| SYNTAX_BASE + local }
    @uri = Addressable::URI.parse(uri).to_s unless uri.nil?
    @graph = Reddy::Graph.new
    @xml = LibXML::XML::Parser.string(xml_str).parse
    @id_mapping = Hash.new
    root = @xml.root
    if is_rdf_root?(root)
      root.each_element { |el| parse_descriptions(el) }
    else
      # rdf:RDF may be embedded one level down inside another document
      root.each_element do |candidate|
        next unless is_rdf_root?(candidate)

        candidate.each_element { |el| parse_descriptions(el) }
      end
    end
  end

  private

  # True when +node+ is an element named RDF in the RDF syntax namespace.
  def is_rdf_root?(node)
    node.name == "RDF" && !node.namespace.nil? && node.namespace_node.href == SYNTAX_BASE
  end

  # Looks up attribute +name+ in the RDF syntax namespace on +node+.
  # Returns the attribute object, or nil when the attribute is absent.
  def rdf_attr(node, name)
    node.attributes.get_attribute_ns(SYNTAX_BASE, name)
  end

  ##
  # Emits the triples for the node element +el+: its type, its property
  # attributes and its property (child) elements.
  #
  # @param el the node element
  # @param subject the subject to use; worked out from +el+ when nil
  def parse_descriptions(el, subject = nil)
    subject = parse_subject(el) if subject.nil?

    # Any element other than rdf:Description is a typed node: its own name
    # is the rdf:type of the subject. If special handling for containers
    # (rdf:Bag, rdf:Seq, rdf:Alt) or OWL is ever needed, this is the place;
    # until then containers are only handled through the rdf:li numbering
    # below.
    if el.name != "Description" || el.namespace_node.href != SYNTAX_BASE
      @graph.add_triple(subject, RDF_TYPE, url_helper(el.name, el.namespace_node.href, el.base))
    end

    # property attributes: every attribute outside the RDF syntax namespace
    el.attributes.each do |att|
      next if att.ns.href == SYNTAX_BASE

      @graph.add_triple(subject, url_helper(att.name, att.ns.href, el.base), att.value)
    end

    li_counter = 0 # this will increase for each li we iterate through
    el.each_element do |child|
      predicate = url_helper(child.name, child.namespace_node.href, child.base)
      if predicate.to_s == SYNTAX_BASE + "li"
        # rdf:li becomes rdf:_1, rdf:_2, ... in document order
        li_counter += 1
        numbered = Addressable::URI.parse(predicate.to_s)
        numbered.fragment = "_#{li_counter}"
        predicate = numbered.to_s
      end
      object = child.content

      # The nodeID/resource of the object lives on the property element
      # itself. (This used to test the parent element for rdf:nodeID and
      # then read the value from the child, which failed whenever only one
      # of the two carried the attribute.)
      if (node_id = rdf_attr(child, "nodeID"))
        @graph.add_triple(subject, predicate, forge_bnode_from_string(node_id.value))
      elsif (resource = rdf_attr(child, "resource"))
        @graph.add_triple(subject, predicate, URIRef.new(base_helper(resource.value, child.base).to_s))
      end

      # literal objects: one triple per non-blank text node
      child.each do |contents|
        next unless contents.text? && contents.content.strip.length != 0

        object = contents.content
        @graph.add_triple(subject, predicate, object)
      end

      parse_type = rdf_attr(child, "parseType")
      child.each_element do |cel|
        object = parse_subject(cel)
        if parse_type
          case parse_type.value
          when "XMLLiteral"
            object = Literal.typed(cel.namespaced_to_s, SYNTAX_BASE + "XMLLiteral")
            @graph.add_triple(subject, predicate, object)
          when "Literal"
            if smells_like_xml?(cel.namespaced_to_s)
              object = Literal.typed(cel.to_s, SYNTAX_BASE + "XMLLiteral")
            else
              object = cel.to_s
            end
            @graph.add_triple(subject, predicate, object)
          when "Resource"
            # NOTE(review): each child element gets its own blank node and
            # is then parsed as a node element - confirm this is the
            # intended reading of parseType="Resource".
            object = BNode.new
            @graph.add_triple(subject, predicate, object)
            parse_descriptions(cel, object)
          # when "Collection" - not handled
          end
        else
          @graph.add_triple(subject, predicate, object)
          # Reuse the object as the nested subject. Re-deriving it would
          # mint a second, unrelated blank node for anonymous nested nodes.
          parse_descriptions(cel, object)
        end
      end

      # reification: rdf:ID on a property element names the statement
      id_attr = rdf_attr(child, "ID")
      next unless id_attr
      raise "Invalid rdf:ID #{id_attr.value.inspect}" unless id_check?(id_attr.value)

      rsubject = url_helper("#" + id_attr.value, "", child.base)
      @graph.add_triple(rsubject, URIRef.new(SYNTAX_BASE + "type"), URIRef.new(SYNTAX_BASE + "Statement"))
      @graph.add_triple(rsubject, URIRef.new(SYNTAX_BASE + "subject"), subject)
      @graph.add_triple(rsubject, URIRef.new(SYNTAX_BASE + "predicate"), predicate)
      @graph.add_triple(rsubject, URIRef.new(SYNTAX_BASE + "object"), object)
    end
  end

  # Rejects attributes this parser refuses to process.
  #
  # Raises Reddy::AboutEachException for rdf:aboutEach / rdf:aboutEachPrefix
  # and a RuntimeError for a malformed rdf:bagID.
  def fail_check(el)
    raise Reddy::AboutEachException if rdf_attr(el, "aboutEach")
    raise Reddy::AboutEachException if rdf_attr(el, "aboutEachPrefix")

    bag_id = rdf_attr(el, "bagID")
    raise "Bad BagID" if bag_id && bag_id.value !~ /\A[a-zA-Z_][a-zA-Z0-9]*\z/
  end

  ##
  # Works out the subject for node element +el+:
  # rdf:about  -> URIRef of the (base-resolved) value
  # rdf:ID     -> URIRef of "#id" resolved against the base
  # rdf:nodeID -> BNode with that identifier
  # otherwise  -> a fresh BNode
  #
  # Raises RuntimeError when rdf:ID is not a valid identifier, plus anything
  # raised by #fail_check.
  def parse_subject(el)
    fail_check(el)

    if (about = rdf_attr(el, "about"))
      URIRef.new(base_helper(about.value, el.base).to_s)
    elsif (id = rdf_attr(el, "ID"))
      raise "Invalid rdf:ID #{id.value.inspect}" unless id_check?(id.value)

      url_helper("#" + id.value, "", el.base)
    elsif (node_id = rdf_attr(el, "nodeID"))
      BNode.new(node_id.value)
    else
      BNode.new
    end
  end

  # Returns the blank node the graph already holds for identifier +value+,
  # or a new BNode with that identifier when the graph has none.
  def forge_bnode_from_string(value)
    if @graph.has_bnode_identifier?(value)
      # pull it in - no need to recreate objects.
      @graph.get_bnode_by_identifier(value)
    else
      BNode.new(value)
    end
  end

  # True when +id+ is a letter or underscore followed by word characters.
  # \A and \z anchor the whole string; ^ and $ would accept a value whose
  # first line alone is valid.
  def id_check?(id)
    id =~ /\A[a-zA-Z_]\w*\z/ ? true : false
  end

  protected

  # Crude test for serialized XML: does +str+ contain "xmlns"?
  def smells_like_xml?(str)
    str =~ /xmlns/ ? true : false
  end

  ##
  # Resolves +uri+ against +base+ (or, when +base+ is nil, against the
  # document URI given to the constructor). Absolute URIs, and relative ones
  # when neither base is known, are returned unchanged.
  #
  # @return [String]
  def base_helper(uri, base = nil)
    uri = Addressable::URI.parse(uri)
    if uri.relative?
      root = base.nil? ? @uri : base
      # Join the reference onto the base. (With an explicit base the
      # reference itself used to be dropped, so the bare base came back.)
      uri = Addressable::URI.parse(root) + uri unless root.nil?
    end
    uri.to_s
  end

  ##
  # Builds a URIRef from a local +name+ and a namespace URI +ns+. With an
  # empty or nil +ns+ the name is used on its own. A result that is still
  # relative is resolved with #base_helper.
  #
  # @return [URIRef]
  def url_helper(name, ns, base = nil)
    resolved =
      if ns.nil? || ns == ""
        Addressable::URI.parse(name)
      elsif ns.to_s.end_with?("#")
        # the namespace already ends in '#': attach the name as the fragment
        Addressable::URI.parse(ns) + Addressable::URI.parse("#" + name)
      else
        Addressable::URI.parse(ns) + Addressable::URI.parse(name)
      end
    resolved = base_helper(resolved.to_s, base) if resolved.relative?

    URIRef.new(resolved.to_s)
  end
end
254
+ end
@@ -0,0 +1,97 @@
1
+ require 'rexml/document'
2
+
3
+ # @ignore
4
+ # def subdocument_writer(el)
5
+ # el.prefixes.each { |ns|
6
+ # el.add_attribute('xmlns:' + ns, el.namespaces[ns].to_s)
7
+ # }
8
+ # return el.to_s
9
+ # end
10
+
11
# Reddy's extensions to REXML::Element: inherited xml:lang / xml:base lookup
# and namespace-preserving serialization of a subtree.
class REXML::Element
  public

  ##
  # Tells you whether or not an xml:lang is in scope for an element, i.e.
  # set on the element itself or on one of its ancestors.
  #
  # @return [Boolean]
  # @author Tom Morris
  def lang?
    !lang.nil?
  end

  ##
  # Tells you what the xml:lang in scope for an element is.
  #
  # ==== Returns
  # @return [String, nil] The value of the nearest xml:lang attribute,
  #   looking at the element first and then up through its parents; nil if
  #   none is set.
  #
  # @author Tom Morris
  def lang
    own = attributes['xml:lang']
    return own.to_s if own

    parent.lang unless parent.nil?
  end

  ##
  # Tells you whether or not an xml:base is in scope for an element, i.e.
  # set on the element itself or on one of its ancestors.
  #
  # @return [Boolean]
  # @author Tom Morris
  def base?
    !base.nil?
  end

  ##
  # Tells you what the xml:base in scope for an element is.
  #
  # ==== Returns
  # @return [String, nil] The value of the nearest xml:base attribute,
  #   looking at the element first and then up through its parents; nil if
  #   none is set.
  #
  # @author Tom Morris
  def base
    own = attributes['xml:base']
    return own.to_s if own

    parent.base unless parent.nil?
  end

  ##
  # Allows you to write out an XML representation of a particular element and
  # its children, fixing namespace issues: every namespace prefix in scope is
  # declared on the element itself so the fragment stands on its own.
  #
  # Note that this modifies the element (and possibly its descendants) by
  # adding xmlns attributes.
  #
  # @param [Array<String>] excl namespace URIs that should NOT be declared
  #
  # ==== Returns
  # @return [String] The XML of the element and its children.
  #
  # @author Tom Morris
  def write_reddy(excl = [])
    prefixes.each do |prefix|
      uri = namespaces[prefix]
      add_attribute('xmlns:' + prefix, uri.to_s) unless excl.include?(uri)
    end
    support_write_recursive(namespaces, self)
    to_s
  end

  protected

  # Walks the descendants of +el+ and declares, on each one, the namespace
  # its own prefix resolves to unless +array+ (a prefix => URI hash) already
  # binds that prefix to that URI.
  def support_write_recursive(array, el)
    el.each_element do |e|
      # Compare the binding itself. Checking has_key? and has_value?
      # separately would also be satisfied by a prefix and a URI that belong
      # to two different declarations.
      unless array[e.prefix] == e.namespace
        if e.prefix != ""
          e.add_attribute('xmlns:' + e.prefix, e.namespace)
        else
          e.add_attribute('xmlns', e.namespace)
        end
      end
      support_write_recursive(array, e)
    end
  end
end
+ end