rdf_context 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -379,6 +379,10 @@ module RdfContext
379
379
  end
380
380
  end
381
381
 
382
# Three-way comparison: orders this object against +other+ by comparing the
# string forms (+to_s+) of both sides.
def <=>(other)
  own_text = to_s
  other_text = other.to_s
  own_text <=> other_text
end
385
+
382
386
# Hash code derived from the contents, encoding and language instance
# variables, combined through Array#hash.
def hash
  state = [@contents, @encoding, @lang]
  state.hash
end
@@ -425,10 +429,11 @@ module RdfContext
425
429
  encoding.xml_args(@contents, @lang)
426
430
  end
427
431
 
432
# True when this object's encoding equals Encoding.the_null_encoding.
def untyped?
  encoding == Encoding.the_null_encoding
end
433
# True when this object's encoding differs from Encoding.the_null_encoding.
def typed?
  encoding != Encoding.the_null_encoding
end
434
+
428
435
  # Is this an XMLLiteral?
429
- def xmlliteral?
430
- encoding.is_a?(XMLLiteral)
431
- end
436
# True when this object's encoding equals Encoding.xmlliteral.
def xmlliteral?
  encoding == Encoding.xmlliteral
end
432
437
 
433
438
  # Output literal contents as a string
434
439
  def to_s
@@ -28,7 +28,7 @@ module RdfContext
28
28
  # @param [String] uri:: the URI of the document
29
29
  # @param [Hash] options:: Options from
30
30
  # <em>options[:debug]</em>:: Array to place debug messages
31
- # <em>options[:type]</em>:: One of _rdfxml_, _html_, or _n3_
31
+ # <em>options[:type]</em>:: One of _rdfxml_, _html_, or _n3_ (among others)
32
32
  # <em>options[:strict]</em>:: Raise Error if true, continue with lax parsing, otherwise
33
33
  # @return [Graph]:: Returns the graph containing parsed triples
34
34
  # @raise [Error]:: Raises RdfError if _strict_
@@ -62,9 +62,9 @@ module RdfContext
62
62
 
63
63
  # Create a delegate of a specific parser class
64
64
  @delegate ||= case options[:type].to_s
65
- when "n3", "ntriples", "turtle" then N3Parser.new(options)
66
- when "rdfa", "html", "xhtml" then RdfaParser.new(options)
67
- when "xml", "rdf", "rdfxml" then RdfXmlParser.new(options)
65
+ when "n3", "ntriples", "turtle", "ttl", "n3", "notation3" then N3Parser.new(options)
66
+ when "rdfa", "html", "xhtml" then RdfaParser.new(options)
67
+ when "xml", "rdf", "rdfxml" then RdfXmlParser.new(options)
68
68
  else
69
69
  RdfXmlParser.new(options)
70
70
  # raise ParserException.new("type option must be one of :rdfxml, :html, or :n3")
@@ -0,0 +1,26 @@
1
+ require File.join(File.dirname(__FILE__), '..', 'uriref')
2
+
3
+ module RdfContext
4
# Abstract serializer
#
# Holds the graph to be serialized and an optional base URI. #serialize is a
# no-op here; concrete serializers override it.
class AbstractSerializer
  # graph:: the graph handed to #initialize
  # base::  base URI used by #relativize (nil until set)
  attr_accessor :graph, :base

  # @param [Graph] graph:: Graph to be serialized
  def initialize(graph)
    @graph = graph
    @base = nil
  end

  # Serialize the graph
  #
  # @param [IO, StreamIO] stream:: Stream in which to place serialized graph
  # @param [Hash] options:: Options for serializer
  # <em>options[:base]</em>:: Base URI of graph, used to shorten URI references
  def serialize(stream, options = {})
  end

  # Strip the base URI from the front of +uri+.
  #
  # The base is matched literally (regexp metacharacters such as "." and "?"
  # in the base are escaped) and only at the very start of the string.
  #
  # @param [#to_s] uri:: URI to shorten
  # @return [String]:: +uri+ as a string, without the leading base when a base
  #   is set and +uri+ starts with it; otherwise the unchanged string
  def relativize(uri)
    uri = uri.to_s
    return uri unless base

    uri.sub(/\A#{Regexp.escape(base.to_s)}/, "")
  end
end
26
+ end
@@ -0,0 +1,12 @@
1
+ require File.join(File.dirname(__FILE__), 'abstract_serializer')
2
+
3
+ module RdfContext
4
# Serialize RDF graphs in NTriples format
class NTSerializer < AbstractSerializer
  # Write every triple of the graph to +stream+, one statement per line, using
  # each triple's +to_ntriples+ form followed by a newline.
  #
  # @param [IO, StreamIO] stream:: Stream in which to place serialized graph
  # @param [Hash] options:: Accepted so the signature matches
  #   AbstractSerializer#serialize; not used by this serializer
  def serialize(stream, options = {})
    @graph.triples.each do |triple|
      stream.write(triple.to_ntriples + "\n")
    end
  end
end
12
+ end
@@ -0,0 +1,140 @@
1
+ require File.join(File.dirname(__FILE__), 'abstract_serializer')
2
+ require File.join(File.dirname(__FILE__), '..', 'bnode')
3
+ require File.join(File.dirname(__FILE__), '..', 'literal')
4
+
5
+ module RdfContext
6
# Recursive serializer
#
# Shared bookkeeping for serializers that emit the graph one subject at a
# time: how often each node is referenced, which subjects exist, which have
# already been written, and which namespaces are in use.
class RecursiveSerializer < AbstractSerializer
  MAX_DEPTH = 10        # not referenced anywhere in this class
  INDENT_STRING = " "   # repeated once per indentation level by #indent

  # @param [Graph] graph:: Graph to be serialized
  def initialize(graph)
    super(graph)
    @stream = nil
    reset
  end

  # Classes whose instances #order_subjects places first.
  def top_classes; [RDFS_NS.Class]; end

  # Predicates that #sort_properties lists first, in this order.
  def predicate_order; [RDF_TYPE, RDFS_NS.label, DC_NS.title]; end

  # Has this subject already been marked with #subject_done?
  def is_done?(subject)
    @serialized.include?(subject)
  end

  # Mark a subject as done.
  def subject_done(subject)
    @serialized[subject] = true
  end

  # Build the order in which subjects are emitted.
  #
  # Subjects typed with one of #top_classes come first (sorted, de-duplicated,
  # and recorded in @top_levels). The remaining subjects collected by
  # #preprocess follow, sorted by: non-BNode before BNode, then reference
  # count, then the subject itself.
  #
  # @return [Array]:: ordered subjects
  def order_subjects
    seen = {}
    subjects = []

    top_classes.each do |class_uri|
      typed = graph.triples(Triple.new(nil, RDF_TYPE, class_uri)).map {|t| t.subject}
      typed.sort.uniq.each do |subject|
        subjects << subject
        seen[subject] = @top_levels[subject] = true
      end
    end

    # Sort subjects by resources over bnodes, ref_counts and the subject URI itself
    recursable = @subjects.keys.
      reject {|s| seen.include?(s)}.
      map {|r| [r.is_a?(BNode) ? 1 : 0, ref_count(r), r]}.
      sort

    subjects + recursable.map {|r| r.last}
  end

  # Run #preprocess_triple over every triple of the graph.
  def preprocess
    @graph.triples.each {|t| preprocess_triple(t)}
  end

  # Count one more reference to the triple's object and record its subject.
  def preprocess_triple(triple)
    @references[triple.object] = ref_count(triple.object) + 1
    @subjects[triple.subject] = true
  end

  # Return the number of times this node has been referenced in the object position
  def ref_count(node)
    @references.fetch(node, 0)
  end

  # Return a QName for the URI and add its namespace to the defined namespaces.
  #
  # @return [String, false, nil]:: the QName; +false+ when the graph raises
  #   RdfException for the URI or the QName contains a "."; +nil+ when +uri+
  #   is not a URIRef
  def get_qname(uri)
    return unless uri.is_a?(URIRef)

    begin
      qn = @graph.qname(uri)
    rescue RdfException
      return false # no namespace
    end
    # Local parts with . will mess up serialization
    return false if qn.index('.')

    add_namespace(uri.namespace)
    qn
  end

  # Register a namespace under its prefix.
  def add_namespace(ns)
    # The namespace set is changing, so the memoized #uri_binding is stale.
    @uri_binding = nil
    @namespaces[ns.prefix.to_s] = ns
  end

  # URI -> Namespace bindings (similar to graph) for looking up qnames
  def uri_binding
    @uri_binding ||= @namespaces.values.inject({}) {|hash, ns| hash[ns.uri.to_s] = ns; hash}
  end

  # Clear all per-run state so the serializer can be used again.
  def reset
    @depth = 0
    @lists = {}
    @namespaces = {}
    @references = {}
    @serialized = {}
    @subjects = {}
    # Drop the memoized bindings along with the namespaces they were built from.
    @uri_binding = nil
    @top_levels = {}
  end

  # Take a hash from predicate uris to lists of values.
  # Sort the lists of values (in place, inside +properties+). Return a sorted
  # list of property names as strings: #predicate_order entries that are
  # present come first, then the remaining keys in sorted order.
  def sort_properties(properties)
    properties.keys.each do |k|
      properties[k] = properties[k].sort do |a, b|
        # URIRefs whose short name looks like "_1", "_2", ... are compared via
        # to_i, everything else via to_s.
        # NOTE(review): relies on URIRef#to_i, which is not visible here — confirm.
        a_li = (a.is_a?(URIRef) && a.short_name =~ /^_\d+$/) ? a.to_i : a.to_s
        b_li = (b.is_a?(URIRef) && b.short_name =~ /^_\d+$/) ? b.to_i : b.to_s

        a_li <=> b_li
      end
    end

    # Make sorted list of properties
    prop_list = []

    predicate_order.each do |prop|
      next unless properties[prop]
      prop_list << prop.to_s
    end

    properties.keys.sort.each do |prop|
      next if prop_list.include?(prop.to_s)
      prop_list << prop.to_s
    end

    # Array#join is core Ruby; the previous to_sentence is not.
    puts "sort_properties: #{prop_list.join(', ')}" if $DEBUG
    prop_list
  end

  # Returns indent string multiplied by the depth
  def indent(modifier = 0)
    INDENT_STRING * (@depth + modifier)
  end

  # Write text
  def write(text)
    @stream.write(text)
  end
end
140
+ end
@@ -0,0 +1,219 @@
1
+ require File.join(File.dirname(__FILE__), 'recursive_serializer')
2
+
3
+ module RdfContext
4
# Turtle serializer
#
# Writes the graph as @base/@prefix declarations followed by one statement per
# subject, using "( ... )" for well-formed lists and "[ ... ]" for blank nodes
# referenced at most once.
class TurtleSerializer < RecursiveSerializer
  # Position of the node currently being written within a statement.
  SUBJECT = 0
  VERB = 1
  OBJECT = 2

  # Clear per-run state, including the Turtle-specific fields.
  def reset
    super
    @shortNames = {}
    @started = false
  end

  # Return "<relative>" for URIRefs that start with the base URI, otherwise
  # defer to the inherited QName lookup. Returns nil for non-URIRefs.
  def get_qname(uri)
    return unless uri.is_a?(URIRef)

    if @base
      # Match the base literally and only at the start of the URI.
      md = uri.to_s.match(/\A#{Regexp.escape(@base.to_s)}(.*)\z/)
      return "<#{md[1]}>" if md
    end

    super(uri)
  end

  # In addition to the inherited bookkeeping, look up QNames for all three
  # terms (which registers their namespaces) and count the predicate as
  # referenced.
  def preprocess_triple(triple)
    super

    # Pre-fetch qnames, to fill namespaces
    get_qname(triple.subject)
    get_qname(triple.predicate)
    get_qname(triple.object)

    @references[triple.predicate] = ref_count(triple.predicate) + 1
  end

  # Text used for a node: its QName when one is available, else its N3 form.
  def label(node)
    get_qname(node) || node.to_n3
  end

  # Write the @base line (when a base is set) and one @prefix line per
  # namespace collected during preprocessing, sorted by prefix.
  def start_document
    @started = true

    write("#{indent}@base <#{@base}> .\n") if @base

    ns_list = @namespaces.values.sort_by {|ns| ns.prefix}
    return if ns_list.empty?

    prefix_lines = ns_list.map {|ns| "#{indent}@prefix #{ns.prefix}: <#{ns.uri}> ."}
    write(prefix_lines.join("\n") + "\n")
  end

  # Nothing is written at the end of a Turtle document.
  def end_document; end

  # Checks if l is a valid RDF list: starting at +l+, every node up to rdf:nil
  # has an rdf:first and exactly one rdf:rest. A chain that loops back onto
  # itself is rejected instead of being followed forever.
  def is_valid_list(l)
    first_key = RDF_NS.first.to_s
    rest_key = RDF_NS.rest.to_s

    props = @graph.properties(l)
    return false unless props.has_key?(first_key) || l == RDF_NS.nil

    visited = {}
    while l && l != RDF_NS.nil do
      return false if visited.has_key?(l) # cyclic rdf:rest chain
      visited[l] = true

      return false unless props.has_key?(first_key) && props.has_key?(rest_key)
      n = props[rest_key]
      return false unless n.is_a?(Array) && n.length == 1
      l = n.first
      props = @graph.properties(l)
    end
    true
  end

  # Write the items of the list starting at +l+, marking each list node that
  # has an item as done. The first item is written in SUBJECT position, the
  # rest in OBJECT position.
  def do_list(l)
    puts "do_list: #{l.inspect}" if $DEBUG
    position = SUBJECT
    while l do
      p = @graph.properties(l)
      item = p.fetch(RDF_NS.first.to_s, []).first
      if item
        path(item, position)
        subject_done(l)
        position = OBJECT
      end
      l = p.fetch(RDF_NS.rest.to_s, []).first
    end
  end

  # Write +node+ as a "( ... )" collection when it is a valid list.
  #
  # @return [Boolean]:: true when the list was written, false when +node+ is
  #   not a valid list (nothing is written in that case)
  def p_list(node, position)
    return false unless is_valid_list(node)

    write(position == SUBJECT ? "(" : " (")
    @depth += 2
    do_list(node)
    @depth -= 2
    write(')')

    # Explicit result so #path does not depend on what the stream's write returns.
    true
  end

  # Can +node+ be written inline as "[ ... ]"? True for a BNode that has not
  # been serialized yet and is referenced at most once.
  def p_squared?(node, position)
    node.is_a?(BNode) &&
      !@serialized.has_key?(node) &&
      ref_count(node) <= 1
  end

  # Write +node+ inline as "[ predicate list ]" when #p_squared? allows it.
  #
  # @return [Boolean]:: true when written, false otherwise
  def p_squared(node, position)
    return false unless p_squared?(node, position)

    subject_done(node)
    write(position == SUBJECT ? '[' : ' [')
    @depth += 2
    predicate_list(node)
    @depth -= 2
    write(']')

    true
  end

  # Write the node's label, preceded by a space unless in SUBJECT position.
  #
  # @return [Boolean]:: always true
  def p_default(node, position)
    l = (position == SUBJECT ? "" : " ") + label(node)
    write(l)

    # Explicit result so #path does not depend on what the stream's write returns.
    true
  end

  # Write a node using the first form that applies: list, inline blank node,
  # or plain label.
  #
  # @raise [RdfException]:: when none of the forms reports success
  def path(node, position)
    puts "path: #{node.inspect}, pos: #{position}, []: #{is_valid_list(node)}, p2?: #{p_squared?(node, position)}, rc: #{ref_count(node)}" if $DEBUG
    raise RdfException, "Cannot serialize node '#{node}'" unless p_list(node, position) || p_squared(node, position) || p_default(node, position)
  end

  # Write a predicate; rdf:type is abbreviated to "a".
  def verb(node)
    puts "verb: #{node.inspect}" if $DEBUG
    if node == RDF_TYPE
      write(" a")
    else
      path(node, VERB)
    end
  end

  # Write the objects of one predicate, separated by "," and a new line.
  def object_list(objects)
    puts "object_list: #{objects.inspect}" if $DEBUG
    return if objects.empty?

    objects.each_with_index do |obj, i|
      write(",\n#{indent(2)}") if i > 0
      path(obj, OBJECT)
    end
  end

  # Write all predicates of +subject+ with their objects, separated by ";"
  # and a new line. rdf:first and rdf:rest are left out.
  def predicate_list(subject)
    properties = @graph.properties(subject)
    prop_list = sort_properties(properties) - [RDF_NS.first.to_s, RDF_NS.rest.to_s]
    puts "predicate_list: #{prop_list.inspect}" if $DEBUG
    return if prop_list.empty?

    prop_list.each_with_index do |prop, i|
      write(";\n#{indent(2)}") if i > 0
      verb(URIRef.new(prop))
      object_list(properties[prop])
    end
  end

  # Can +subject+ be written as a top-level "[ ... ] ."? True for a BNode that
  # is never referenced as an object and is not a valid list.
  def s_squared?(subject)
    ref_count(subject) == 0 && subject.is_a?(BNode) && !is_valid_list(subject)
  end

  # Write +subject+ as "[ predicate list ] ." when #s_squared? allows it.
  #
  # @return [Boolean]:: true when written, false otherwise
  def s_squared(subject)
    return false unless s_squared?(subject)

    write("\n#{indent} [")
    @depth += 1
    predicate_list(subject)
    @depth -= 1
    write("] .")
    true
  end

  # Write +subject+ followed by its predicate list and a closing " .".
  #
  # @return [Boolean]:: always true
  def s_default(subject)
    write("\n#{indent}")
    path(subject, SUBJECT)
    predicate_list(subject)
    write(" .")
    true
  end

  # Write the statement for one subject and mark it as done.
  def statement(subject)
    # Use the side-effect-free predicate here: s_squared itself writes to the
    # stream, which would duplicate output whenever $DEBUG is set.
    puts "statement: #{subject.inspect}, s2?: #{s_squared?(subject)}" if $DEBUG
    subject_done(subject)
    s_squared(subject) || s_default(subject)
  end

  # Serialize the graph
  #
  # Binds the RDF and RDFS namespaces on the graph, gathers reference counts
  # and namespaces, writes the document header, then one statement per subject
  # that has not already been written inline.
  #
  # @param [IO, StreamIO] stream:: Stream in which to place serialized graph
  # @param [Hash] options:: Options for serializer
  # <em>options[:base]</em>:: Base URI of graph, used to shorten URI references
  def serialize(stream, options = {})
    puts "\nserialize: #{@graph.inspect}" if $DEBUG
    reset
    @stream = stream
    @base = options[:base]

    @graph.bind(RDF_NS)
    @graph.bind(RDFS_NS)

    preprocess
    start_document

    order_subjects.each do |subject|
      statement(subject) unless is_done?(subject)
    end

    end_document
  end
end
219
+ end