rdf_context 0.5.0 → 0.5.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -379,6 +379,10 @@ module RdfContext
379
379
  end
380
380
  end
381
381
 
382
# Order this object relative to +other+ by comparing the string forms
# (+to_s+) of both sides.
#
# @param [Object] other:: object to compare against
# @return [Integer, nil]:: result of String#<=> on the two string forms
def <=>(other)
  mine = to_s
  theirs = other.to_s
  mine <=> theirs
end
385
+
382
386
  def hash
383
387
  [@contents, @encoding, @lang].hash
384
388
  end
@@ -425,10 +429,11 @@ module RdfContext
425
429
  encoding.xml_args(@contents, @lang)
426
430
  end
427
431
 
432
# True when the encoding equals Encoding.the_null_encoding.
def untyped?
  encoding == Encoding.the_null_encoding
end
433
# True when the encoding differs from Encoding.the_null_encoding.
def typed?
  encoding != Encoding.the_null_encoding
end
434
+
428
435
  # Is this an XMLLiteral?
429
- def xmlliteral?
430
- encoding.is_a?(XMLLiteral)
431
- end
436
+ def xmlliteral?; encoding == Encoding.xmlliteral; end
432
437
 
433
438
  # Output literal contents as a string
434
439
  def to_s
@@ -28,7 +28,7 @@ module RdfContext
28
28
  # @param [String] uri:: the URI of the document
29
29
  # @param [Hash] options:: Options from
30
30
  # <em>options[:debug]</em>:: Array to place debug messages
31
- # <em>options[:type]</em>:: One of _rdfxml_, _html_, or _n3_
31
+ # <em>options[:type]</em>:: One of _rdfxml_, _html_, or _n3_ (among others)
32
32
  # <em>options[:strict]</em>:: Raise Error if true, continue with lax parsing, otherwise
33
33
  # @return [Graph]:: Returns the graph containing parsed triples
34
34
  # @raise [Error]:: Raises RdfError if _strict_
@@ -62,9 +62,9 @@ module RdfContext
62
62
 
63
63
  # Create a delegate of a specific parser class
64
64
  @delegate ||= case options[:type].to_s
65
- when "n3", "ntriples", "turtle" then N3Parser.new(options)
66
- when "rdfa", "html", "xhtml" then RdfaParser.new(options)
67
- when "xml", "rdf", "rdfxml" then RdfXmlParser.new(options)
65
+ when "n3", "ntriples", "turtle", "ttl", "n3", "notation3" then N3Parser.new(options)
66
+ when "rdfa", "html", "xhtml" then RdfaParser.new(options)
67
+ when "xml", "rdf", "rdfxml" then RdfXmlParser.new(options)
68
68
  else
69
69
  RdfXmlParser.new(options)
70
70
  # raise ParserException.new("type option must be one of :rdfxml, :html, or :n3")
@@ -0,0 +1,26 @@
1
+ require File.join(File.dirname(__FILE__), '..', 'uriref')
2
+
3
+ module RdfContext
4
# Abstract serializer
#
# Holds the graph to be serialized and an optional base URI, and offers a
# helper for expressing URIs relative to that base. +serialize+ is an empty
# hook here.
class AbstractSerializer
  attr_accessor :graph, :base

  # @param graph:: the graph to serialize (stored as-is)
  def initialize(graph)
    @graph = graph
    @base = nil
  end

  # Serialize the graph
  #
  # This implementation does nothing and returns nil.
  #
  # @param [IO, StreamIO] stream:: Stream in which to place serialized graph
  # @param [Hash] options:: Options for serializer
  # <em>options[:base]</em>:: Base URI of graph, used to shorten URI references
  def serialize(stream, options = {})
  end

  # Strip the base URI from the front of +uri+.
  #
  # The base is matched literally: regular-expression metacharacters that are
  # common in URIs ('.', '?', '+') are escaped, so only a true prefix match is
  # removed.
  #
  # @param uri:: URI (anything responding to +to_s+) to make relative
  # @return [String]:: +uri+ as a string, without the leading base when a
  #   base is set and +uri+ starts with it
  def relativize(uri)
    uri = uri.to_s
    return uri unless base

    uri.sub(/\A#{Regexp.escape(base.to_s)}/, "")
  end
end
26
+ end
@@ -0,0 +1,12 @@
1
+ require File.join(File.dirname(__FILE__), 'abstract_serializer')
2
+
3
+ module RdfContext
4
# Serialize RDF graphs in NTriples format
class NTSerializer < AbstractSerializer
  # Write each triple of the graph to +stream+ as one line: the triple's
  # +to_ntriples+ form followed by a newline.
  #
  # @param [IO, StreamIO] stream:: Stream in which to place serialized graph
  # @param base:: accepted but not used by this method
  # @return [Array]:: the values returned by each +stream.write+ call
  def serialize(stream, base = nil)
    @graph.triples.map { |triple| stream.write(triple.to_ntriples + "\n") }
  end
end
12
+ end
@@ -0,0 +1,140 @@
1
+ require File.join(File.dirname(__FILE__), 'abstract_serializer')
2
+ require File.join(File.dirname(__FILE__), '..', 'bnode')
3
+ require File.join(File.dirname(__FILE__), '..', 'literal')
4
+
5
+ module RdfContext
6
# Recursive serializer
#
# Bookkeeping shared by serializers that emit a graph subject by subject:
# reference counts for nodes, the set of subjects already written, namespace
# collection, ordering of subjects and properties, and indentation.
class RecursiveSerializer < AbstractSerializer
  MAX_DEPTH = 10
  INDENT_STRING = " "

  def initialize(graph)
    super(graph)
    @stream = nil
    self.reset
  end

  # Classes whose instances are ordered first by order_subjects
  def top_classes; [RDFS_NS.Class]; end

  # Predicates placed first, in this order, by sort_properties
  def predicate_order; [RDF_TYPE, RDFS_NS.label, DC_NS.title]; end

  # Has this subject already been marked as done?
  def is_done?(subject)
    @serialized.include?(subject)
  end

  # Mark a subject as done.
  def subject_done(subject)
    @serialized[subject] = true
  end

  # Return the subjects in the order they should be serialized: instances of
  # top_classes first (also recorded in @top_levels), then every remaining
  # subject seen during preprocessing.
  def order_subjects
    seen = {}
    subjects = []

    top_classes.each do |class_uri|
      graph.triples(Triple.new(nil, RDF_TYPE, class_uri)).map {|t| t.subject}.sort.uniq.each do |subject|
        #puts "order_subjects: #{subject.inspect}"
        subjects << subject
        seen[subject] = @top_levels[subject] = true
      end
    end

    # Sort subjects by resources over bnodes, ref_counts and the subject URI itself
    recursable = @subjects.keys.
      select {|s| !seen.include?(s)}.
      map {|r| [r.is_a?(BNode) ? 1 : 0, ref_count(r), r]}.
      sort

    subjects += recursable.map{|r| r.last}
  end

  # Run preprocess_triple over every triple of the graph
  def preprocess
    @graph.triples.each {|t| preprocess_triple(t)}
  end

  # Count one more reference to the triple's object and record its subject
  def preprocess_triple(triple)
    #puts "preprocess: #{triple.inspect}"
    references = ref_count(triple.object) + 1
    @references[triple.object] = references
    @subjects[triple.subject] = true
  end

  # Return the number of times this node has been referenced in the object position
  def ref_count(node)
    @references.fetch(node, 0)
  end

  # Return a QName for the URI. Adds namespace of QName to defined namespaces.
  #
  # Returns false when the graph raises RdfException for the URI or when the
  # QName contains a '.', and nil when +uri+ is not a URIRef.
  def get_qname(uri)
    if uri.is_a?(URIRef)
      begin
        qn = @graph.qname(uri)
      rescue RdfException
        return false # no namespace
      end
      # Local parts with . will mess up serialization
      return false if qn.index('.')

      add_namespace(uri.namespace)
      return qn
    end
  end

  # Remember a namespace, keyed by its prefix as a string
  def add_namespace(ns)
    @namespaces[ns.prefix.to_s] = ns
  end

  # URI -> Namespace bindings (similar to graph) for looking up qnames.
  # Memoized; the cache is cleared by reset.
  def uri_binding
    @uri_binding ||= @namespaces.values.inject({}) {|hash, ns| hash[ns.uri.to_s] = ns; hash}
  end

  # Clear all per-serialization state
  def reset
    @depth = 0
    @lists = {}
    @namespaces = {}
    @references = {}
    @serialized = {}
    @subjects = {}
    @top_levels = {}
    # Drop the memoized bindings too, otherwise they would describe the
    # namespaces of a previous run.
    @uri_binding = nil
  end

  # Take a hash from predicate uris to lists of values.
  # Sort the lists of values. Return a sorted list of properties.
  #
  # Values whose short name looks like a list-membership property (_1, _2, ...)
  # sort numerically and ahead of all other values, which sort by their string
  # form. The returned list holds predicate strings: those named by
  # predicate_order first, then the remaining keys in sorted order.
  def sort_properties(properties)
    properties.keys.each do |k|
      properties[k] = properties[k].sort_by {|value| value_sort_key(value)}
    end

    # Make sorted list of properties
    prop_list = []

    predicate_order.each do |prop|
      # The property hash is keyed by predicate strings elsewhere in this
      # code base, so look the preferred predicate up by its string form too.
      next unless properties[prop] || properties[prop.to_s]
      prop_list << prop.to_s
    end

    properties.keys.sort.each do |prop|
      next if prop_list.include?(prop.to_s)
      prop_list << prop.to_s
    end

    puts "sort_properties: #{prop_list.to_sentence}" if $DEBUG
    prop_list
  end

  # Returns indent string multiplied by the depth
  def indent(modifier = 0)
    INDENT_STRING * (@depth + modifier)
  end

  # Write text
  def write(text)
    @stream.write(text)
  end

  private

  # Sort key for a property value. The leading 0/1 keeps numeric keys and
  # string keys in separate groups, so an Integer is never compared with a
  # String (which would make the sort raise).
  def value_sort_key(value)
    if value.is_a?(URIRef) && value.short_name =~ /^_\d+$/
      # Derive the index from the short name itself ("_12" -> 12).
      [0, value.short_name.to_s[1..-1].to_i]
    else
      [1, value.to_s]
    end
  end
end
140
+ end
@@ -0,0 +1,219 @@
1
+ require File.join(File.dirname(__FILE__), 'recursive_serializer')
2
+
3
+ module RdfContext
4
# Serialize RDF graphs in Turtle format
class TurtleSerializer < RecursiveSerializer
  # Position of a node within a statement
  SUBJECT = 0
  VERB = 1
  OBJECT = 2

  # Clear per-serialization state, including Turtle-specific state
  def reset
    super
    @shortNames = {}
    @started = false
  end

  # Return a label for a URIRef: a <relative> reference when the URI starts
  # with the base, otherwise whatever the superclass returns. Returns nil when
  # +uri+ is not a URIRef.
  def get_qname(uri)
    return unless uri.is_a?(URIRef)

    if @base
      # Escape the base so that '.', '?' etc. in the URI are matched literally
      md = uri.to_s.match(/^#{Regexp.escape(@base.to_s)}(.*)$/)
      return "<#{md[1]}>" if md
    end

    super(uri)
  end

  # In addition to the inherited bookkeeping, collect namespaces for all
  # three terms and count a reference for the predicate.
  def preprocess_triple(triple)
    super

    # Pre-fetch qnames, to fill namespaces
    get_qname(triple.subject)
    get_qname(triple.predicate)
    get_qname(triple.object)

    @references[triple.predicate] = ref_count(triple.predicate) + 1
  end

  # QName (or relative reference) for the node, falling back to its N3 form
  def label(node)
    get_qname(node) || node.to_n3
  end

  # Write the @base directive (when a base is set) and one @prefix directive
  # per collected namespace, ordered by prefix.
  def start_document
    @started = true

    write("#{indent}@base <#{@base}> .\n") if @base

    ns_list = @namespaces.values.sort_by {|ns| ns.prefix}
    unless ns_list.empty?
      ns_str = ns_list.map do |ns|
        "#{indent}@prefix #{ns.prefix}: <#{ns.uri}> ."
      end.join("\n") + "\n"
      write(ns_str)
    end
  end

  def end_document; end

  # Checks if l is a valid RDF list, i.e. no nodes have other properties.
  def is_valid_list(l)
    props = @graph.properties(l)
    #puts "is_valid_list: #{props.inspect}" if $DEBUG
    return false unless props.has_key?(RDF_NS.first.to_s) || l == RDF_NS.nil
    while l && l != RDF_NS.nil do
      #puts "is_valid_list(length): #{props.length}" if $DEBUG
      return false unless props.has_key?(RDF_NS.first.to_s) && props.has_key?(RDF_NS.rest.to_s)
      n = props[RDF_NS.rest.to_s]
      #puts "is_valid_list(n): #{n.inspect}" if $DEBUG
      return false unless n.is_a?(Array) && n.length == 1
      l = n.first
      props = @graph.properties(l)
    end
    #puts "is_valid_list: valid" if $DEBUG
    true
  end

  # Write the items of the list starting at l, following rdf:rest links and
  # marking each list node that has an item as done.
  def do_list(l)
    puts "do_list: #{l.inspect}" if $DEBUG
    position = SUBJECT
    while l do
      props = @graph.properties(l)
      item = props.fetch(RDF_NS.first.to_s, []).first
      if item
        path(item, position)
        subject_done(l)
        position = OBJECT
      end
      l = props.fetch(RDF_NS.rest.to_s, []).first
    end
  end

  # Write node as a parenthesized list. Returns false (writing nothing) when
  # node is not a valid list, true otherwise.
  def p_list(node, position)
    return false if !is_valid_list(node)
    #puts "p_list: #{node.inspect}, #{position}" if $DEBUG

    write(position == SUBJECT ? "(" : " (")
    @depth += 2
    do_list(node)
    @depth -= 2
    write(')')

    # Report success explicitly rather than through the stream's return value
    true
  end

  # Can node be written inline in square brackets? Requires a BNode that has
  # not been serialized yet and is referenced at most once.
  def p_squared?(node, position)
    node.is_a?(BNode) &&
      !@serialized.has_key?(node) &&
      ref_count(node) <= 1
  end

  # Write node inline as [ ... ]. Returns false (writing nothing) when
  # p_squared? does not hold, true otherwise.
  def p_squared(node, position)
    return false unless p_squared?(node, position)

    #puts "p_squared: #{node.inspect}, #{position}" if $DEBUG
    subject_done(node)
    write(position == SUBJECT ? '[' : ' [')
    @depth += 2
    predicate_list(node)
    @depth -= 2
    write(']')

    true
  end

  # Write the node's label, preceded by a space unless in subject position.
  # Always returns true.
  def p_default(node, position)
    #puts "p_default: #{node.inspect}, #{position}" if $DEBUG
    l = (position == SUBJECT ? "" : " ") + label(node)
    write(l)

    # Report success explicitly rather than through the stream's return value
    true
  end

  # Write node using the first form that applies: list, inline bnode, label.
  #
  # @raise [RdfException]:: when none of the forms reports success
  def path(node, position)
    puts "path: #{node.inspect}, pos: #{position}, []: #{is_valid_list(node)}, p2?: #{p_squared?(node, position)}, rc: #{ref_count(node)}" if $DEBUG
    raise RdfException, "Cannot serialize node '#{node}'" unless p_list(node, position) || p_squared(node, position) || p_default(node, position)
  end

  # Write a predicate, using the "a" shorthand for RDF_TYPE
  def verb(node)
    puts "verb: #{node.inspect}" if $DEBUG
    if node == RDF_TYPE
      write(" a")
    else
      path(node, VERB)
    end
  end

  # Write the objects separated by ",", each further object on its own line
  def object_list(objects)
    puts "object_list: #{objects.inspect}" if $DEBUG
    return if objects.empty?

    objects.each_with_index do |obj, i|
      write(",\n#{indent(2)}") if i > 0
      path(obj, OBJECT)
    end
  end

  # Write every property of subject (except rdf:first and rdf:rest) with its
  # objects, separated by ";"
  def predicate_list(subject)
    properties = @graph.properties(subject)
    prop_list = sort_properties(properties) - [RDF_NS.first.to_s, RDF_NS.rest.to_s]
    puts "predicate_list: #{prop_list.inspect}" if $DEBUG
    return if prop_list.empty?

    prop_list.each_with_index do |prop, i|
      write(";\n#{indent(2)}") if i > 0
      verb(URIRef.new(prop))
      object_list(properties[prop])
    end
  end

  # Can subject be written as an anonymous [ ... ] statement? Requires an
  # unreferenced BNode that is not a valid list.
  def s_squared?(subject)
    ref_count(subject) == 0 && subject.is_a?(BNode) && !is_valid_list(subject)
  end

  # Write subject as "[ ... ] ." Returns false (writing nothing) when
  # s_squared? does not hold, true otherwise.
  def s_squared(subject)
    return false unless s_squared?(subject)

    write("\n#{indent} [")
    @depth += 1
    predicate_list(subject)
    @depth -= 1
    write("] .")
    true
  end

  # Write subject followed by its predicate list and a terminating " ."
  def s_default(subject)
    write("\n#{indent}")
    path(subject, SUBJECT)
    predicate_list(subject)
    write(" .")
    true
  end

  # Write one complete statement for subject and mark it as done
  def statement(subject)
    # Use the side-effect-free predicate here: calling s_squared would write
    # the statement to the stream as part of the debug message.
    puts "statement: #{subject.inspect}, s2?: #{s_squared?(subject)}" if $DEBUG
    subject_done(subject)
    s_squared(subject) || s_default(subject)
  end

  # Serialize the graph
  #
  # @param [IO, StreamIO] stream:: Stream in which to place serialized graph
  # @param [Hash] options:: Options for serializer
  # <em>options[:base]</em>:: Base URI of graph, used to shorten URI references
  def serialize(stream, options = {})
    puts "\nserialize: #{@graph.inspect}" if $DEBUG
    reset
    @stream = stream
    @base = options[:base]

    @graph.bind(RDF_NS)
    @graph.bind(RDFS_NS)

    preprocess
    start_document

    order_subjects.each do |subject|
      #puts "subj: #{subject.inspect}"
      unless is_done?(subject)
        statement(subject)
      end
    end

    end_document
  end
end
219
+ end