rdf-turtle 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,562 @@
1
+ require 'rdf/turtle/terminals'
2
+
3
+ module RDF::Turtle
4
+ ##
5
+ # A Turtle serialiser
6
+ #
7
+ # Note that the natural interface is to write a whole graph at a time.
8
+ # Writing statements or Triples will create a graph to add them to
9
+ # and then serialize the graph.
10
+ #
11
+ # @example Obtaining a Turtle writer class
12
+ # RDF::Writer.for(:n3) #=> RDF::Turtle::Writer
13
+ # RDF::Writer.for("etc/test.n3")
14
+ # RDF::Writer.for("etc/test.ttl")
15
+ # RDF::Writer.for(:file_name => "etc/test.n3")
16
+ # RDF::Writer.for(:file_name => "etc/test.ttl")
17
+ # RDF::Writer.for(:file_extension => "n3")
18
+ # RDF::Writer.for(:file_extension => "ttl")
19
+ # RDF::Writer.for(:content_type => "text/n3")
20
+ # RDF::Writer.for(:content_type => "text/turtle")
21
+ #
22
+ # @example Serializing RDF graph into a Turtle file
23
+ # RDF::Turtle::Writer.open("etc/test.n3") do |writer|
24
+ # writer << graph
25
+ # end
26
+ #
27
+ # @example Serializing RDF statements into a Turtle file
28
+ # RDF::Turtle::Writer.open("etc/test.n3") do |writer|
29
+ # graph.each_statement do |statement|
30
+ # writer << statement
31
+ # end
32
+ # end
33
+ #
34
+ # @example Serializing RDF statements into a Turtle string
35
+ # RDF::Turtle::Writer.buffer do |writer|
36
+ # graph.each_statement do |statement|
37
+ # writer << statement
38
+ # end
39
+ # end
40
+ #
41
+ # The writer will add prefix definitions, and use them for creating @prefix definitions, and minting QNames
42
+ #
43
+ # @example Creating @base and @prefix definitions in output
44
+ # RDF::Turtle::Writer.buffer(:base_uri => "http://example.com/", :prefixes => {
45
+ # nil => "http://example.com/ns#",
46
+ # :foaf => "http://xmlns.com/foaf/0.1/"}
47
+ # ) do |writer|
48
+ # graph.each_statement do |statement|
49
+ # writer << statement
50
+ # end
51
+ # end
52
+ #
53
+ # @author [Gregg Kellogg](http://kellogg-assoc.com/)
54
+ class Writer < RDF::Writer
55
+ format RDF::Turtle::Format
56
+
57
+ # @return [Graph] Graph of statements serialized
58
+ attr_accessor :graph
59
+ # @return [URI] Base URI used for relativizing URIs
60
+ attr_accessor :base_uri
61
+
62
##
# Builds a Turtle writer. An empty graph and the URI/prefix lookup caches
# are created before the caller's block (if any) is run.
#
# @param [IO, File] output
#   the output stream
# @param [Hash{Symbol => Object}] options
#   any additional options
# @option options [Encoding] :encoding (Encoding::UTF_8)
#   the encoding to use on the output stream (Ruby 1.9+)
# @option options [Boolean] :canonicalize (false)
#   whether to canonicalize literals when serializing
# @option options [Hash] :prefixes (Hash.new)
#   the prefix mappings to use (not supported by all writers)
# @option options [#to_s] :base_uri (nil)
#   the base URI to use when constructing relative URIs
# @option options [Integer] :max_depth (3)
#   Maximum depth for recursively defining resources, defaults to 3
# @option options [Boolean] :standard_prefixes (false)
#   Add standard prefixes to @prefixes, if necessary.
# @option options [String] :default_namespace (nil)
#   URI to use as default namespace, same as prefixes[nil]
# @yield [writer] `self`; a zero-arity block is instance_eval'd instead
# @yieldparam [RDF::Writer] writer
# @yieldreturn [void]
def initialize(output = $stdout, options = {}, &block)
  super do
    @graph = RDF::Graph.new
    @uri_to_pname = {}
    @uri_to_prefix = {}
    next unless block_given?

    # Zero-arity blocks run in the writer's context; others receive the writer.
    if block.arity.zero?
      instance_eval(&block)
    else
      block.call(self)
    end
  end
end
101
+
102
##
# Makes the supplied graph the one that will be serialized, replacing
# whatever graph the writer held before.
#
# @param [Graph] graph
# @return [void]
def write_graph(graph)
  self.graph = graph
end
110
+
111
##
# Adds a statement to the graph that will be serialized.
#
# @param [RDF::Statement] statement
# @return [void]
def write_statement(statement)
  graph.insert(statement)
end
118
+
119
##
# Adds a triple to the graph that will be serialized.
#
# The statement class is referenced as `RDF::Statement`: the enclosing
# module is opened as `module RDF::Turtle`, so a bare `Statement` would not
# be looked up inside `RDF`.
#
# @param [RDF::Resource] subject
# @param [RDF::URI] predicate
# @param [RDF::Value] object
# @return [void]
def write_triple(subject, predicate, object)
  @graph.insert(RDF::Statement.new(subject, predicate, object))
end
130
+
131
##
# Emits the Turtle representation of every stored triple: reads the
# serialization options, resets helper state, preprocesses the graph,
# writes the document header and then each subject not already written.
#
# @return [void]
# @see #write_triple
def write_epilogue
  @max_depth = @options[:max_depth] || 3
  @base_uri = RDF::URI(@options[:base_uri])
  @debug = @options[:debug]

  self.reset
  add_debug "\nserialize: graph: #{@graph.size}"
  preprocess
  start_document

  # Checked lazily: writing one subject may mark others as done.
  order_subjects.each do |resource|
    statement(resource) unless is_done?(resource)
  end
end
154
+
155
# Return a QName for the URI, or nil. Adds namespace of QName to defined prefixes.
#
# Blank nodes are returned as their string form; anything that is neither a
# node nor a URI yields nil. Results (including nil) are cached per URI.
#
# @param [RDF::Resource] resource
# @return [String, nil] value to use to identify URI
# @raise [RDF::WriterError] if an Addressable::URI::InvalidURIError is raised
def get_pname(resource)
  return resource.to_s if resource.is_a?(RDF::Node)
  return nil unless resource.is_a?(RDF::URI)

  uri = resource.to_s
  return @uri_to_pname[uri] if @uri_to_pname.has_key?(uri)

  pname =
    if (namespace = @uri_to_prefix.keys.detect { |candidate| uri.start_with?(candidate.to_s) })
      # Use a defined prefix
      label = @uri_to_prefix[namespace]
      prefix(label, namespace) unless namespace.to_s.empty? # Define for output
      add_debug "get_pname: add prefix #{label.inspect} => #{namespace}"
      uri.sub(namespace.to_s, "#{label}:")
    elsif @options[:standard_prefixes] &&
          (vocab = RDF::Vocabulary.each.to_a.detect { |v| uri.start_with?(v.to_uri.to_s) })
      label = vocab.__name__.to_s.split('::').last.downcase
      @uri_to_prefix[vocab.to_uri.to_s] = label
      prefix(label, vocab.to_uri) # Define for output
      add_debug "get_pname: add standard prefix #{label.inspect} => #{vocab.to_uri}"
      uri.sub(vocab.to_uri.to_s, "#{label}:")
    end

  # Keep the candidate only if one of the PNAME terminals matches all of it
  if pname
    matched = Terminals::PNAME_LN.match(pname) || Terminals::PNAME_NS.match(pname)
    pname = nil unless matched.to_s.length == pname.length
  end

  @uri_to_pname[uri] = pname
rescue Addressable::URI::InvalidURIError => e
  raise RDF::WriterError, "Invalid URI #{resource.inspect}: #{e.message}"
end
197
+
198
# Take a hash from predicate URIs to lists of values.
# Sort the lists of values (in place). Return a sorted list of properties.
#
# Values whose string form starts with the rdf:_ prefix are ordered by their
# trailing number and placed before all other values, which are ordered by
# string form. Using a composite sort key means a list mixing both kinds no
# longer fails on an Integer/String comparison.
#
# Entries of #predicate_order are matched by their string form (the hash is
# keyed by strings), falling back to the raw entry for hashes keyed otherwise.
#
# @param [Hash{String => Array<Resource>}] properties A hash of Property to Resource mappings
# @return [Array<String>] Ordered list of properties. Uses predicate_order.
def sort_properties(properties)
  li_prefix = RDF._.to_s

  properties.keys.each do |key|
    properties[key] = properties[key].sort_by do |value|
      text = value.to_s
      if text.index(li_prefix) == 0
        [0, text.match(/\d+$/).to_s.to_i, text]
      else
        [1, 0, text]
      end
    end
  end

  # Preferred predicates first, in the order given by predicate_order
  prop_list = []
  predicate_order.each do |prop|
    next unless properties[prop.to_s] || properties[prop]
    prop_list << prop.to_s
  end

  # Then everything else, alphabetically
  properties.keys.sort.each do |prop|
    next if prop_list.include?(prop.to_s)
    prop_list << prop.to_s
  end

  add_debug "sort_properties: #{prop_list.join(', ')}"
  prop_list
end
228
+
229
##
# Returns the Turtle representation of a literal.
#
# xsd:boolean, xsd:integer and xsd:decimal literals are written using their
# string form, xsd:double likewise but with a lower-case exponent marker.
# Any other RDF::Literal is quoted and followed by its language tag and/or
# datatype. Values that are not RDF::Literal instances are quoted via #to_s.
#
# @param [RDF::Literal, String, #to_s] literal
# @param [Hash{Symbol => Object}] options
# @return [String]
def format_literal(literal, options = {})
  # Only objects that support it can be canonicalized; plain strings and
  # other values accepted by this method are passed through untouched.
  if @options[:canonicalize] && literal.respond_to?(:canonicalize!)
    literal = literal.dup.canonicalize!
  end

  case literal
  when RDF::Literal
    case literal.datatype
    when RDF::XSD.boolean, RDF::XSD.integer, RDF::XSD.decimal
      literal.to_s
    when RDF::XSD.double
      literal.to_s.sub('E', 'e') # Favor lower case exponent
    else
      text = quoted(literal.value)
      text << "@#{literal.language}" if literal.has_language?
      text << "^^#{format_uri(literal.datatype)}" if literal.has_datatype?
      text
    end
  else
    quoted(literal.to_s)
  end
end
254
+
255
##
# Returns the Turtle representation of a URI reference.
#
# If relativizing changes the URI, the relative form is written in angle
# brackets; otherwise a prefixed name is used when one is available, falling
# back to the full URI in angle brackets.
#
# @param [RDF::URI] uri
# @param [Hash{Symbol => Object}] options
# @return [String]
def format_uri(uri, options = {})
  relative = relativize(uri)
  if relative != uri.to_s
    add_debug("relativize(#{uri.inspect}) => #{relative.inspect}")
    "<#{relative}>"
  else
    get_pname(uri) || "<#{uri}>"
  end
end
266
+
267
##
# Returns the Turtle representation of a blank node: `_:` followed by the
# node's id.
#
# @param [RDF::Node] node
# @param [Hash{Symbol => Object}] options
# @return [String]
def format_node(node, options = {})
  "_:#{node.id}"
end
276
+
277
+ protected
278
# Output @base and @prefix definitions.
#
# The @base line is skipped when the base URI is empty. Prefixes are written
# in order of their string form.
def start_document
  @started = true

  unless @base_uri.to_s.empty?
    @output.write("#{indent}@base <#{@base_uri}> .\n")
  end

  add_debug("start_document: #{prefixes.inspect}")
  ordered = prefixes.keys.sort_by { |name| name.to_s }
  ordered.each do |name|
    @output.write("#{indent}@prefix #{name}: <#{prefixes[name]}> .\n")
  end
end
289
+
290
# If @base_uri is defined, use it to try to make uri relative.
#
# The base is removed only when it is a leading prefix of the URI; a URI
# that merely contains the base somewhere else (for example inside a query
# string) is returned unchanged. A nil or empty base leaves the URI as is.
#
# @param [#to_s] uri
# @return [String]
def relativize(uri)
  uri = uri.to_s
  base = @base_uri.to_s
  return uri if base.empty? || !uri.start_with?(base)

  uri[base.length..-1]
end
297
+
298
# Defines rdf:type of subjects to be emitted at the beginning of the graph.
# Defaults to rdfs:Class.
# @return [Array<URI>]
def top_classes
  [RDF::RDFS.Class]
end
301
+
302
# Defines the order of predicates to emit at the beginning of a resource
# description. Defaults to [rdf:type, rdfs:label, dc:title].
# @return [Array<URI>]
def predicate_order
  [RDF.type, RDF::RDFS.label, RDF::DC.title]
end
306
+
307
# Order subjects for output. Override this to output subjects in another order.
#
# Uses #top_classes and #base_uri: the base URI comes first (when it is a
# subject), then subjects typed with one of the top classes, then all
# remaining subjects ranked by URI-before-bnode, reference count and the
# subject itself.
# @return [Array<Resource>] Ordered list of subjects
def order_subjects
  seen = {}
  subjects = []

  # Start with base_uri
  if base_uri && @subjects.keys.include?(base_uri)
    subjects << base_uri
    seen[base_uri] = true
  end

  # Add distinguished classes
  top_classes.each do |class_uri|
    typed = graph.query(:predicate => RDF.type, :object => class_uri).map { |st| st.subject }
    typed.sort.uniq.each do |subject|
      add_debug "order_subjects: #{subject.inspect}"
      subjects << subject
      seen[subject] = true
    end
  end

  # Sort subjects by resources over bnodes, ref_counts and the subject URI itself
  remaining = @subjects.keys.reject { |s| seen.include?(s) }
  ranked = remaining.map { |r| [r.is_a?(RDF::Node) ? 1 : 0, ref_count(r), r] }.sort

  subjects + ranked.map { |entry| entry.last }
end
338
+
339
# Perform any preprocessing of statements required.
#
# Records the configured prefixes as a namespace-to-prefix lookup, clears the
# prefix option so that only prefixes matched later are defined, registers
# the default namespace if one was given, and preprocesses every statement.
def preprocess
  # Load defined prefixes
  declared = @options[:prefixes] || {}
  declared.each_pair { |name, namespace| @uri_to_prefix[namespace.to_s] = name }
  @options[:prefixes] = {} # Will define actual used when matched

  default_ns = @options[:default_namespace]
  prefix(nil, default_ns) if default_ns

  @graph.each { |st| preprocess_statement(st) }
end
351
+
352
# Perform any statement preprocessing required. This is used to perform
# reference counts and determine required prefixes.
# @param [Statement] statement
def preprocess_statement(statement)
  #add_debug "preprocess: #{statement.inspect}"
  object = statement.object
  @references[object] = ref_count(object) + 1
  @subjects[statement.subject] = true

  # Pre-fetch pnames, to fill prefixes
  get_pname(statement.subject)
  get_pname(statement.predicate)
  get_pname(object)
  get_pname(object.datatype) if object.literal? && object.datatype

  predicate = statement.predicate
  @references[predicate] = ref_count(predicate) + 1
end
369
+
370
# Return the number of references recorded for this node, or 0 when none
# have been recorded.
# @return [Integer]
def ref_count(node)
  @references.fetch(node) { 0 }
end
375
+
376
# Returns the indent string repeated once per level of the current depth.
# @param [Integer] modifier Increase depth by specified amount
# @return [String] A number of spaces, depending on current depth
def indent(modifier = 0)
  width = @depth + modifier
  " " * width
end
382
+
383
# Reset internal helper instance variables: depth returns to zero, every
# bookkeeping hash is replaced by a fresh empty hash, and the started flag
# is cleared.
def reset
  @depth = 0
  @lists, @namespaces, @references = {}, {}, {}
  @serialized, @subjects, @shortNames = {}, {}, {}
  @started = false
end
394
+
395
##
# Use single- or multi-line quotes. If literal contains \t, \n, or \r, use a
# multiline quote, otherwise use a single-line quote.
#
# In the multi-line form every backslash is doubled and every double quote
# is backslash-escaped, so the value can neither terminate the `"""`
# delimiter early nor introduce an unintended escape sequence. The block
# form of gsub is used because a replacement *string* is itself scanned for
# backslash escapes, which would collapse the doubled backslash again.
#
# @param [String, #to_s] string
# @return [String]
def quoted(string)
  text = string.to_s
  if text =~ /[\t\n\r]/
    body = text.gsub('\\') { '\\\\' }.gsub('"') { '\\"' }
    %("""#{body}""")
  else
    "\"#{escaped(text)}\""
  end
end
408
+
409
+ private
410
+
411
# Report a debug message: printed to STDERR when RDF::Turtle debugging is
# enabled, and appended to the :debug option value when that is an Array.
#
# @param [String] message
def add_debug(message)
  STDERR.puts(message) if ::RDF::Turtle.debug?
  @debug << message if @debug.is_a?(Array)
end
418
+
419
# Checks if l is a valid RDF list, i.e. no nodes have other properties.
# Validity is delegated to RDF::List#valid? over the writer's graph.
def is_valid_list(l)
  #add_debug "is_valid_list: #{l.inspect}"
  RDF::List.new(l, @graph).valid?
end
424
+
425
# Writes the members of the RDF list headed by `l`. The first member is
# written in subject position, the rest in object position; each list node
# visited is marked as done.
def do_list(l)
  list = RDF::List.new(l, @graph)
  add_debug "do_list: #{list.inspect}"
  position = :subject
  list.each_statement do |st|
    # Only rdf:first statements carry list members
    if st.predicate == RDF.first
      add_debug " list this: #{st.subject} first: #{st.object}[#{position}]"
      path(st.object, position)
      subject_done(st.subject)
      position = :object
    end
  end
end
437
+
438
# Writes `node` as a parenthesised list if it heads a valid RDF list.
#
# Returns an explicit boolean, like #p_squared does, so that #path does not
# depend on what the output object's `write` happens to return.
#
# @param [RDF::Resource] node
# @param [Symbol] position :subject suppresses the leading space
# @return [Boolean] true if the node was written as a list
def p_list(node, position)
  return false unless is_valid_list(node)
  #add_debug "p_list: #{node.inspect}, #{position}"

  @output.write(position == :subject ? "(" : " (")
  @depth += 2
  do_list(node)
  @depth -= 2
  @output.write(')')
  true
end
448
+
449
# Whether `node` may be written inline in square brackets: it must be a
# blank node, not yet serialized, and referenced at most once.
# `position` is accepted but not consulted.
def p_squared?(node, position)
  return false unless node.is_a?(RDF::Node)
  return false if @serialized.has_key?(node)

  ref_count(node) <= 1
end
454
+
455
# Writes `node` inline as `[ ... ]` with its predicate list when
# #p_squared? allows it, marking the node as done first.
#
# @return [Boolean] true if the node was written, false otherwise
def p_squared(node, position)
  if p_squared?(node, position)
    #add_debug "p_squared: #{node.inspect}, #{position}"
    subject_done(node)
    opener = position == :subject ? '[' : ' ['
    @output.write(opener)
    @depth += 2
    predicate_list(node)
    @depth -= 2
    @output.write(']')
    true
  else
    false
  end
end
468
+
469
# Writes `node` using its formatted value, preceded by a space unless it is
# in subject position.
#
# Returns an explicit true, like #p_squared does on success, so that #path
# does not raise after output has already been written merely because the
# output object's `write` returned nil or false.
#
# @param [RDF::Value] node
# @param [Symbol] position
# @return [true]
def p_default(node, position)
  #add_debug "p_default: #{node.inspect}, #{position}"
  leader = position == :subject ? "" : " "
  @output.write(leader + format_value(node))
  true
end
474
+
475
# Writes `node` using the first form that applies: list, inline blank node,
# or plain value.
#
# @raise [RDF::WriterError] if none of the forms reports success
def path(node, position)
  add_debug "path: #{node.inspect}, pos: #{position}, []: #{is_valid_list(node)}, p2?: #{p_squared?(node, position)}, rc: #{ref_count(node)}"
  handled = p_list(node, position) || p_squared(node, position) || p_default(node, position)
  raise RDF::WriterError, "Cannot serialize node '#{node}'" unless handled
end
479
+
480
# Writes a predicate: rdf:type is abbreviated to ` a`, anything else is
# written through #path in predicate position.
def verb(node)
  add_debug "verb: #{node.inspect}"
  return @output.write(" a") if node == RDF.type

  path(node, :predicate)
end
488
+
489
# Writes the objects of one predicate, separating consecutive objects with
# a comma and a newline followed by an indent four levels deeper.
def object_list(objects)
  add_debug "object_list: #{objects.inspect}"
  return if objects.empty?

  objects.each_with_index do |obj, idx|
    @output.write(",\n#{indent(4)}") unless idx.zero?
    path(obj, :object)
  end
end
498
+
499
# Writes every predicate of `subject` with its objects, separating
# predicates with ";" and a newline.
#
# Statements are grouped by the predicate's string form and ordered with
# #sort_properties; rdf:first and rdf:rest are left out of the output.
# A predicate that raises Addressable::URI::InvalidURIError is reported via
# add_debug and skipped.
#
# @param [RDF::Resource] subject
def predicate_list(subject)
  properties = {}
  @graph.query(:subject => subject) do |st|
    (properties[st.predicate.to_s] ||= []) << st.object
  end

  prop_list = sort_properties(properties) - [RDF.first.to_s, RDF.rest.to_s]
  add_debug "predicate_list: #{prop_list.inspect}"
  return if prop_list.empty?

  prop_list.each_with_index do |prop, i|
    begin
      @output.write(";\n#{indent(2)}") if i > 0
      # Keys are strings: rebuild a blank node for "_:" keys, a URI otherwise
      verb(prop[0, 2] == "_:" ? RDF::Node.new(prop.split(':').last) : RDF::URI.intern(prop))
      object_list(properties[prop])
    rescue Addressable::URI::InvalidURIError => e
      add_debug "Predicate #{prop.inspect} is an invalid URI: #{e.message}"
    end
  end
end
521
+
522
# Whether `subject` may be written as a top-level `[ ... ]`: it has no
# recorded references, is a blank node, and does not head a valid list.
def s_squared?(subject)
  ref_count(subject).zero? &&
    subject.is_a?(RDF::Node) &&
    !is_valid_list(subject)
end
525
+
526
# Writes `subject` as a top-level `[ ... ] .` when #s_squared? allows it.
#
# @return [Boolean] true if the subject was written, false otherwise
def s_squared(subject)
  if s_squared?(subject)
    add_debug "s_squared: #{subject.inspect}"
    @output.write("\n#{indent} [")
    @depth += 1
    predicate_list(subject)
    @depth -= 1
    @output.write("] .")
    true
  else
    false
  end
end
537
+
538
# Writes `subject` in the regular form: the subject on a new indented line,
# then its predicate list, then the terminating " .".
#
# @return [true]
def s_default(subject)
  @output.write("\n" + indent)
  path(subject, :subject)
  predicate_list(subject)
  @output.write(" .")
  true
end
545
+
546
# Writes everything known about `subject`, marking it as done first. The
# bracketed form is tried before the regular one; a line break is written
# afterwards.
def statement(subject)
  add_debug "statement: #{subject.inspect}, s2?: #{s_squared?(subject)}"
  subject_done(subject)
  s_default(subject) unless s_squared(subject)
  @output.puts
end
552
+
553
# Whether `subject` has already been marked as done.
def is_done?(subject)
  @serialized.key?(subject)
end
556
+
557
# Mark a subject as done, so that #is_done? reports it from now on.
def subject_done(subject)
  @serialized.store(subject, true)
end
561
+ end
562
+ end