rdf-turtle 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,562 @@
1
+ require 'rdf/turtle/terminals'
2
+
3
+ module RDF::Turtle
4
+ ##
5
+ # A Turtle serialiser
6
+ #
7
+ # Note that the natural interface is to write a whole graph at a time.
8
+ # Writing statements or Triples will create a graph to add them to
9
+ # and then serialize the graph.
10
+ #
11
+ # @example Obtaining a Turtle writer class
12
+ # RDF::Writer.for(:n3) #=> RDF::Turtle::Writer
13
+ # RDF::Writer.for("etc/test.n3")
14
+ # RDF::Writer.for("etc/test.ttl")
15
+ # RDF::Writer.for(:file_name => "etc/test.n3")
16
+ # RDF::Writer.for(:file_name => "etc/test.ttl")
17
+ # RDF::Writer.for(:file_extension => "n3")
18
+ # RDF::Writer.for(:file_extension => "ttl")
19
+ # RDF::Writer.for(:content_type => "text/n3")
20
+ # RDF::Writer.for(:content_type => "text/turtle")
21
+ #
22
+ # @example Serializing an RDF graph into a Turtle file
23
+ # RDF::Turtle::Writer.open("etc/test.n3") do |writer|
24
+ # writer << graph
25
+ # end
26
+ #
27
+ # @example Serializing RDF statements into a Turtle file
28
+ # RDF::Turtle::Writer.open("etc/test.n3") do |writer|
29
+ # graph.each_statement do |statement|
30
+ # writer << statement
31
+ # end
32
+ # end
33
+ #
34
+ # @example Serializing RDF statements into a Turtle string
35
+ # RDF::Turtle::Writer.buffer do |writer|
36
+ # graph.each_statement do |statement|
37
+ # writer << statement
38
+ # end
39
+ # end
40
+ #
41
+ # The writer will add prefix definitions, and use them for creating @prefix definitions, and minting QNames
42
+ #
43
+ # @example Creating @base and @prefix definitions in output
44
+ # RDF::Turtle::Writer.buffer(:base_uri => "http://example.com/", :prefixes => {
45
+ # nil => "http://example.com/ns#",
46
+ # :foaf => "http://xmlns.com/foaf/0.1/"}
47
+ # ) do |writer|
48
+ # graph.each_statement do |statement|
49
+ # writer << statement
50
+ # end
51
+ # end
52
+ #
53
+ # @author [Gregg Kellogg](http://kellogg-assoc.com/)
54
+ class Writer < RDF::Writer
55
+ format RDF::Turtle::Format
56
+
57
+ # @return [Graph] Graph of statements serialized
58
+ attr_accessor :graph
59
+ # @return [URI] Base URI used for relativizing URIs
60
+ attr_accessor :base_uri
61
+
62
##
# Sets up a Turtle writer whose statements are buffered in a graph and
# serialized later by {#write_epilogue}.
#
# @param [IO, File] output
#   the output stream
# @param [Hash{Symbol => Object}] options
#   any additional options
# @option options [Encoding] :encoding (Encoding::UTF_8)
#   the encoding to use on the output stream (Ruby 1.9+); not read by this
#   class, presumably consumed by the superclass
# @option options [Boolean] :canonicalize (false)
#   whether to canonicalize literals when serializing
# @option options [Hash] :prefixes (Hash.new)
#   the prefix mappings to use
# @option options [#to_s] :base_uri (nil)
#   the base URI to use when constructing relative URIs
# @option options [Integer] :max_depth (3)
#   maximum depth for recursively defining resources
# @option options [Boolean] :standard_prefixes (false)
#   add prefixes for known vocabularies when a URI matches one
# @option options [String] :default_namespace (nil)
#   URI to declare as the default (empty) prefix
# @option options [Array] :debug (nil)
#   when an Array, debug messages are appended to it
# @yield [writer] `self`
# @yieldparam [RDF::Writer] writer
# @yieldreturn [void]
def initialize(output = $stdout, options = {}, &block)
  # Bare `super` forwards output and options unchanged; the block passed to
  # it initializes this writer's state before running the caller's block.
  super do
    @graph = RDF::Graph.new
    @uri_to_pname = {}
    @uri_to_prefix = {}
    if block
      # Zero-arity blocks run in the writer's context, others receive it.
      if block.arity.zero?
        instance_eval(&block)
      else
        block.call(self)
      end
    end
  end
end
101
+
102
##
# Makes +graph+ the graph to be serialized, replacing the one currently
# held by the writer.
#
# @param [Graph] graph the graph to serialize
# @return [void]
def write_graph(graph)
  @graph = graph
end
110
+
111
##
# Adds a statement to the buffered graph so that it is serialized later.
#
# @param [RDF::Statement] statement the statement to buffer
# @return [void]
def write_statement(statement)
  @graph.insert(statement)
end
118
+
119
##
# Adds a triple to the buffered graph so that it is serialized later.
#
# @param [RDF::Resource] subject
# @param [RDF::URI] predicate
# @param [RDF::Value] object
# @return [void]
def write_triple(subject, predicate, object)
  # Fully qualified: this file opens `module RDF::Turtle` in compact form, so
  # a bare `Statement` is not resolved through the RDF namespace.
  @graph.insert(RDF::Statement.new(subject, predicate, object))
end
130
+
131
##
# Serializes the buffered graph as Turtle: reads the serialization options,
# resets the helper state, preprocesses the statements, writes the document
# header and then emits every subject that has not been written yet.
#
# @return [void]
# @see #write_triple
def write_epilogue
  @max_depth = @options[:max_depth] || 3
  @base_uri = RDF::URI(@options[:base_uri])
  @debug = @options[:debug]

  reset

  add_debug "\nserialize: graph: #{@graph.size}"

  preprocess
  start_document

  # The done-check must happen per iteration: writing one subject may mark
  # later subjects as done.
  order_subjects.each { |subject| statement(subject) unless is_done?(subject) }
end
154
+
155
# Returns a prefixed name (QName) for a URI, or nil when none can be formed.
# The namespace used is registered as a prefix for output, and the result
# (including nil) is cached per URI string.
#
# NOTE(review): the first registered namespace that prefixes the URI wins,
# not the longest one — confirm this is intended when namespaces nest.
#
# @param [RDF::Resource] resource
# @return [String, nil] value to use to identify URI; blank nodes return
#   their string form, other non-URI values return nil
# @raise [RDF::WriterError] if an invalid URI is encountered
def get_pname(resource)
  return resource.to_s if resource.is_a?(RDF::Node)
  return nil unless resource.is_a?(RDF::URI)

  uri = resource.to_s
  return @uri_to_pname[uri] if @uri_to_pname.key?(uri)

  namespace = @uri_to_prefix.keys.find { |ns| uri.start_with?(ns.to_s) }
  candidate =
    if namespace
      # Use a defined prefix
      label = @uri_to_prefix[namespace]
      prefix(label, namespace) unless namespace.to_s.empty? # Define for output
      add_debug "get_pname: add prefix #{label.inspect} => #{namespace}"
      uri.sub(namespace.to_s, "#{label}:")
    elsif @options[:standard_prefixes] &&
          (vocab = RDF::Vocabulary.each.to_a.find { |v| uri.start_with?(v.to_uri.to_s) })
      # Derive the prefix from the vocabulary's class name
      label = vocab.__name__.to_s.split('::').last.downcase
      @uri_to_prefix[vocab.to_uri.to_s] = label
      prefix(label, vocab.to_uri) # Define for output
      add_debug "get_pname: add standard prefix #{label.inspect} => #{vocab.to_uri}"
      uri.sub(vocab.to_uri.to_s, "#{label}:")
    end

  # Keep the candidate only if the whole string is a valid pname
  if candidate
    matched = Terminals::PNAME_LN.match(candidate) || Terminals::PNAME_NS.match(candidate)
    candidate = nil unless matched.to_s.length == candidate.length
  end

  @uri_to_pname[uri] = candidate
rescue Addressable::URI::InvalidURIError => e
  raise RDF::WriterError, "Invalid URI #{resource.inspect}: #{e.message}"
end
197
+
198
# Takes a hash from predicate URI strings to lists of values, sorts each
# list of values in place, and returns the predicates in output order:
# first those named by #predicate_order, then the rest alphabetically.
#
# Values that are rdf:_n container membership URIs sort numerically and
# ahead of all other values, which sort by their string form.
#
# @param [Hash{String => Array<Resource>}] properties A hash of Property to Resource mappings
# @return [Array<String>] Ordered list of properties. Uses predicate_order.
def sort_properties(properties)
  member_prefix = RDF._.to_s

  properties.keys.each do |key|
    # Tuple keys keep Integer and String comparisons apart, so a list that
    # mixes rdf:_n members with other values no longer raises.
    properties[key] = properties[key].sort_by do |value|
      text = value.to_s
      if text.start_with?(member_prefix)
        [0, text.match(/\d+$/).to_s.to_i, text]
      else
        [1, 0, text]
      end
    end
  end

  # The hash is keyed by strings while predicate_order yields URI objects,
  # so the lookup has to go through #to_s for the preferred order to apply.
  prop_list = []
  predicate_order.each do |prop|
    next unless properties[prop.to_s] || properties[prop]
    prop_list << prop.to_s
  end

  remaining = properties.keys.sort.map { |prop| prop.to_s }.uniq - prop_list
  prop_list.concat(remaining)

  add_debug "sort_properties: #{prop_list.join(', ')}"
  prop_list
end
228
+
229
##
# Returns the Turtle representation of a literal.
#
# Booleans, integers, decimals and doubles are written in Turtle's bare
# (unquoted) form only when the literal is lexically valid; booleans
# additionally must read `true` or `false`, because a bare `1` or `0` would
# be read back as an integer. Everything else is quoted, followed by a
# language tag or datatype where present.
#
# @param [RDF::Literal, String, #to_s] literal
# @param [Hash{Symbol => Object}] options (unused here)
# @return [String]
def format_literal(literal, options = {})
  # Plain strings are accepted by this method but cannot be canonicalized.
  if @options[:canonicalize] && literal.respond_to?(:canonicalize!)
    literal = literal.dup.canonicalize!
  end
  return quoted(literal.to_s) unless literal.is_a?(RDF::Literal)

  # An invalid lexical form written bare would produce unparsable Turtle.
  if !literal.respond_to?(:valid?) || literal.valid?
    bare = literal.to_s
    case literal.datatype
    when RDF::XSD.boolean
      return bare if %w(true false).include?(bare)
    when RDF::XSD.integer, RDF::XSD.decimal
      return bare
    when RDF::XSD.double
      return bare.sub('E', 'e') # Favor lower case exponent
    end
  end

  text = quoted(literal.value)
  text << "@#{literal.language}" if literal.has_language?
  text << "^^#{format_uri(literal.datatype)}" if literal.has_datatype?
  text
end
254
+
255
##
# Returns the Turtle representation of a URI reference: a relative
# reference in angle brackets when #relativize shortened the URI, otherwise
# a prefixed name if one is available, otherwise the full URI in angle
# brackets.
#
# @param [RDF::URI] uri
# @param [Hash{Symbol => Object}] options (unused here)
# @return [String]
def format_uri(uri, options = {})
  relative = relativize(uri)
  return (get_pname(uri) || "<#{uri}>") if relative == uri.to_s

  add_debug("relativize(#{uri.inspect}) => #{relative.inspect}")
  "<#{relative}>"
end
266
+
267
##
# Returns the Turtle representation of a blank node: its identifier
# prefixed with `_:`.
#
# @param [RDF::Node] node
# @param [Hash{Symbol => Object}] options (unused here)
# @return [String]
def format_node(node, options = {})
  "_:#{node.id}"
end
276
+
277
+ protected
278
# Writes the document header: an @base line when a non-empty base URI is
# set, followed by one @prefix line per defined prefix, ordered by the
# prefix's string form. Also marks the document as started.
def start_document
  @started = true

  unless @base_uri.to_s.empty?
    @output.write("#{indent}@base <#{@base_uri}> .\n")
  end

  add_debug("start_document: #{prefixes.inspect}")
  # A nil prefix interpolates to the empty string, producing "@prefix : <...>".
  prefixes.keys.sort_by { |name| name.to_s }.each do |name|
    @output.write("#{indent}@prefix #{name}: <#{prefixes[name]}> .\n")
  end
end
289
+
290
# If @base_uri is defined and +uri+ starts with it, returns the part of
# +uri+ that follows the base; otherwise returns +uri+ unchanged.
#
# Only a leading occurrence of the base is removed, so a URI that merely
# contains the base somewhere inside (for example in a query string) is
# left intact.
#
# @param [#to_s] uri
# @return [String]
def relativize(uri)
  uri = uri.to_s
  base = @base_uri.to_s
  return uri if base.empty? || !uri.start_with?(base)

  uri[base.length..-1]
end
297
+
298
# Defines the rdf:type values whose subjects are emitted at the beginning
# of the graph. Defaults to rdfs:Class.
# @return [Array<URI>]
def top_classes
  [RDF::RDFS.Class]
end
301
+
302
# Defines the order of predicates to emit at the beginning of a resource
# description. Defaults to [rdf:type, rdfs:label, dc:title].
# @return [Array<URI>]
def predicate_order
  [RDF.type, RDF::RDFS.label, RDF::DC.title]
end
306
+
307
# Orders subjects for output. Override this to output subjects in another
# order.
#
# The base URI comes first when it is itself a subject, then the instances
# of each of #top_classes, then every remaining subject ordered by: URIs
# before blank nodes, reference count, and finally the subject itself.
#
# @return [Array<Resource>] Ordered list of subjects
def order_subjects
  ordered = []
  placed = {}

  # Start with base_uri
  root = base_uri
  if root && @subjects.keys.include?(root)
    ordered << root
    placed[root] = true
  end

  # Add distinguished classes
  top_classes.each do |class_uri|
    instances = graph.query(:predicate => RDF.type, :object => class_uri).map { |st| st.subject }
    instances.sort.uniq.each do |subject|
      add_debug "order_subjects: #{subject.inspect}"
      ordered << subject
      placed[subject] = true
    end
  end

  # Rank what is left; array comparison applies the three criteria in turn
  leftover = @subjects.keys.reject { |s| placed.include?(s) }
  ranked = leftover.map { |s| [s.is_a?(RDF::Node) ? 1 : 0, ref_count(s), s] }.sort

  ordered + ranked.map { |entry| entry.last }
end
338
+
339
# Performs the preprocessing needed before output: records the configured
# prefixes by namespace, clears the prefix option so that only prefixes
# actually used get defined, declares the default namespace if given, and
# preprocesses every statement of the graph.
#
# NOTE(review): :default_namespace is declared as a prefix here but is not
# added to the namespace lookup table — confirm whether it should be.
def preprocess
  # Load defined prefixes, keyed by namespace for lookup by URI
  declared = @options[:prefixes] || {}
  declared.each_pair { |name, namespace| @uri_to_prefix[namespace.to_s] = name }
  @options[:prefixes] = {} # Will define actual used when matched

  default_ns = @options[:default_namespace]
  prefix(nil, default_ns) if default_ns

  @graph.each { |st| preprocess_statement(st) }
end
351
+
352
# Preprocesses one statement: bumps the reference counts of its object and
# predicate, records its subject, and looks up prefixed names for all of
# its terms so that the prefixes they need get defined.
# @param [Statement] statement
def preprocess_statement(statement)
  subject = statement.subject
  predicate = statement.predicate
  object = statement.object

  @references[object] = ref_count(object) + 1
  @subjects[subject] = true

  # Pre-fetch pnames, to fill prefixes
  [subject, predicate, object].each { |term| get_pname(term) }
  get_pname(object.datatype) if object.literal? && object.datatype

  @references[predicate] = ref_count(predicate) + 1
end
369
+
370
# Returns the reference count recorded for +node+, or 0 when none has been
# recorded.
# @return [Integer]
def ref_count(node)
  @references.fetch(node) { 0 }
end
375
+
376
# Returns the indentation string for the current depth.
# @param [Integer] modifier Increase depth by specified amount
# @return [String] A number of spaces, depending on current depth
def indent(modifier = 0)
  width = @depth + modifier
  " " * width
end
382
+
383
# Resets the internal helper instance variables: depth back to zero, every
# bookkeeping table to a fresh empty hash, and the started flag to false.
def reset
  @depth = 0
  @lists, @namespaces, @references = {}, {}, {}
  @serialized, @subjects, @shortNames = {}, {}, {}
  @started = false
end
394
+
395
##
# Use single- or multi-line quotes. If literal contains \t, \n, or \r, use a
# multi-line (triple-quoted) string, otherwise use a single-line one.
#
# In the multi-line form every backslash and every double quote is escaped,
# so the text can neither introduce an escape sequence of its own nor close
# the literal early (for example with a run of quotes or a trailing quote).
#
# @param [String] string
# @return [String]
def quoted(string)
  text = string.to_s
  if text =~ /[\t\n\r]/
    # Block form: a replacement string would reinterpret the backslashes.
    body = text.gsub(/["\\]/) { |char| "\\#{char}" }
    %("""#{body}""")
  else
    "\"#{escaped(string)}\""
  end
end
408
+
409
+ private
410
+
411
# Reports a debug message: printed to STDERR when RDF::Turtle debugging is
# enabled, and appended to @debug when that is an Array.
#
# @param [String] message
def add_debug(message)
  STDERR.puts(message) if ::RDF::Turtle.debug?
  return unless @debug.is_a?(Array)

  @debug << message
end
418
+
419
# Reports whether +l+ heads a valid RDF list in the graph, as judged by
# RDF::List#valid?.
def is_valid_list(l)
  list = RDF::List.new(l, @graph)
  list.valid?
end
424
+
425
# Writes the members of the RDF list headed by +l+: for every rdf:first
# statement the member is written via #path and the list node is marked as
# done. The first member is written in :subject position, later ones in
# :object position.
def do_list(l)
  list = RDF::List.new(l, @graph)
  add_debug "do_list: #{list.inspect}"
  position = :subject
  list.each_statement do |st|
    if st.predicate == RDF.first
      add_debug " list this: #{st.subject} first: #{st.object}[#{position}]"
      path(st.object, position)
      subject_done(st.subject)
      position = :object
    end
  end
end
437
+
438
# Writes +node+ as a parenthesized list if it heads a valid RDF list.
# Returns false without writing anything otherwise; on success returns the
# result of the final write, which is truthy.
def p_list(node, position)
  return false unless is_valid_list(node)

  opener = position == :subject ? "(" : " ("
  @output.write(opener)
  @depth += 2
  do_list(node)
  @depth -= 2
  @output.write(')')
end
448
+
449
# Reports whether +node+ can be written inline in square brackets: it must
# be a blank node that has not been serialized yet and is referenced at
# most once. +position+ is not consulted.
def p_squared?(node, position)
  return false unless node.is_a?(RDF::Node)
  return false if @serialized.has_key?(node)

  ref_count(node) <= 1
end
454
+
455
# Writes +node+ inline as `[ ... ]` with its predicate list when
# #p_squared? allows it, marking the node as done first. Returns true when
# written and false otherwise.
def p_squared(node, position)
  return false unless p_squared?(node, position)

  subject_done(node)
  opener = position == :subject ? '[' : ' ['
  @output.write(opener)
  @depth += 2
  predicate_list(node)
  @depth -= 2
  @output.write(']')

  true
end
468
+
469
# Writes the formatted value of +node+, preceded by a space unless it is in
# :subject position. Returns the result of the write.
def p_default(node, position)
  lead = position == :subject ? "" : " "
  @output.write(lead + format_value(node))
end
474
+
475
# Writes +node+ using the first form that applies: list, inline blank node,
# or plain formatted value.
# @raise [RDF::WriterError] if none of the forms wrote the node
def path(node, position)
  add_debug "path: #{node.inspect}, pos: #{position}, []: #{is_valid_list(node)}, p2?: #{p_squared?(node, position)}, rc: #{ref_count(node)}"
  written = p_list(node, position) || p_squared(node, position) || p_default(node, position)
  raise RDF::WriterError, "Cannot serialize node '#{node}'" unless written
end
479
+
480
# Writes a predicate: the keyword ` a` for rdf:type, otherwise the
# predicate itself via #path in :predicate position.
def verb(node)
  add_debug "verb: #{node.inspect}"
  return @output.write(" a") if node == RDF.type

  path(node, :predicate)
end
488
+
489
# Writes the given objects via #path in :object position, separating
# consecutive objects with a comma and a newline indented four levels
# deeper. Does nothing for an empty list.
def object_list(objects)
  add_debug "object_list: #{objects.inspect}"
  return if objects.empty?

  objects.each_with_index do |obj, index|
    @output.write(",\n#{indent(4)}") unless index.zero?
    path(obj, :object)
  end
end
498
+
499
# Writes the predicates and objects of +subject+. Statements are grouped by
# predicate string, ordered by #sort_properties, and rdf:first / rdf:rest
# are skipped. Consecutive predicates are separated by a semicolon and a
# newline indented two levels deeper.
#
# A predicate that turns out to be an invalid URI is reported through
# #add_debug and skipped rather than aborting the serialization.
def predicate_list(subject)
  properties = {}
  @graph.query(:subject => subject) do |st|
    (properties[st.predicate.to_s] ||= []) << st.object
  end

  prop_list = sort_properties(properties) - [RDF.first.to_s, RDF.rest.to_s]
  add_debug "predicate_list: #{prop_list.inspect}"
  return if prop_list.empty?

  prop_list.each_with_index do |prop, i|
    begin
      @output.write(";\n#{indent(2)}") if i > 0
      # Keys are strings; rebuild the term, treating "_:" names as blank nodes
      predicate = prop[0, 2] == "_:" ? RDF::Node.new(prop.split(':').last) : RDF::URI.intern(prop)
      verb(predicate)
      object_list(properties[prop])
    rescue Addressable::URI::InvalidURIError => e
      add_debug "Predicate #{prop.inspect} is an invalid URI: #{e.message}"
    end
  end
end
521
+
522
# Reports whether +subject+ can be written as an anonymous `[ ... ]`
# subject: an unreferenced blank node that does not head a valid list.
def s_squared?(subject)
  return false unless ref_count(subject) == 0
  return false unless subject.is_a?(RDF::Node)

  !is_valid_list(subject)
end
525
+
526
# Writes +subject+ as an anonymous `[ ... ] .` block containing its
# predicate list, when #s_squared? allows it. Returns true when written and
# false otherwise.
def s_squared(subject)
  if s_squared?(subject)
    add_debug "s_squared: #{subject.inspect}"
    @output.write("\n#{indent} [")
    @depth += 1
    predicate_list(subject)
    @depth -= 1
    @output.write("] .")
    true
  else
    false
  end
end
537
+
538
# Writes +subject+ in the regular form: a newline and the current indent,
# the subject itself, its predicate list, and a closing ` .`. Always
# returns true.
def s_default(subject)
  @output.write("\n#{indent}")
  path(subject, :subject)
  predicate_list(subject)
  @output.write(" .")
  true
end
545
+
546
# Writes everything about +subject+: marks it as done, writes it in the
# anonymous form when possible and in the regular form otherwise, and
# finishes with a newline.
def statement(subject)
  add_debug "statement: #{subject.inspect}, s2?: #{s_squared?(subject)}"
  subject_done(subject)
  s_default(subject) unless s_squared(subject)
  @output.puts
end
552
+
553
# Reports whether +subject+ has already been marked as serialized.
def is_done?(subject)
  @serialized.key?(subject)
end
556
+
557
# Marks +subject+ as serialized so that it is not written again.
def subject_done(subject)
  @serialized.store(subject, true)
end
561
+ end
562
+ end