rdf-n3 0.2.3.2 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. data/.gitignore +1 -0
  2. data/.yardopts +4 -3
  3. data/{History.txt → History.md} +30 -6
  4. data/{README.rdoc → README.md} +56 -19
  5. data/Rakefile +15 -29
  6. data/VERSION +1 -1
  7. data/example-files/sp2b.n3 +50177 -0
  8. data/lib/rdf/n3.rb +2 -2
  9. data/lib/rdf/n3/reader.rb +560 -367
  10. data/lib/rdf/n3/reader/meta.rb +640 -0
  11. data/lib/rdf/n3/reader/n3-selectors.n3 +0 -0
  12. data/lib/rdf/n3/reader/parser.rb +229 -0
  13. data/lib/rdf/n3/vocab.rb +1 -0
  14. data/lib/rdf/n3/writer.rb +324 -265
  15. data/rdf-n3.gemspec +24 -26
  16. data/script/build_meta +242 -0
  17. data/script/parse +62 -13
  18. data/script/tc +4 -4
  19. data/spec/cwm_spec.rb +11 -3
  20. data/spec/n3reader_spec.rb +233 -63
  21. data/spec/rdf_helper.rb +15 -15
  22. data/spec/spec_helper.rb +10 -4
  23. data/spec/swap_spec.rb +11 -35
  24. data/spec/swap_test/n3parser.tests +14 -14
  25. data/spec/swap_test/n3parser.yml +0 -19
  26. data/spec/swap_test/nodeID/classes.ref.rdf +1 -1
  27. data/spec/swap_test/ref/contexts-1.n3 +12 -0
  28. data/spec/swap_test/ref/prefix2.rdf +33 -0
  29. data/spec/swap_test/ref/strquot.n3 +0 -1
  30. data/spec/swap_test/ref/xml-syntax-basic-serialization.rdf +1 -1
  31. data/spec/swap_test/regression.n3 +5 -5
  32. data/spec/swap_test/regression.yml +53 -23
  33. data/spec/turtle/manifest-bad.yml +91 -0
  34. data/spec/turtle/manifest.yml +187 -0
  35. data/spec/turtle_spec.rb +12 -20
  36. data/spec/writer_spec.rb +39 -37
  37. metadata +43 -48
  38. data/lib/rdf/n3/patches/qname_hacks.rb +0 -57
  39. data/lib/rdf/n3/patches/seq.rb +0 -34
  40. data/lib/rdf/n3/reader/n3_grammar.rb +0 -3764
  41. data/lib/rdf/n3/reader/n3_grammar.treetop +0 -227
  42. data/lib/rdf/n3/reader/n3_grammar_18.rb +0 -3764
  43. data/lib/rdf/n3/reader/n3_grammar_18.treetop +0 -227
  44. data/spec/literal_spec.rb +0 -245
@@ -0,0 +1,229 @@
1
+
2
+ module RDF::N3
3
+ module Parser
4
+ START = 'http://www.w3.org/2000/10/swap/grammar/n3#document'
5
+ R_WHITESPACE = Regexp.compile('\A\s*(?:#.*$)?')
6
+ R_MLSTRING = Regexp.compile("(\"\"\"[^\"\\\\]*(?:(?:\\\\.|\"(?!\"\"))[^\"\\\\]*)*\"\"\")")
7
+ SINGLE_CHARACTER_SELECTORS = %{\t\r\n !\"#$\%&'()*.,+/;<=>?[\\]^`{|}~}
8
+ NOT_QNAME_CHARS = SINGLE_CHARACTER_SELECTORS + "@"
9
+ NOT_NAME_CHARS = NOT_QNAME_CHARS + ":"
10
+
11
+ def error(str)
12
+ raise RDF::ReaderError, "\n#{@line}\n#{'-' * @pos}^\nError on line #{@lineno} at offset #{@pos}: #{str}"
13
+ end
14
+
15
+ def parse(prod)
16
+ todo_stack = [{:prod => prod, :terms => nil}]
17
+ while !todo_stack.empty?
18
+ pushed = false
19
+ if todo_stack.last[:terms].nil?
20
+ todo_stack.last[:terms] = []
21
+ tok = self.token
22
+ #puts "parse tok: '#{tok}', prod #{todo_stack.last[:prod]}"
23
+
24
+ # Got an opened production
25
+ onStart(abbr(todo_stack.last[:prod]))
26
+ break if tok.nil?
27
+
28
+ cur_prod = todo_stack.last[:prod]
29
+ prod_branch = @branches[cur_prod]
30
+ error("No branches found for '#{abbr(cur_prod)}'") if prod_branch.nil?
31
+ sequence = prod_branch[tok]
32
+ if sequence.nil?
33
+ dump_stack(todo_stack) if $verbose
34
+ expected = prod_branch.values.uniq.map {|u| u.map {|v| abbr(v).inspect}.join(",")}
35
+ error("Found '#{tok}' when parsing a #{abbr(cur_prod)}. expected #{expected.join(' | ')}")
36
+ end
37
+ #puts "sequence: #{sequence.inspect}"
38
+ todo_stack.last[:terms] += sequence
39
+ end
40
+
41
+ #puts "parse: #{todo_stack.last.inspect}"
42
+ while !todo_stack.last[:terms].to_a.empty?
43
+ term = todo_stack.last[:terms].shift
44
+ if term.is_a?(String)
45
+ puts "parse term(string): #{term}" if $verbose
46
+ word = buffer[0, term.length]
47
+ if word == term
48
+ onToken(term, word)
49
+ consume(term.length)
50
+ elsif '@' + word.chop == term && @keywords.include?(word.chop)
51
+ onToken(term, word.chop)
52
+ consume(term.length - 1)
53
+ else
54
+ error("Found '#{buffer[0, 10]}...'; #{term} expected")
55
+ end
56
+ elsif regexp = @regexps[term]
57
+ if abbr(term) == 'string' && buffer[0, 3] == '"""'
58
+ # Read until end of multi-line string if this is the start of a multi-line string
59
+ until R_MLSTRING.match(buffer)
60
+ begin
61
+ next_line = @input.readline
62
+ @line += next_line
63
+ @lineno += 1
64
+ rescue EOFError => e
65
+ error("EOF reached searching for end of multi-line comment")
66
+ end
67
+ end
68
+ #puts "ml-str now #{buffer.dump}"
69
+ end
70
+ md = regexp.match(buffer)
71
+ error("Token(#{abbr(term)}) '#{buffer[0, 10]}...' should match #{regexp}") unless md
72
+ puts "parse term(#{abbr(term)}:regexp): #{term}, #{regexp}.match('#{buffer[0, 10]}...') => '#{md.inspect}'" if $verbose
73
+ onToken(abbr(term), md.to_s)
74
+ consume(md[0].length)
75
+ else
76
+ puts "parse term(push): #{term}" if $verbose
77
+ todo_stack << {:prod => term, :terms => nil}
78
+ pushed = true
79
+ break
80
+ end
81
+ self.token
82
+ end
83
+
84
+ while !pushed && todo_stack.last[:terms].to_a.empty?
85
+ todo_stack.pop
86
+ self.onFinish
87
+ end
88
+ end
89
+ while !todo_stack.empty?
90
+ todo_stack.pop
91
+ self.onFinish
92
+ end
93
+ end
94
+
95
+ # Memoizer for get_token
96
+ def token
97
+ unless @memo.has_key?(@pos)
98
+ tok = self.get_token
99
+ @memo[@pos] = tok
100
+ puts "token: '#{tok}'('#{buffer[0, 10]}...')" if buffer && $verbose
101
+ end
102
+ @memo[@pos]
103
+ end
104
+
105
+ def get_token
106
+ whitespace
107
+
108
+ return nil if buffer.nil?
109
+
110
+ ch2 = buffer[0, 2]
111
+ return ch2 if %w(=> <= ^^).include?(ch2)
112
+
113
+ ch = buffer[0, 1]
114
+ @keyword_mode = false if ch == '.' && @keyword_mode
115
+
116
+ return ch if SINGLE_CHARACTER_SELECTORS.include?(ch)
117
+ return ":" if ch == ":"
118
+ return "0" if "+-0123456789".include?(ch)
119
+
120
+ if ch == '@'
121
+ return '@' if @pos > 0 && @line[@pos-1, 1] == '"'
122
+
123
+ j = 0
124
+ j += 1 while buffer[j+1, 1] && !NOT_NAME_CHARS.include?(buffer[j+1, 1])
125
+ name = buffer[1, j]
126
+ if name == 'keywords'
127
+ @keywords = []
128
+ @keyword_mode = true
129
+ end
130
+ return '@' + name
131
+ end
132
+
133
+ j = 0
134
+ j += 1 while buffer[j, 1] && !NOT_QNAME_CHARS.include?(buffer[j, 1])
135
+ word = buffer[0, j]
136
+ error("Tokenizer expected qname, found #{buffer[0, 10]}") unless word
137
+ if @keyword_mode
138
+ @keywords << word
139
+ elsif @keywords.include?(word)
140
+ if word == 'keywords'
141
+ @keywords = []
142
+ @keyword_mode = true
143
+ end
144
+ return '@' + word.to_s # implicit keyword
145
+ end
146
+
147
+ 'a'
148
+ end
149
+
150
+ def whitespace
151
+ while buffer && md = R_WHITESPACE.match(buffer)
152
+ return unless md[0].length > 0
153
+ consume(md[0].length)
154
+ #puts "ws: '#{md[0]}', pos=#{@pos}"
155
+ end
156
+ end
157
+
158
+ def readline
159
+ @line = @input.readline
160
+ @lineno += 1
161
+ @line.force_encoding(Encoding::UTF_8) if @line.respond_to?(:force_encoding) # for Ruby 1.9+
162
+ puts "readline[#{@lineno}]: '#{@line}'" if $verbose
163
+ @pos = 0
164
+ @line
165
+ rescue EOFError => e
166
+ @line, @pos = nil, 0
167
+ end
168
+
169
+ # Return data from current offset to end of line
170
+ def buffer
171
+ @line[@pos, @line.length - @pos] unless @line.nil?
172
+ end
173
+
174
+ # Cause n characters of line to be consumed. Read new line while line is empty or until eof
175
+ def consume(n)
176
+ @memo = {}
177
+ @pos += n
178
+ readline while @line && @line.length <= @pos
179
+ #puts "consume[#{n}]: '#{buffer}'" if $verbose
180
+ end
181
+
182
+ def abbr(prodURI)
183
+ prodURI.to_s.split('#').last
184
+ end
185
+
186
+ def onStart(prod)
187
+ $stdout.puts ' ' * @productions.length + prod
188
+ @productions << prod
189
+ end
190
+
191
+ def onFinish
192
+ prod = @productions.pop()
193
+ $stdout.puts ' ' * @productions.length + '/' + prod
194
+ end
195
+
196
+ def onToken(prod, tok)
197
+ $stdout.puts ' ' * @productions.length + "#{prod}(#{tok})"
198
+ end
199
+
200
+ def dump_stack(stack)
201
+ STDERR.puts "\nstack trace:"
202
+ stack.reverse.each do |se|
203
+ STDERR.puts "#{se[:prod]}"
204
+ STDERR.puts " " + case se[:terms]
205
+ when nil then "nil"
206
+ when [] then "empty"
207
+ else se[:terms].join(",\n ")
208
+ end
209
+ end
210
+ end
211
+
212
+ def test(input, branches, regexps)
213
+ # FIXME: for now, read in entire doc, eventually, process as stream
214
+ @input = input.respond_to?(:read) ? (input.rewind; input) : StringIO.new(input.to_s)
215
+ @lineno = 0
216
+ readline # Prime the pump
217
+ $stdout ||= STDOUT
218
+
219
+ @memo = {}
220
+ @keyword_mode = false
221
+ @keywords = %w(a is of this has)
222
+ @productions = []
223
+
224
+ @branches = branches
225
+ @regexps = regexps
226
+ parse(START.to_sym)
227
+ end
228
+ end
229
+ end
@@ -1,3 +1,4 @@
1
1
  module RDF
2
2
  class LOG < Vocabulary("http://www.w3.org/2000/10/swap/log#"); end
3
+ class REI < Vocabulary("http://www.w3.org/2004/06/rei#"); end
3
4
  end
@@ -38,63 +38,94 @@ module RDF::N3
38
38
  # end
39
39
  # end
40
40
  #
41
+ # The writer will add prefix definitions, and use them for creating @prefix definitions, and minting QNames
42
+ #
43
+ # @example Creating @base and @prefix definitions in output
44
+ # RDF::N3::Writer.buffer(:base_uri => "http://example.com/", :prefixes => {
45
+ # nil => "http://example.com/ns#",
46
+ # :foaf => "http://xmlns.com/foaf/0.1/"}
47
+ # ) do |writer|
48
+ # graph.each_statement do |statement|
49
+ # writer << statement
50
+ # end
51
+ # end
52
+ #
41
53
  # @author [Gregg Kellogg](http://kellogg-assoc.com/)
42
54
  class Writer < RDF::Writer
43
55
  format RDF::N3::Format
44
56
 
45
- SUBJECT = 0
46
- VERB = 1
47
- OBJECT = 2
48
-
49
- attr_accessor :graph, :base_uri
50
-
57
+ # @return [Graph] Graph of statements serialized
58
+ attr_accessor :graph
59
+ # @return [URI] Base URI used for relativizing URIs
60
+ attr_accessor :base_uri
61
+
51
62
  ##
52
63
  # Initializes the Turtle writer instance.
53
64
  #
54
- # Opitons:
55
- # max_depth:: Maximum depth for recursively defining resources, defaults to 3
56
- # base_uri:: Base URI of graph, used to shorting URI references
57
- # default_namespace:: URI to use as default namespace
58
- #
59
- # @param [IO, File] output
65
+ # @param [IO, File] output
66
+ # the output stream
60
67
  # @param [Hash{Symbol => Object}] options
61
- # @option options [Integer] :max_depth (nil)
62
- # @option options [String, #to_s] :base_uri (nil)
63
- # @option options [String, #to_s] :lang (nil)
64
- # @option options [Array] :attributes (nil)
65
- # @option options [String] :default_namespace
68
+ # any additional options
69
+ # @option options [Encoding] :encoding (Encoding::UTF_8)
70
+ # the encoding to use on the output stream (Ruby 1.9+)
71
+ # @option options [Boolean] :canonicalize (false)
72
+ # whether to canonicalize literals when serializing
73
+ # @option options [Hash] :prefixes (Hash.new)
74
+ # the prefix mappings to use (not supported by all writers)
75
+ # @option options [#to_s] :base_uri (nil)
76
+ # the base URI to use when constructing relative URIs
77
+ # @option options [Integer] :max_depth (3)
78
+ # Maximum depth for recursively defining resources, defaults to 3
79
+ # @option options [Boolean] :standard_prefixes (false)
80
+ # Add standard prefixes to @prefixes, if necessary.
81
+ # @option options [String] :default_namespace (nil)
82
+ # URI to use as default namespace, same as prefixes[nil]
83
+ # @yield [writer] `self`
84
+ # @yieldparam [RDF::Writer] writer
85
+ # @yieldreturn [void]
66
86
  # @yield [writer]
67
87
  # @yieldparam [RDF::Writer] writer
68
88
  def initialize(output = $stdout, options = {}, &block)
69
- @graph = RDF::Graph.new
70
- @stream = output
71
- super
89
+ super do
90
+ @graph = RDF::Graph.new
91
+ @uri_to_qname = {}
92
+ prefix(nil, @options[:default_namespace]) if @options[:default_namespace]
93
+ if block_given?
94
+ case block.arity
95
+ when 0 then instance_eval(&block)
96
+ else block.call(self)
97
+ end
98
+ end
99
+ end
72
100
  end
73
101
 
74
102
  ##
103
+ # Write whole graph
104
+ #
75
105
  # @param [Graph] graph
76
106
  # @return [void]
77
- def insert_graph(graph)
107
+ def write_graph(graph)
78
108
  @graph = graph
79
109
  end
80
110
 
81
111
  ##
82
- # @param [Statement] statement
112
+ # Adds a statement to be serialized
113
+ # @param [RDF::Statement] statement
83
114
  # @return [void]
84
- def insert_statement(statement)
85
- @graph << statement
115
+ def write_statement(statement)
116
+ @graph.insert(statement)
86
117
  end
87
118
 
88
119
  ##
89
- # Stores the RDF/XML representation of a triple.
90
- #
120
+ # Adds a triple to be serialized
91
121
  # @param [RDF::Resource] subject
92
122
  # @param [RDF::URI] predicate
93
123
  # @param [RDF::Value] object
94
124
  # @return [void]
95
- # @see #write_epilogue
96
- def insert_triple(subject, predicate, object)
97
- @graph << RDF::Statement.new(subject, predicate, object)
125
+ # @raise [NotImplementedError] unless implemented in subclass
126
+ # @abstract
127
+ def write_triple(subject, predicate, object)
128
+ @graph.insert(Statement.new(subject, predicate, object))
98
129
  end
99
130
 
100
131
  ##
@@ -106,69 +137,285 @@ module RDF::N3
106
137
  @max_depth = @options[:max_depth] || 3
107
138
  @base_uri = @options[:base_uri]
108
139
  @debug = @options[:debug]
109
- @default_namespace = @options[:default_namespace]
110
140
 
111
141
  self.reset
112
142
 
113
143
  add_debug "\nserialize: graph: #{@graph.size}"
114
144
 
115
- add_namespace("", @default_namespace) if @default_namespace
116
-
117
145
  preprocess
118
146
  start_document
119
147
 
120
148
  order_subjects.each do |subject|
121
- #puts "subj: #{subject.inspect}"
149
+ #STDERR.puts "subj: #{subject.inspect}"
122
150
  unless is_done?(subject)
123
151
  statement(subject)
124
152
  end
125
153
  end
126
154
  end
127
155
 
156
+ # Return a QName for the URI, or nil. Adds namespace of QName to defined prefixes
157
+ # @param [URI,#to_s] uri
158
+ # @return [Array<Symbol,Symbol>, nil] Prefix, Suffix pair or nil, if none found
159
+ def get_qname(uri)
160
+ uri = RDF::URI.intern(uri.to_s) unless uri.is_a?(URI)
161
+
162
+ unless @uri_to_qname.has_key?(uri)
163
+ # Find in defined prefixes
164
+ prefixes.each_pair do |prefix, vocab|
165
+ if uri.to_s.index(vocab.to_s) == 0
166
+ local_name = uri.to_s[(vocab.to_s.length)..-1]
167
+ add_debug "get_qname(ns): #{prefix}:#{local_name}"
168
+ return @uri_to_qname[uri] = [prefix, local_name.to_sym]
169
+ end
170
+ end
171
+
172
+ # Use a default vocabulary
173
+ if @options[:standard_prefixes] && vocab = RDF::Vocabulary.detect {|v| uri.to_s.index(v.to_uri.to_s) == 0}
174
+ prefix = vocab.__name__.to_s.split('::').last.downcase
175
+ prefixes[prefix.to_sym] = vocab.to_uri
176
+ suffix = uri.to_s[vocab.to_uri.to_s.size..-1]
177
+ return @uri_to_qname[uri] = [prefix.to_sym, suffix.empty? ? nil : suffix.to_sym] if prefix && suffix
178
+ end
179
+
180
+ @uri_to_qname[uri] = nil
181
+ end
182
+
183
+ @uri_to_qname[uri]
184
+ rescue Addressable::URI::InvalidURIError => e
185
+ @uri_to_qname[uri] = nil
186
+ end
187
+
188
+ # Take a hash from predicate uris to lists of values.
189
+ # Sort the lists of values. Return a sorted list of properties.
190
+ # @param [Hash{String => Array<Resource>}] properties A hash of Property to Resource mappings
191
+ # @return [Array<String>] Ordered list of properties. Uses predicate_order.
192
+ def sort_properties(properties)
193
+ properties.keys.each do |k|
194
+ properties[k] = properties[k].sort do |a, b|
195
+ a_li = a.is_a?(RDF::URI) && get_qname(a) && get_qname(a).last.to_s =~ /^_\d+$/ ? a.to_i : a.to_s
196
+ b_li = b.is_a?(RDF::URI) && get_qname(b) && get_qname(b).last.to_s =~ /^_\d+$/ ? b.to_i : b.to_s
197
+
198
+ a_li <=> b_li
199
+ end
200
+ end
201
+
202
+ # Make sorted list of properties
203
+ prop_list = []
204
+
205
+ predicate_order.each do |prop|
206
+ next unless properties[prop]
207
+ prop_list << prop.to_s
208
+ end
209
+
210
+ properties.keys.sort.each do |prop|
211
+ next if prop_list.include?(prop.to_s)
212
+ prop_list << prop.to_s
213
+ end
214
+
215
+ add_debug "sort_properties: #{prop_list.to_sentence}"
216
+ prop_list
217
+ end
218
+
219
+ ##
220
+ # Returns the N-Triples representation of a literal.
221
+ #
222
+ # @param [RDF::Literal, String, #to_s] literal
223
+ # @param [Hash{Symbol => Object}] options
224
+ # @return [String]
225
+ def format_literal(literal, options = {})
226
+ case literal
227
+ when RDF::Literal
228
+ text = quoted(literal.value)
229
+ text << "@#{literal.language}" if literal.has_language?
230
+ text << "^^#{format_uri(literal.datatype)}" if literal.has_datatype?
231
+ text
232
+ else
233
+ quoted(literal.to_s)
234
+ end
235
+ end
236
+
237
+ ##
238
+ # Returns the Turtle/N3 representation of a URI reference.
239
+ #
240
+ # @param [RDF::URI] uri
241
+ # @param [Hash{Symbol => Object}] options
242
+ # @return [String]
243
+ def format_uri(uri, options = {})
244
+ md = relativize(uri)
245
+ if md && md != uri.to_s
246
+ "<%s>" % md
247
+ elsif qname = get_qname(uri)
248
+ qname.map(&:to_s).join(":")
249
+ else
250
+ "<%s>" % uri_for(uri)
251
+ end
252
+ end
253
+
254
+ ##
255
+ # Returns the Turtle/N3 representation of a blank node.
256
+ #
257
+ # @param [RDF::Node] node
258
+ # @param [Hash{Symbol => Object}] options
259
+ # @return [String]
260
+ def format_node(node, options = {})
261
+ "_:%s" % node.id
262
+ end
263
+
128
264
  protected
265
+ # Output @base and @prefix definitions
129
266
  def start_document
130
267
  @started = true
131
268
 
132
- write("#{indent}@base <#{@base_uri}> .\n") if @base_uri
269
+ @output.write("#{indent}@base <#{@base_uri}> .\n") if @base_uri
270
+
271
+ add_debug("start_document: #{prefixes.inspect}")
272
+ prefixes.keys.sort_by(&:to_s).each do |prefix|
273
+ @output.write("#{indent}@prefix #{prefix}: <#{prefixes[prefix]}> .\n")
274
+ end
275
+ end
276
+
277
+ # If @base_uri is defined, use it to try to make uri relative
278
+ # @param [#to_s] uri
279
+ # @return [String]
280
+ def relativize(uri)
281
+ uri = uri.to_s
282
+ @base_uri ? uri.sub(@base_uri.to_s, "") : uri
283
+ end
284
+
285
+ # Defines rdf:type of subjects to be emitted at the beginning of the graph. Defaults to rdfs:Class
286
+ # @return [Array<URI>]
287
+ def top_classes; [RDF::RDFS.Class]; end
288
+
289
+ # Defines order of predicates to emit at beginning of a resource description. Defaults to
290
+ # [rdf:type, rdfs:label, dc:title]
291
+ # @return [Array<URI>]
292
+ def predicate_order; [RDF.type, RDF::RDFS.label, RDF::DC.title]; end
293
+
294
+ # Order subjects for output. Override this to output subjects in another order.
295
+ #
296
+ # Uses top_classes
297
+ # @return [Array<Resource>] Ordered list of subjects
298
+ def order_subjects
299
+ seen = {}
300
+ subjects = []
133
301
 
134
- add_debug("start_document: #{@namespaces.inspect}")
135
- @namespaces.keys.sort.each do |prefix|
136
- write("#{indent}@prefix #{prefix}: <#{@namespaces[prefix]}> .\n")
302
+ top_classes.each do |class_uri|
303
+ graph.query(:predicate => RDF.type, :object => class_uri).map {|st| st.subject}.sort.uniq.each do |subject|
304
+ #add_debug "order_subjects: #{subject.inspect}"
305
+ subjects << subject
306
+ seen[subject] = @top_levels[subject] = true
307
+ end
137
308
  end
309
+
310
+ # Sort subjects by resources over bnodes, ref_counts and the subject URI itself
311
+ recursable = @subjects.keys.
312
+ select {|s| !seen.include?(s)}.
313
+ map {|r| [r.is_a?(RDF::Node) ? 1 : 0, ref_count(r), r]}.
314
+ sort
315
+
316
+ subjects += recursable.map{|r| r.last}
317
+ end
318
+
319
+ # Perform any preprocessing of statements required
320
+ def preprocess
321
+ @graph.each {|statement| preprocess_statement(statement)}
138
322
  end
139
323
 
140
- def end_document
141
- write("\n")
324
+ # Perform any statement preprocessing required. This is used to perform reference counts and determine required
325
+ # prefixes.
326
+ # @param [Statement] statement
327
+ def preprocess_statement(statement)
328
+ #add_debug "preprocess: #{statement.inspect}"
329
+ references = ref_count(statement.object) + 1
330
+ @references[statement.object] = references
331
+ @subjects[statement.subject] = true
332
+
333
+ # Pre-fetch qnames, to fill prefixes
334
+ get_qname(statement.subject)
335
+ get_qname(statement.predicate)
336
+ get_qname(statement.object)
337
+
338
+ @references[statement.predicate] = ref_count(statement.predicate) + 1
339
+ end
340
+
341
+ # Return the number of times this node has been referenced in the object position
342
+ # @return [Integer]
343
+ def ref_count(node)
344
+ @references.fetch(node, 0)
345
+ end
346
+
347
+ # Returns indent string multiplied by the depth
348
+ # @param [Integer] modifier Increase depth by specified amount
349
+ # @return [String] A number of spaces, depending on current depth
350
+ def indent(modifier = 0)
351
+ " " * (@depth + modifier)
352
+ end
353
+
354
+ # Reset internal helper instance variables
355
+ def reset
356
+ @depth = 0
357
+ @lists = {}
358
+ @namespaces = {}
359
+ @references = {}
360
+ @serialized = {}
361
+ @subjects = {}
362
+ @top_levels = {}
363
+ @shortNames = {}
364
+ @started = false
365
+ end
366
+
367
+ ##
368
+ # Use single- or multi-line quotes. If literal contains \t, \n, or \r, use a multiline quote,
369
+ # otherwise, use a single-line
370
+ # @param [String] string
371
+ # @return [String]
372
+ def quoted(string)
373
+ if string.to_s.match(/[\t\n\r]/)
374
+ string = string.gsub('\\', '\\\\').gsub('"""', '\\"""')
375
+ %("""#{string}""")
376
+ else
377
+ "\"#{escaped(string)}\""
378
+ end
142
379
  end
380
+
381
+ private
143
382
 
383
+ # Add debug event to debug array, if specified
384
+ #
385
+ # @param [String] message::
386
+ def add_debug(message)
387
+ STDERR.puts message if ::RDF::N3::debug?
388
+ @debug << message if @debug.is_a?(Array)
389
+ end
390
+
144
391
  # Checks if l is a valid RDF list, i.e. no nodes have other properties.
145
392
  def is_valid_list(l)
146
393
  props = @graph.properties(l)
147
- #puts "is_valid_list: #{props.inspect}" if ::RDF::N3::debug?
394
+ #STDERR.puts "is_valid_list: #{props.inspect}" if ::RDF::N3::debug?
148
395
  return false unless props.has_key?(RDF.first.to_s) || l == RDF.nil
149
396
  while l && l != RDF.nil do
150
- #puts "is_valid_list(length): #{props.length}" if ::RDF::N3::debug?
397
+ #STDERR.puts "is_valid_list(length): #{props.length}" if ::RDF::N3::debug?
151
398
  return false unless props.has_key?(RDF.first.to_s) && props.has_key?(RDF.rest.to_s)
152
399
  n = props[RDF.rest.to_s]
153
- #puts "is_valid_list(n): #{n.inspect}" if ::RDF::N3::debug?
400
+ #STDERR.puts "is_valid_list(n): #{n.inspect}" if ::RDF::N3::debug?
154
401
  return false unless n.is_a?(Array) && n.length == 1
155
402
  l = n.first
156
403
  props = @graph.properties(l)
157
404
  end
158
- #puts "is_valid_list: valid" if ::RDF::N3::debug?
405
+ #STDERR.puts "is_valid_list: valid" if ::RDF::N3::debug?
159
406
  true
160
407
  end
161
408
 
162
409
  def do_list(l)
163
- puts "do_list: #{l.inspect}" if ::RDF::N3::debug?
164
- position = SUBJECT
410
+ STDERR.puts "do_list: #{l.inspect}" if ::RDF::N3::debug?
411
+ position = :subject
165
412
  while l do
166
413
  p = @graph.properties(l)
167
414
  item = p.fetch(RDF.first.to_s, []).first
168
415
  if item
169
416
  path(item, position)
170
417
  subject_done(l)
171
- position = OBJECT
418
+ position = :object
172
419
  end
173
420
  l = p.fetch(RDF.rest.to_s, []).first
174
421
  end
@@ -176,13 +423,13 @@ module RDF::N3
176
423
 
177
424
  def p_list(node, position)
178
425
  return false if !is_valid_list(node)
179
- #puts "p_list: #{node.inspect}, #{position}" if ::RDF::N3::debug?
426
+ #STDERR.puts "p_list: #{node.inspect}, #{position}" if ::RDF::N3::debug?
180
427
 
181
- write(position == SUBJECT ? "(" : " (")
428
+ @output.write(position == :subject ? "(" : " (")
182
429
  @depth += 2
183
430
  do_list(node)
184
431
  @depth -= 2
185
- write(')')
432
+ @output.write(')')
186
433
  end
187
434
 
188
435
  def p_squared?(node, position)
@@ -194,57 +441,62 @@ module RDF::N3
194
441
  def p_squared(node, position)
195
442
  return false unless p_squared?(node, position)
196
443
 
197
- #puts "p_squared: #{node.inspect}, #{position}" if ::RDF::N3::debug?
444
+ #STDERR.puts "p_squared: #{node.inspect}, #{position}" if ::RDF::N3::debug?
198
445
  subject_done(node)
199
- write(position == SUBJECT ? '[' : ' [')
446
+ @output.write(position == :subject ? '[' : ' [')
200
447
  @depth += 2
201
448
  predicate_list(node)
202
449
  @depth -= 2
203
- write(']')
450
+ @output.write(']')
204
451
 
205
452
  true
206
453
  end
207
454
 
208
455
  def p_default(node, position)
209
- #puts "p_default: #{node.inspect}, #{position}" if ::RDF::N3::debug?
210
- l = (position == SUBJECT ? "" : " ") + format_value(node)
211
- write(l)
456
+ #STDERR.puts "p_default: #{node.inspect}, #{position}" if ::RDF::N3::debug?
457
+ l = (position == :subject ? "" : " ") + format_value(node)
458
+ @output.write(l)
212
459
  end
213
460
 
214
461
  def path(node, position)
215
- puts "path: #{node.inspect}, pos: #{position}, []: #{is_valid_list(node)}, p2?: #{p_squared?(node, position)}, rc: #{ref_count(node)}" if ::RDF::N3::debug?
462
+ STDERR.puts "path: #{node.inspect}, pos: #{position}, []: #{is_valid_list(node)}, p2?: #{p_squared?(node, position)}, rc: #{ref_count(node)}" if ::RDF::N3::debug?
216
463
  raise RDF::WriterError, "Cannot serialize node '#{node}'" unless p_list(node, position) || p_squared(node, position) || p_default(node, position)
217
464
  end
218
465
 
219
466
  def verb(node)
220
- puts "verb: #{node.inspect}" if ::RDF::N3::debug?
467
+ STDERR.puts "verb: #{node.inspect}" if ::RDF::N3::debug?
221
468
  if node == RDF.type
222
- write(" a")
469
+ @output.write(" a")
223
470
  else
224
- path(node, VERB)
471
+ path(node, :predicate)
225
472
  end
226
473
  end
227
474
 
228
475
  def object_list(objects)
229
- puts "object_list: #{objects.inspect}" if ::RDF::N3::debug?
476
+ STDERR.puts "object_list: #{objects.inspect}" if ::RDF::N3::debug?
230
477
  return if objects.empty?
231
478
 
232
479
  objects.each_with_index do |obj, i|
233
- write(",\n#{indent(2)}") if i > 0
234
- path(obj, OBJECT)
480
+ @output.write(",\n#{indent(2)}") if i > 0
481
+ path(obj, :object)
235
482
  end
236
483
  end
237
484
 
238
485
  def predicate_list(subject)
239
486
  properties = @graph.properties(subject)
240
487
  prop_list = sort_properties(properties) - [RDF.first.to_s, RDF.rest.to_s]
241
- puts "predicate_list: #{prop_list.inspect}" if ::RDF::N3::debug?
488
+ STDERR.puts "predicate_list: #{prop_list.inspect}" if ::RDF::N3::debug?
242
489
  return if prop_list.empty?
243
490
 
244
491
  prop_list.each_with_index do |prop, i|
245
- write(";\n#{indent(2)}") if i > 0
246
- verb(RDF::URI.new(prop))
247
- object_list(properties[prop])
492
+ begin
493
+ @output.write(";\n#{indent(2)}") if i > 0
494
+ prop[0, 2] == "_:"
495
+ verb(prop[0, 2] == "_:" ? RDF::Node.new(prop.split(':').last) : RDF::URI.intern(prop))
496
+ object_list(properties[prop])
497
+ rescue Addressable::URI::InvalidURIError => e
498
+ STDERR.puts "Predicate #{prop.inspect} is an invalid URI: #{e.message}"
499
+ end
248
500
  end
249
501
  end
250
502
 
@@ -255,39 +507,28 @@ module RDF::N3
255
507
  def s_squared(subject)
256
508
  return false unless s_squared?(subject)
257
509
 
258
- write("\n#{indent} [")
510
+ @output.write("\n#{indent} [")
259
511
  @depth += 1
260
512
  predicate_list(subject)
261
513
  @depth -= 1
262
- write("] .")
514
+ @output.write("] .")
263
515
  true
264
516
  end
265
517
 
266
518
  def s_default(subject)
267
- write("\n#{indent}")
268
- path(subject, SUBJECT)
519
+ @output.write("\n#{indent}")
520
+ path(subject, :subject)
269
521
  predicate_list(subject)
270
- write(" .")
522
+ @output.write(" .")
271
523
  true
272
524
  end
273
525
 
274
- def relativize(uri)
275
- uri = uri.to_s
276
- @base_uri ? uri.sub(@base_uri.to_s, "") : uri
277
- end
278
-
279
526
  def statement(subject)
280
- puts "statement: #{subject.inspect}, s2?: #{s_squared(subject)}" if ::RDF::N3::debug?
527
+ STDERR.puts "statement: #{subject.inspect}, s2?: #{s_squared(subject)}" if ::RDF::N3::debug?
281
528
  subject_done(subject)
282
529
  s_squared(subject) || s_default(subject)
283
530
  end
284
531
 
285
- MAX_DEPTH = 10
286
- INDENT_STRING = " "
287
-
288
- def top_classes; [RDF::RDFS.Class]; end
289
- def predicate_order; [RDF.type, RDF::RDFS.label, RDF::DC.title]; end
290
-
291
532
  def is_done?(subject)
292
533
  @serialized.include?(subject)
293
534
  end
@@ -296,187 +537,5 @@ module RDF::N3
296
537
  def subject_done(subject)
297
538
  @serialized[subject] = true
298
539
  end
299
-
300
- def order_subjects
301
- seen = {}
302
- subjects = []
303
-
304
- top_classes.each do |class_uri|
305
- graph.query(:predicate => RDF.type, :object => class_uri).map {|st| st.subject}.sort.uniq.each do |subject|
306
- #add_debug "order_subjects: #{subject.inspect}"
307
- subjects << subject
308
- seen[subject] = @top_levels[subject] = true
309
- end
310
- end
311
-
312
- # Sort subjects by resources over bnodes, ref_counts and the subject URI itself
313
- recursable = @subjects.keys.
314
- select {|s| !seen.include?(s)}.
315
- map {|r| [r.is_a?(RDF::Node) ? 1 : 0, ref_count(r), r]}.
316
- sort
317
-
318
- subjects += recursable.map{|r| r.last}
319
- end
320
-
321
- def preprocess
322
- @graph.each {|statement| preprocess_statement(statement)}
323
- end
324
-
325
- def preprocess_statement(statement)
326
- #add_debug "preprocess: #{statement.inspect}"
327
- references = ref_count(statement.object) + 1
328
- @references[statement.object] = references
329
- @subjects[statement.subject] = true
330
-
331
- # Pre-fetch qnames, to fill namespaces
332
- get_qname(statement.subject)
333
- get_qname(statement.predicate)
334
- get_qname(statement.object)
335
-
336
- @references[statement.predicate] = ref_count(statement.predicate) + 1
337
- end
338
-
339
- # Return the number of times this node has been referenced in the object position
340
- def ref_count(node)
341
- @references.fetch(node, 0)
342
- end
343
-
344
- # Return a QName for the URI, or nil. Adds namespace of QName to defined namespaces
345
- def get_qname(uri)
346
- if uri.is_a?(RDF::URI)
347
- md = relativize(uri)
348
- return "<#{md}>" unless md == uri.to_s
349
-
350
- # Duplicate logic from URI#qname to remember namespace assigned
351
-
352
- if uri.qname
353
- return ":#{uri.qname.last}" if uri.vocab == @default_namespace
354
- add_namespace(uri.qname.first, uri.vocab)
355
- add_debug "get_qname(uri.qname): #{uri.qname.join(':')}"
356
- return uri.qname.join(":")
357
- end
358
-
359
- # No vocabulary assigned, find one from cache of created namespace URIs
360
- @namespaces.each_pair do |prefix, vocab|
361
- if uri.to_s.index(vocab.to_s) == 0
362
- uri.vocab = vocab
363
- local_name = uri.to_s[(vocab.to_s.length)..-1]
364
- if vocab == @default_namespace
365
- add_debug "get_qname(ns): :#{local_name}"
366
- return ":#{local_name}"
367
- else
368
- add_debug "get_qname(ns): #{prefix}:#{local_name}"
369
- return "#{prefix}:#{local_name}"
370
- end
371
- end
372
- end
373
-
374
- nil
375
- end
376
- end
377
-
378
- def add_namespace(prefix, ns)
379
- return if @namespaces.has_key?(prefix.to_s)
380
- uri = (ns.respond_to?(:to_uri) ? ns.to_uri : ns).to_s
381
- add_debug "add_namespace: '#{prefix}', <#{uri}>"
382
- @namespaces[prefix.to_s] = uri
383
- end
384
-
385
- def reset
386
- @depth = 0
387
- @lists = {}
388
- @namespaces = {}
389
- @references = {}
390
- @serialized = {}
391
- @subjects = {}
392
- @top_levels = {}
393
- @shortNames = {}
394
- @started = false
395
- end
396
-
397
- # Take a hash from predicate uris to lists of values.
398
- # Sort the lists of values. Return a sorted list of properties.
399
- def sort_properties(properties)
400
- properties.keys.each do |k|
401
- properties[k] = properties[k].sort do |a, b|
402
- a_li = a.is_a?(RDF::URI) && a.qname && a.qname.last =~ /^_\d+$/ ? a.to_i : a.to_s
403
- b_li = b.is_a?(RDF::URI) && b.qname && b.qname.last =~ /^_\d+$/ ? b.to_i : b.to_s
404
-
405
- a_li <=> b_li
406
- end
407
- end
408
-
409
- # Make sorted list of properties
410
- prop_list = []
411
-
412
- predicate_order.each do |prop|
413
- next unless properties[prop]
414
- prop_list << prop.to_s
415
- end
416
-
417
- properties.keys.sort.each do |prop|
418
- next if prop_list.include?(prop.to_s)
419
- prop_list << prop.to_s
420
- end
421
-
422
- add_debug "sort_properties: #{prop_list.to_sentence}"
423
- prop_list
424
- end
425
-
426
- # Add debug event to debug array, if specified
427
- #
428
- # @param [String] message::
429
- def add_debug(message)
430
- STDERR.puts message if ::RDF::N3::debug?
431
- @debug << message if @debug.is_a?(Array)
432
- end
433
-
434
- # Returns indent string multiplied by the depth
435
- def indent(modifier = 0)
436
- INDENT_STRING * (@depth + modifier)
437
- end
438
-
439
- # Write text
440
- def write(text)
441
- @stream.write(text)
442
- end
443
-
444
- ##
445
- # Returns the N-Triples representation of a literal.
446
- #
447
- # @param [RDF::Literal, String, #to_s] literal
448
- # @param [Hash{Symbol => Object}] options
449
- # @return [String]
450
- def format_literal(literal, options = {})
451
- case literal
452
- when RDF::Literal
453
- text = quoted(literal.value)
454
- text << "@#{literal.language}" if literal.has_language?
455
- text << "^^#{format_uri(literal.datatype)}" if literal.has_datatype?
456
- text
457
- else
458
- quoted(literal.to_s)
459
- end
460
- end
461
-
462
- ##
463
- # Returns the Turtle/N3 representation of a URI reference.
464
- #
465
- # @param [RDF::URI] literal
466
- # @param [Hash{Symbol => Object}] options
467
- # @return [String]
468
- def format_uri(uri, options = {})
469
- get_qname(uri) || "<%s>" % uri_for(uri)
470
- end
471
-
472
- ##
473
- # Returns the Turtle/N3 representation of a blank node.
474
- #
475
- # @param [RDF::Node] node
476
- # @param [Hash{Symbol => Object}] options
477
- # @return [String]
478
- def format_node(node, options = {})
479
- "_:%s" % node.id
480
- end
481
540
  end
482
541
  end