rdf-n3 0.2.3.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. data/.gitignore +1 -0
  2. data/.yardopts +4 -3
  3. data/{History.txt → History.md} +30 -6
  4. data/{README.rdoc → README.md} +56 -19
  5. data/Rakefile +15 -29
  6. data/VERSION +1 -1
  7. data/example-files/sp2b.n3 +50177 -0
  8. data/lib/rdf/n3.rb +2 -2
  9. data/lib/rdf/n3/reader.rb +560 -367
  10. data/lib/rdf/n3/reader/meta.rb +640 -0
  11. data/lib/rdf/n3/reader/n3-selectors.n3 +0 -0
  12. data/lib/rdf/n3/reader/parser.rb +229 -0
  13. data/lib/rdf/n3/vocab.rb +1 -0
  14. data/lib/rdf/n3/writer.rb +324 -265
  15. data/rdf-n3.gemspec +24 -26
  16. data/script/build_meta +242 -0
  17. data/script/parse +62 -13
  18. data/script/tc +4 -4
  19. data/spec/cwm_spec.rb +11 -3
  20. data/spec/n3reader_spec.rb +233 -63
  21. data/spec/rdf_helper.rb +15 -15
  22. data/spec/spec_helper.rb +10 -4
  23. data/spec/swap_spec.rb +11 -35
  24. data/spec/swap_test/n3parser.tests +14 -14
  25. data/spec/swap_test/n3parser.yml +0 -19
  26. data/spec/swap_test/nodeID/classes.ref.rdf +1 -1
  27. data/spec/swap_test/ref/contexts-1.n3 +12 -0
  28. data/spec/swap_test/ref/prefix2.rdf +33 -0
  29. data/spec/swap_test/ref/strquot.n3 +0 -1
  30. data/spec/swap_test/ref/xml-syntax-basic-serialization.rdf +1 -1
  31. data/spec/swap_test/regression.n3 +5 -5
  32. data/spec/swap_test/regression.yml +53 -23
  33. data/spec/turtle/manifest-bad.yml +91 -0
  34. data/spec/turtle/manifest.yml +187 -0
  35. data/spec/turtle_spec.rb +12 -20
  36. data/spec/writer_spec.rb +39 -37
  37. metadata +43 -48
  38. data/lib/rdf/n3/patches/qname_hacks.rb +0 -57
  39. data/lib/rdf/n3/patches/seq.rb +0 -34
  40. data/lib/rdf/n3/reader/n3_grammar.rb +0 -3764
  41. data/lib/rdf/n3/reader/n3_grammar.treetop +0 -227
  42. data/lib/rdf/n3/reader/n3_grammar_18.rb +0 -3764
  43. data/lib/rdf/n3/reader/n3_grammar_18.treetop +0 -227
  44. data/spec/literal_spec.rb +0 -245
@@ -0,0 +1,229 @@
1
+
2
+ module RDF::N3
3
+ module Parser
4
+ START = 'http://www.w3.org/2000/10/swap/grammar/n3#document'
5
+ R_WHITESPACE = Regexp.compile('\A\s*(?:#.*$)?')
6
+ R_MLSTRING = Regexp.compile("(\"\"\"[^\"\\\\]*(?:(?:\\\\.|\"(?!\"\"))[^\"\\\\]*)*\"\"\")")
7
+ SINGLE_CHARACTER_SELECTORS = %{\t\r\n !\"#$\%&'()*.,+/;<=>?[\\]^`{|}~}
8
+ NOT_QNAME_CHARS = SINGLE_CHARACTER_SELECTORS + "@"
9
+ NOT_NAME_CHARS = NOT_QNAME_CHARS + ":"
10
+
11
+ def error(str)
12
+ raise RDF::ReaderError, "\n#{@line}\n#{'-' * @pos}^\nError on line #{@lineno} at offset #{@pos}: #{str}"
13
+ end
14
+
15
+ def parse(prod)
16
+ todo_stack = [{:prod => prod, :terms => nil}]
17
+ while !todo_stack.empty?
18
+ pushed = false
19
+ if todo_stack.last[:terms].nil?
20
+ todo_stack.last[:terms] = []
21
+ tok = self.token
22
+ #puts "parse tok: '#{tok}', prod #{todo_stack.last[:prod]}"
23
+
24
+ # Got an opened production
25
+ onStart(abbr(todo_stack.last[:prod]))
26
+ break if tok.nil?
27
+
28
+ cur_prod = todo_stack.last[:prod]
29
+ prod_branch = @branches[cur_prod]
30
+ error("No branches found for '#{abbr(cur_prod)}'") if prod_branch.nil?
31
+ sequence = prod_branch[tok]
32
+ if sequence.nil?
33
+ dump_stack(todo_stack) if $verbose
34
+ expected = prod_branch.values.uniq.map {|u| u.map {|v| abbr(v).inspect}.join(",")}
35
+ error("Found '#{tok}' when parsing a #{abbr(cur_prod)}. expected #{expected.join(' | ')}")
36
+ end
37
+ #puts "sequence: #{sequence.inspect}"
38
+ todo_stack.last[:terms] += sequence
39
+ end
40
+
41
+ #puts "parse: #{todo_stack.last.inspect}"
42
+ while !todo_stack.last[:terms].to_a.empty?
43
+ term = todo_stack.last[:terms].shift
44
+ if term.is_a?(String)
45
+ puts "parse term(string): #{term}" if $verbose
46
+ word = buffer[0, term.length]
47
+ if word == term
48
+ onToken(term, word)
49
+ consume(term.length)
50
+ elsif '@' + word.chop == term && @keywords.include?(word.chop)
51
+ onToken(term, word.chop)
52
+ consume(term.length - 1)
53
+ else
54
+ error("Found '#{buffer[0, 10]}...'; #{term} expected")
55
+ end
56
+ elsif regexp = @regexps[term]
57
+ if abbr(term) == 'string' && buffer[0, 3] == '"""'
58
+ # Read until end of multi-line string if this is the start of a multi-line string
59
+ until R_MLSTRING.match(buffer)
60
+ begin
61
+ next_line = @input.readline
62
+ @line += next_line
63
+ @lineno += 1
64
+ rescue EOFError => e
65
+ error("EOF reached searching for end of multi-line comment")
66
+ end
67
+ end
68
+ #puts "ml-str now #{buffer.dump}"
69
+ end
70
+ md = regexp.match(buffer)
71
+ error("Token(#{abbr(term)}) '#{buffer[0, 10]}...' should match #{regexp}") unless md
72
+ puts "parse term(#{abbr(term)}:regexp): #{term}, #{regexp}.match('#{buffer[0, 10]}...') => '#{md.inspect}'" if $verbose
73
+ onToken(abbr(term), md.to_s)
74
+ consume(md[0].length)
75
+ else
76
+ puts "parse term(push): #{term}" if $verbose
77
+ todo_stack << {:prod => term, :terms => nil}
78
+ pushed = true
79
+ break
80
+ end
81
+ self.token
82
+ end
83
+
84
+ while !pushed && todo_stack.last[:terms].to_a.empty?
85
+ todo_stack.pop
86
+ self.onFinish
87
+ end
88
+ end
89
+ while !todo_stack.empty?
90
+ todo_stack.pop
91
+ self.onFinish
92
+ end
93
+ end
94
+
95
+ # Memoizer for get_token
96
+ def token
97
+ unless @memo.has_key?(@pos)
98
+ tok = self.get_token
99
+ @memo[@pos] = tok
100
+ puts "token: '#{tok}'('#{buffer[0, 10]}...')" if buffer && $verbose
101
+ end
102
+ @memo[@pos]
103
+ end
104
+
105
+ def get_token
106
+ whitespace
107
+
108
+ return nil if buffer.nil?
109
+
110
+ ch2 = buffer[0, 2]
111
+ return ch2 if %w(=> <= ^^).include?(ch2)
112
+
113
+ ch = buffer[0, 1]
114
+ @keyword_mode = false if ch == '.' && @keyword_mode
115
+
116
+ return ch if SINGLE_CHARACTER_SELECTORS.include?(ch)
117
+ return ":" if ch == ":"
118
+ return "0" if "+-0123456789".include?(ch)
119
+
120
+ if ch == '@'
121
+ return '@' if @pos > 0 && @line[@pos-1, 1] == '"'
122
+
123
+ j = 0
124
+ j += 1 while buffer[j+1, 1] && !NOT_NAME_CHARS.include?(buffer[j+1, 1])
125
+ name = buffer[1, j]
126
+ if name == 'keywords'
127
+ @keywords = []
128
+ @keyword_mode = true
129
+ end
130
+ return '@' + name
131
+ end
132
+
133
+ j = 0
134
+ j += 1 while buffer[j, 1] && !NOT_QNAME_CHARS.include?(buffer[j, 1])
135
+ word = buffer[0, j]
136
+ error("Tokenizer expected qname, found #{buffer[0, 10]}") unless word
137
+ if @keyword_mode
138
+ @keywords << word
139
+ elsif @keywords.include?(word)
140
+ if word == 'keywords'
141
+ @keywords = []
142
+ @keyword_mode = true
143
+ end
144
+ return '@' + word.to_s # implicit keyword
145
+ end
146
+
147
+ 'a'
148
+ end
149
+
150
+ def whitespace
151
+ while buffer && md = R_WHITESPACE.match(buffer)
152
+ return unless md[0].length > 0
153
+ consume(md[0].length)
154
+ #puts "ws: '#{md[0]}', pos=#{@pos}"
155
+ end
156
+ end
157
+
158
+ def readline
159
+ @line = @input.readline
160
+ @lineno += 1
161
+ @line.force_encoding(Encoding::UTF_8) if @line.respond_to?(:force_encoding) # for Ruby 1.9+
162
+ puts "readline[#{@lineno}]: '#{@line}'" if $verbose
163
+ @pos = 0
164
+ @line
165
+ rescue EOFError => e
166
+ @line, @pos = nil, 0
167
+ end
168
+
169
+ # Return data from current offset to end of line
170
+ def buffer
171
+ @line[@pos, @line.length - @pos] unless @line.nil?
172
+ end
173
+
174
+ # Cause n characters of line to be consumed. Read new line while line is empty or until eof
175
+ def consume(n)
176
+ @memo = {}
177
+ @pos += n
178
+ readline while @line && @line.length <= @pos
179
+ #puts "consume[#{n}]: '#{buffer}'" if $verbose
180
+ end
181
+
182
+ def abbr(prodURI)
183
+ prodURI.to_s.split('#').last
184
+ end
185
+
186
+ def onStart(prod)
187
+ $stdout.puts ' ' * @productions.length + prod
188
+ @productions << prod
189
+ end
190
+
191
+ def onFinish
192
+ prod = @productions.pop()
193
+ $stdout.puts ' ' * @productions.length + '/' + prod
194
+ end
195
+
196
+ def onToken(prod, tok)
197
+ $stdout.puts ' ' * @productions.length + "#{prod}(#{tok})"
198
+ end
199
+
200
+ def dump_stack(stack)
201
+ STDERR.puts "\nstack trace:"
202
+ stack.reverse.each do |se|
203
+ STDERR.puts "#{se[:prod]}"
204
+ STDERR.puts " " + case se[:terms]
205
+ when nil then "nil"
206
+ when [] then "empty"
207
+ else se[:terms].join(",\n ")
208
+ end
209
+ end
210
+ end
211
+
212
+ def test(input, branches, regexps)
213
+ # FIXME: for now, read in entire doc, eventually, process as stream
214
+ @input = input.respond_to?(:read) ? (input.rewind; input) : StringIO.new(input.to_s)
215
+ @lineno = 0
216
+ readline # Prime the pump
217
+ $stdout ||= STDOUT
218
+
219
+ @memo = {}
220
+ @keyword_mode = false
221
+ @keywords = %w(a is of this has)
222
+ @productions = []
223
+
224
+ @branches = branches
225
+ @regexps = regexps
226
+ parse(START.to_sym)
227
+ end
228
+ end
229
+ end
@@ -1,3 +1,4 @@
1
1
  module RDF
2
2
  class LOG < Vocabulary("http://www.w3.org/2000/10/swap/log#"); end
3
+ class REI < Vocabulary("http://www.w3.org/2004/06/rei#"); end
3
4
  end
@@ -38,63 +38,94 @@ module RDF::N3
38
38
  # end
39
39
  # end
40
40
  #
41
+ # The writer will add prefix definitions, and use them for creating @prefix definitions, and minting QNames
42
+ #
43
+ # @example Creating @base and @prefix definitions in output
44
+ # RDF::N3::Writer.buffer(:base_uri => "http://example.com/", :prefixes => {
45
+ # nil => "http://example.com/ns#",
46
+ # :foaf => "http://xmlns.com/foaf/0.1/"}
47
+ # ) do |writer|
48
+ # graph.each_statement do |statement|
49
+ # writer << statement
50
+ # end
51
+ # end
52
+ #
41
53
  # @author [Gregg Kellogg](http://kellogg-assoc.com/)
42
54
  class Writer < RDF::Writer
43
55
  format RDF::N3::Format
44
56
 
45
- SUBJECT = 0
46
- VERB = 1
47
- OBJECT = 2
48
-
49
- attr_accessor :graph, :base_uri
50
-
57
+ # @return [Graph] Graph of statements serialized
58
+ attr_accessor :graph
59
+ # @return [URI] Base URI used for relativizing URIs
60
+ attr_accessor :base_uri
61
+
51
62
  ##
52
63
  # Initializes the Turtle writer instance.
53
64
  #
54
- # Opitons:
55
- # max_depth:: Maximum depth for recursively defining resources, defaults to 3
56
- # base_uri:: Base URI of graph, used to shorting URI references
57
- # default_namespace:: URI to use as default namespace
58
- #
59
- # @param [IO, File] output
65
+ # @param [IO, File] output
66
+ # the output stream
60
67
  # @param [Hash{Symbol => Object}] options
61
- # @option options [Integer] :max_depth (nil)
62
- # @option options [String, #to_s] :base_uri (nil)
63
- # @option options [String, #to_s] :lang (nil)
64
- # @option options [Array] :attributes (nil)
65
- # @option options [String] :default_namespace
68
+ # any additional options
69
+ # @option options [Encoding] :encoding (Encoding::UTF_8)
70
+ # the encoding to use on the output stream (Ruby 1.9+)
71
+ # @option options [Boolean] :canonicalize (false)
72
+ # whether to canonicalize literals when serializing
73
+ # @option options [Hash] :prefixes (Hash.new)
74
+ # the prefix mappings to use (not supported by all writers)
75
+ # @option options [#to_s] :base_uri (nil)
76
+ # the base URI to use when constructing relative URIs
77
+ # @option options [Integer] :max_depth (3)
78
+ # Maximum depth for recursively defining resources, defaults to 3
79
+ # @option options [Boolean] :standard_prefixes (false)
80
+ # Add standard prefixes to @prefixes, if necessary.
81
+ # @option options [String] :default_namespace (nil)
82
+ # URI to use as default namespace, same as prefixes[nil]
83
+ # @yield [writer] `self`
84
+ # @yieldparam [RDF::Writer] writer
85
+ # @yieldreturn [void]
66
86
  # @yield [writer]
67
87
  # @yieldparam [RDF::Writer] writer
68
88
  def initialize(output = $stdout, options = {}, &block)
69
- @graph = RDF::Graph.new
70
- @stream = output
71
- super
89
+ super do
90
+ @graph = RDF::Graph.new
91
+ @uri_to_qname = {}
92
+ prefix(nil, @options[:default_namespace]) if @options[:default_namespace]
93
+ if block_given?
94
+ case block.arity
95
+ when 0 then instance_eval(&block)
96
+ else block.call(self)
97
+ end
98
+ end
99
+ end
72
100
  end
73
101
 
74
102
  ##
103
+ # Write whole graph
104
+ #
75
105
  # @param [Graph] graph
76
106
  # @return [void]
77
- def insert_graph(graph)
107
+ def write_graph(graph)
78
108
  @graph = graph
79
109
  end
80
110
 
81
111
  ##
82
- # @param [Statement] statement
112
+ # Adds a statement to be serialized
113
+ # @param [RDF::Statement] statement
83
114
  # @return [void]
84
- def insert_statement(statement)
85
- @graph << statement
115
+ def write_statement(statement)
116
+ @graph.insert(statement)
86
117
  end
87
118
 
88
119
  ##
89
- # Stores the RDF/XML representation of a triple.
90
- #
120
+ # Adds a triple to be serialized
91
121
  # @param [RDF::Resource] subject
92
122
  # @param [RDF::URI] predicate
93
123
  # @param [RDF::Value] object
94
124
  # @return [void]
95
- # @see #write_epilogue
96
- def insert_triple(subject, predicate, object)
97
- @graph << RDF::Statement.new(subject, predicate, object)
125
+ # @raise [NotImplementedError] unless implemented in subclass
126
+ # @abstract
127
+ def write_triple(subject, predicate, object)
128
+ @graph.insert(Statement.new(subject, predicate, object))
98
129
  end
99
130
 
100
131
  ##
@@ -106,69 +137,285 @@ module RDF::N3
106
137
  @max_depth = @options[:max_depth] || 3
107
138
  @base_uri = @options[:base_uri]
108
139
  @debug = @options[:debug]
109
- @default_namespace = @options[:default_namespace]
110
140
 
111
141
  self.reset
112
142
 
113
143
  add_debug "\nserialize: graph: #{@graph.size}"
114
144
 
115
- add_namespace("", @default_namespace) if @default_namespace
116
-
117
145
  preprocess
118
146
  start_document
119
147
 
120
148
  order_subjects.each do |subject|
121
- #puts "subj: #{subject.inspect}"
149
+ #STDERR.puts "subj: #{subject.inspect}"
122
150
  unless is_done?(subject)
123
151
  statement(subject)
124
152
  end
125
153
  end
126
154
  end
127
155
 
156
+ # Return a QName for the URI, or nil. Adds namespace of QName to defined prefixes
157
+ # @param [URI,#to_s] uri
158
+ # @return [Array<Symbol,Symbol>, nil] Prefix, Suffix pair or nil, if none found
159
+ def get_qname(uri)
160
+ uri = RDF::URI.intern(uri.to_s) unless uri.is_a?(URI)
161
+
162
+ unless @uri_to_qname.has_key?(uri)
163
+ # Find in defined prefixes
164
+ prefixes.each_pair do |prefix, vocab|
165
+ if uri.to_s.index(vocab.to_s) == 0
166
+ local_name = uri.to_s[(vocab.to_s.length)..-1]
167
+ add_debug "get_qname(ns): #{prefix}:#{local_name}"
168
+ return @uri_to_qname[uri] = [prefix, local_name.to_sym]
169
+ end
170
+ end
171
+
172
+ # Use a default vocabulary
173
+ if @options[:standard_prefixes] && vocab = RDF::Vocabulary.detect {|v| uri.to_s.index(v.to_uri.to_s) == 0}
174
+ prefix = vocab.__name__.to_s.split('::').last.downcase
175
+ prefixes[prefix.to_sym] = vocab.to_uri
176
+ suffix = uri.to_s[vocab.to_uri.to_s.size..-1]
177
+ return @uri_to_qname[uri] = [prefix.to_sym, suffix.empty? ? nil : suffix.to_sym] if prefix && suffix
178
+ end
179
+
180
+ @uri_to_qname[uri] = nil
181
+ end
182
+
183
+ @uri_to_qname[uri]
184
+ rescue Addressable::URI::InvalidURIError => e
185
+ @uri_to_qname[uri] = nil
186
+ end
187
+
188
+ # Take a hash from predicate uris to lists of values.
189
+ # Sort the lists of values. Return a sorted list of properties.
190
+ # @param [Hash{String => Array<Resource>}] properties A hash of Property to Resource mappings
191
+ # @return [Array<String>] Ordered list of properties. Uses predicate_order.
192
+ def sort_properties(properties)
193
+ properties.keys.each do |k|
194
+ properties[k] = properties[k].sort do |a, b|
195
+ a_li = a.is_a?(RDF::URI) && get_qname(a) && get_qname(a).last.to_s =~ /^_\d+$/ ? a.to_i : a.to_s
196
+ b_li = b.is_a?(RDF::URI) && get_qname(b) && get_qname(b).last.to_s =~ /^_\d+$/ ? b.to_i : b.to_s
197
+
198
+ a_li <=> b_li
199
+ end
200
+ end
201
+
202
+ # Make sorted list of properties
203
+ prop_list = []
204
+
205
+ predicate_order.each do |prop|
206
+ next unless properties[prop]
207
+ prop_list << prop.to_s
208
+ end
209
+
210
+ properties.keys.sort.each do |prop|
211
+ next if prop_list.include?(prop.to_s)
212
+ prop_list << prop.to_s
213
+ end
214
+
215
+ add_debug "sort_properties: #{prop_list.to_sentence}"
216
+ prop_list
217
+ end
218
+
219
+ ##
220
+ # Returns the Turtle/N3 representation of a literal.
221
+ #
222
+ # @param [RDF::Literal, String, #to_s] literal
223
+ # @param [Hash{Symbol => Object}] options
224
+ # @return [String]
225
+ def format_literal(literal, options = {})
226
+ case literal
227
+ when RDF::Literal
228
+ text = quoted(literal.value)
229
+ text << "@#{literal.language}" if literal.has_language?
230
+ text << "^^#{format_uri(literal.datatype)}" if literal.has_datatype?
231
+ text
232
+ else
233
+ quoted(literal.to_s)
234
+ end
235
+ end
236
+
237
+ ##
238
+ # Returns the Turtle/N3 representation of a URI reference.
239
+ #
240
+ # @param [RDF::URI] uri
241
+ # @param [Hash{Symbol => Object}] options
242
+ # @return [String]
243
+ def format_uri(uri, options = {})
244
+ md = relativize(uri)
245
+ if md && md != uri.to_s
246
+ "<%s>" % md
247
+ elsif qname = get_qname(uri)
248
+ qname.map(&:to_s).join(":")
249
+ else
250
+ "<%s>" % uri_for(uri)
251
+ end
252
+ end
253
+
254
+ ##
255
+ # Returns the Turtle/N3 representation of a blank node.
256
+ #
257
+ # @param [RDF::Node] node
258
+ # @param [Hash{Symbol => Object}] options
259
+ # @return [String]
260
+ def format_node(node, options = {})
261
+ "_:%s" % node.id
262
+ end
263
+
128
264
  protected
265
+ # Output @base and @prefix definitions
129
266
  def start_document
130
267
  @started = true
131
268
 
132
- write("#{indent}@base <#{@base_uri}> .\n") if @base_uri
269
+ @output.write("#{indent}@base <#{@base_uri}> .\n") if @base_uri
270
+
271
+ add_debug("start_document: #{prefixes.inspect}")
272
+ prefixes.keys.sort_by(&:to_s).each do |prefix|
273
+ @output.write("#{indent}@prefix #{prefix}: <#{prefixes[prefix]}> .\n")
274
+ end
275
+ end
276
+
277
+ # If @base_uri is defined, use it to try to make uri relative
278
+ # @param [#to_s] uri
279
+ # @return [String]
280
+ def relativize(uri)
281
+ uri = uri.to_s
282
+ @base_uri ? uri.sub(@base_uri.to_s, "") : uri
283
+ end
284
+
285
+ # Defines rdf:type of subjects to be emitted at the beginning of the graph. Defaults to rdfs:Class
286
+ # @return [Array<URI>]
287
+ def top_classes; [RDF::RDFS.Class]; end
288
+
289
+ # Defines order of predicates to emit at the beginning of a resource description. Defaults to
290
+ # [rdf:type, rdfs:label, dc:title]
291
+ # @return [Array<URI>]
292
+ def predicate_order; [RDF.type, RDF::RDFS.label, RDF::DC.title]; end
293
+
294
+ # Order subjects for output. Override this to output subjects in another order.
295
+ #
296
+ # Uses top_classes
297
+ # @return [Array<Resource>] Ordered list of subjects
298
+ def order_subjects
299
+ seen = {}
300
+ subjects = []
133
301
 
134
- add_debug("start_document: #{@namespaces.inspect}")
135
- @namespaces.keys.sort.each do |prefix|
136
- write("#{indent}@prefix #{prefix}: <#{@namespaces[prefix]}> .\n")
302
+ top_classes.each do |class_uri|
303
+ graph.query(:predicate => RDF.type, :object => class_uri).map {|st| st.subject}.sort.uniq.each do |subject|
304
+ #add_debug "order_subjects: #{subject.inspect}"
305
+ subjects << subject
306
+ seen[subject] = @top_levels[subject] = true
307
+ end
137
308
  end
309
+
310
+ # Sort subjects by resources over bnodes, ref_counts and the subject URI itself
311
+ recursable = @subjects.keys.
312
+ select {|s| !seen.include?(s)}.
313
+ map {|r| [r.is_a?(RDF::Node) ? 1 : 0, ref_count(r), r]}.
314
+ sort
315
+
316
+ subjects += recursable.map{|r| r.last}
317
+ end
318
+
319
+ # Perform any preprocessing of statements required
320
+ def preprocess
321
+ @graph.each {|statement| preprocess_statement(statement)}
138
322
  end
139
323
 
140
- def end_document
141
- write("\n")
324
+ # Perform any statement preprocessing required. This is used to perform reference counts and determine required
325
+ # prefixes.
326
+ # @param [Statement] statement
327
+ def preprocess_statement(statement)
328
+ #add_debug "preprocess: #{statement.inspect}"
329
+ references = ref_count(statement.object) + 1
330
+ @references[statement.object] = references
331
+ @subjects[statement.subject] = true
332
+
333
+ # Pre-fetch qnames, to fill prefixes
334
+ get_qname(statement.subject)
335
+ get_qname(statement.predicate)
336
+ get_qname(statement.object)
337
+
338
+ @references[statement.predicate] = ref_count(statement.predicate) + 1
339
+ end
340
+
341
+ # Return the number of times this node has been referenced in the object position
342
+ # @return [Integer]
343
+ def ref_count(node)
344
+ @references.fetch(node, 0)
345
+ end
346
+
347
+ # Returns indent string multiplied by the depth
348
+ # @param [Integer] modifier Increase depth by specified amount
349
+ # @return [String] A number of spaces, depending on current depth
350
+ def indent(modifier = 0)
351
+ " " * (@depth + modifier)
352
+ end
353
+
354
+ # Reset internal helper instance variables
355
+ def reset
356
+ @depth = 0
357
+ @lists = {}
358
+ @namespaces = {}
359
+ @references = {}
360
+ @serialized = {}
361
+ @subjects = {}
362
+ @top_levels = {}
363
+ @shortNames = {}
364
+ @started = false
365
+ end
366
+
367
+ ##
368
+ # Use single- or multi-line quotes. If literal contains \t, \n, or \r, use a multiline quote,
369
+ # otherwise, use a single-line
370
+ # @param [String] string
371
+ # @return [String]
372
+ def quoted(string)
373
+ if string.to_s.match(/[\t\n\r]/)
374
+ string = string.gsub('\\', '\\\\').gsub('"""', '\\"""')
375
+ %("""#{string}""")
376
+ else
377
+ "\"#{escaped(string)}\""
378
+ end
142
379
  end
380
+
381
+ private
143
382
 
383
+ # Add debug event to debug array, if specified
384
+ #
385
+ # @param [String] message
386
+ def add_debug(message)
387
+ STDERR.puts message if ::RDF::N3::debug?
388
+ @debug << message if @debug.is_a?(Array)
389
+ end
390
+
144
391
  # Checks if l is a valid RDF list, i.e. no nodes have other properties.
145
392
  def is_valid_list(l)
146
393
  props = @graph.properties(l)
147
- #puts "is_valid_list: #{props.inspect}" if ::RDF::N3::debug?
394
+ #STDERR.puts "is_valid_list: #{props.inspect}" if ::RDF::N3::debug?
148
395
  return false unless props.has_key?(RDF.first.to_s) || l == RDF.nil
149
396
  while l && l != RDF.nil do
150
- #puts "is_valid_list(length): #{props.length}" if ::RDF::N3::debug?
397
+ #STDERR.puts "is_valid_list(length): #{props.length}" if ::RDF::N3::debug?
151
398
  return false unless props.has_key?(RDF.first.to_s) && props.has_key?(RDF.rest.to_s)
152
399
  n = props[RDF.rest.to_s]
153
- #puts "is_valid_list(n): #{n.inspect}" if ::RDF::N3::debug?
400
+ #STDERR.puts "is_valid_list(n): #{n.inspect}" if ::RDF::N3::debug?
154
401
  return false unless n.is_a?(Array) && n.length == 1
155
402
  l = n.first
156
403
  props = @graph.properties(l)
157
404
  end
158
- #puts "is_valid_list: valid" if ::RDF::N3::debug?
405
+ #STDERR.puts "is_valid_list: valid" if ::RDF::N3::debug?
159
406
  true
160
407
  end
161
408
 
162
409
  def do_list(l)
163
- puts "do_list: #{l.inspect}" if ::RDF::N3::debug?
164
- position = SUBJECT
410
+ STDERR.puts "do_list: #{l.inspect}" if ::RDF::N3::debug?
411
+ position = :subject
165
412
  while l do
166
413
  p = @graph.properties(l)
167
414
  item = p.fetch(RDF.first.to_s, []).first
168
415
  if item
169
416
  path(item, position)
170
417
  subject_done(l)
171
- position = OBJECT
418
+ position = :object
172
419
  end
173
420
  l = p.fetch(RDF.rest.to_s, []).first
174
421
  end
@@ -176,13 +423,13 @@ module RDF::N3
176
423
 
177
424
  def p_list(node, position)
178
425
  return false if !is_valid_list(node)
179
- #puts "p_list: #{node.inspect}, #{position}" if ::RDF::N3::debug?
426
+ #STDERR.puts "p_list: #{node.inspect}, #{position}" if ::RDF::N3::debug?
180
427
 
181
- write(position == SUBJECT ? "(" : " (")
428
+ @output.write(position == :subject ? "(" : " (")
182
429
  @depth += 2
183
430
  do_list(node)
184
431
  @depth -= 2
185
- write(')')
432
+ @output.write(')')
186
433
  end
187
434
 
188
435
  def p_squared?(node, position)
@@ -194,57 +441,62 @@ module RDF::N3
194
441
  def p_squared(node, position)
195
442
  return false unless p_squared?(node, position)
196
443
 
197
- #puts "p_squared: #{node.inspect}, #{position}" if ::RDF::N3::debug?
444
+ #STDERR.puts "p_squared: #{node.inspect}, #{position}" if ::RDF::N3::debug?
198
445
  subject_done(node)
199
- write(position == SUBJECT ? '[' : ' [')
446
+ @output.write(position == :subject ? '[' : ' [')
200
447
  @depth += 2
201
448
  predicate_list(node)
202
449
  @depth -= 2
203
- write(']')
450
+ @output.write(']')
204
451
 
205
452
  true
206
453
  end
207
454
 
208
455
  def p_default(node, position)
209
- #puts "p_default: #{node.inspect}, #{position}" if ::RDF::N3::debug?
210
- l = (position == SUBJECT ? "" : " ") + format_value(node)
211
- write(l)
456
+ #STDERR.puts "p_default: #{node.inspect}, #{position}" if ::RDF::N3::debug?
457
+ l = (position == :subject ? "" : " ") + format_value(node)
458
+ @output.write(l)
212
459
  end
213
460
 
214
461
  def path(node, position)
215
- puts "path: #{node.inspect}, pos: #{position}, []: #{is_valid_list(node)}, p2?: #{p_squared?(node, position)}, rc: #{ref_count(node)}" if ::RDF::N3::debug?
462
+ STDERR.puts "path: #{node.inspect}, pos: #{position}, []: #{is_valid_list(node)}, p2?: #{p_squared?(node, position)}, rc: #{ref_count(node)}" if ::RDF::N3::debug?
216
463
  raise RDF::WriterError, "Cannot serialize node '#{node}'" unless p_list(node, position) || p_squared(node, position) || p_default(node, position)
217
464
  end
218
465
 
219
466
  def verb(node)
220
- puts "verb: #{node.inspect}" if ::RDF::N3::debug?
467
+ STDERR.puts "verb: #{node.inspect}" if ::RDF::N3::debug?
221
468
  if node == RDF.type
222
- write(" a")
469
+ @output.write(" a")
223
470
  else
224
- path(node, VERB)
471
+ path(node, :predicate)
225
472
  end
226
473
  end
227
474
 
228
475
  def object_list(objects)
229
- puts "object_list: #{objects.inspect}" if ::RDF::N3::debug?
476
+ STDERR.puts "object_list: #{objects.inspect}" if ::RDF::N3::debug?
230
477
  return if objects.empty?
231
478
 
232
479
  objects.each_with_index do |obj, i|
233
- write(",\n#{indent(2)}") if i > 0
234
- path(obj, OBJECT)
480
+ @output.write(",\n#{indent(2)}") if i > 0
481
+ path(obj, :object)
235
482
  end
236
483
  end
237
484
 
238
485
  def predicate_list(subject)
239
486
  properties = @graph.properties(subject)
240
487
  prop_list = sort_properties(properties) - [RDF.first.to_s, RDF.rest.to_s]
241
- puts "predicate_list: #{prop_list.inspect}" if ::RDF::N3::debug?
488
+ STDERR.puts "predicate_list: #{prop_list.inspect}" if ::RDF::N3::debug?
242
489
  return if prop_list.empty?
243
490
 
244
491
  prop_list.each_with_index do |prop, i|
245
- write(";\n#{indent(2)}") if i > 0
246
- verb(RDF::URI.new(prop))
247
- object_list(properties[prop])
492
+ begin
493
+ @output.write(";\n#{indent(2)}") if i > 0
494
+ prop[0, 2] == "_:"
495
+ verb(prop[0, 2] == "_:" ? RDF::Node.new(prop.split(':').last) : RDF::URI.intern(prop))
496
+ object_list(properties[prop])
497
+ rescue Addressable::URI::InvalidURIError => e
498
+ STDERR.puts "Predicate #{prop.inspect} is an invalid URI: #{e.message}"
499
+ end
248
500
  end
249
501
  end
250
502
 
@@ -255,39 +507,28 @@ module RDF::N3
255
507
  def s_squared(subject)
256
508
  return false unless s_squared?(subject)
257
509
 
258
- write("\n#{indent} [")
510
+ @output.write("\n#{indent} [")
259
511
  @depth += 1
260
512
  predicate_list(subject)
261
513
  @depth -= 1
262
- write("] .")
514
+ @output.write("] .")
263
515
  true
264
516
  end
265
517
 
266
518
  def s_default(subject)
267
- write("\n#{indent}")
268
- path(subject, SUBJECT)
519
+ @output.write("\n#{indent}")
520
+ path(subject, :subject)
269
521
  predicate_list(subject)
270
- write(" .")
522
+ @output.write(" .")
271
523
  true
272
524
  end
273
525
 
274
- def relativize(uri)
275
- uri = uri.to_s
276
- @base_uri ? uri.sub(@base_uri.to_s, "") : uri
277
- end
278
-
279
526
  def statement(subject)
280
- puts "statement: #{subject.inspect}, s2?: #{s_squared(subject)}" if ::RDF::N3::debug?
527
+ STDERR.puts "statement: #{subject.inspect}, s2?: #{s_squared(subject)}" if ::RDF::N3::debug?
281
528
  subject_done(subject)
282
529
  s_squared(subject) || s_default(subject)
283
530
  end
284
531
 
285
- MAX_DEPTH = 10
286
- INDENT_STRING = " "
287
-
288
- def top_classes; [RDF::RDFS.Class]; end
289
- def predicate_order; [RDF.type, RDF::RDFS.label, RDF::DC.title]; end
290
-
291
532
  def is_done?(subject)
292
533
  @serialized.include?(subject)
293
534
  end
@@ -296,187 +537,5 @@ module RDF::N3
296
537
  def subject_done(subject)
297
538
  @serialized[subject] = true
298
539
  end
299
-
300
- def order_subjects
301
- seen = {}
302
- subjects = []
303
-
304
- top_classes.each do |class_uri|
305
- graph.query(:predicate => RDF.type, :object => class_uri).map {|st| st.subject}.sort.uniq.each do |subject|
306
- #add_debug "order_subjects: #{subject.inspect}"
307
- subjects << subject
308
- seen[subject] = @top_levels[subject] = true
309
- end
310
- end
311
-
312
- # Sort subjects by resources over bnodes, ref_counts and the subject URI itself
313
- recursable = @subjects.keys.
314
- select {|s| !seen.include?(s)}.
315
- map {|r| [r.is_a?(RDF::Node) ? 1 : 0, ref_count(r), r]}.
316
- sort
317
-
318
- subjects += recursable.map{|r| r.last}
319
- end
320
-
321
- def preprocess
322
- @graph.each {|statement| preprocess_statement(statement)}
323
- end
324
-
325
- def preprocess_statement(statement)
326
- #add_debug "preprocess: #{statement.inspect}"
327
- references = ref_count(statement.object) + 1
328
- @references[statement.object] = references
329
- @subjects[statement.subject] = true
330
-
331
- # Pre-fetch qnames, to fill namespaces
332
- get_qname(statement.subject)
333
- get_qname(statement.predicate)
334
- get_qname(statement.object)
335
-
336
- @references[statement.predicate] = ref_count(statement.predicate) + 1
337
- end
338
-
339
- # Return the number of times this node has been referenced in the object position
340
- def ref_count(node)
341
- @references.fetch(node, 0)
342
- end
343
-
344
- # Return a QName for the URI, or nil. Adds namespace of QName to defined namespaces
345
- def get_qname(uri)
346
- if uri.is_a?(RDF::URI)
347
- md = relativize(uri)
348
- return "<#{md}>" unless md == uri.to_s
349
-
350
- # Duplicate logic from URI#qname to remember namespace assigned
351
-
352
- if uri.qname
353
- return ":#{uri.qname.last}" if uri.vocab == @default_namespace
354
- add_namespace(uri.qname.first, uri.vocab)
355
- add_debug "get_qname(uri.qname): #{uri.qname.join(':')}"
356
- return uri.qname.join(":")
357
- end
358
-
359
- # No vocabulary assigned, find one from cache of created namespace URIs
360
- @namespaces.each_pair do |prefix, vocab|
361
- if uri.to_s.index(vocab.to_s) == 0
362
- uri.vocab = vocab
363
- local_name = uri.to_s[(vocab.to_s.length)..-1]
364
- if vocab == @default_namespace
365
- add_debug "get_qname(ns): :#{local_name}"
366
- return ":#{local_name}"
367
- else
368
- add_debug "get_qname(ns): #{prefix}:#{local_name}"
369
- return "#{prefix}:#{local_name}"
370
- end
371
- end
372
- end
373
-
374
- nil
375
- end
376
- end
377
-
378
- def add_namespace(prefix, ns)
379
- return if @namespaces.has_key?(prefix.to_s)
380
- uri = (ns.respond_to?(:to_uri) ? ns.to_uri : ns).to_s
381
- add_debug "add_namespace: '#{prefix}', <#{uri}>"
382
- @namespaces[prefix.to_s] = uri
383
- end
384
-
385
- def reset
386
- @depth = 0
387
- @lists = {}
388
- @namespaces = {}
389
- @references = {}
390
- @serialized = {}
391
- @subjects = {}
392
- @top_levels = {}
393
- @shortNames = {}
394
- @started = false
395
- end
396
-
397
- # Take a hash from predicate uris to lists of values.
398
- # Sort the lists of values. Return a sorted list of properties.
399
- def sort_properties(properties)
400
- properties.keys.each do |k|
401
- properties[k] = properties[k].sort do |a, b|
402
- a_li = a.is_a?(RDF::URI) && a.qname && a.qname.last =~ /^_\d+$/ ? a.to_i : a.to_s
403
- b_li = b.is_a?(RDF::URI) && b.qname && b.qname.last =~ /^_\d+$/ ? b.to_i : b.to_s
404
-
405
- a_li <=> b_li
406
- end
407
- end
408
-
409
- # Make sorted list of properties
410
- prop_list = []
411
-
412
- predicate_order.each do |prop|
413
- next unless properties[prop]
414
- prop_list << prop.to_s
415
- end
416
-
417
- properties.keys.sort.each do |prop|
418
- next if prop_list.include?(prop.to_s)
419
- prop_list << prop.to_s
420
- end
421
-
422
- add_debug "sort_properties: #{prop_list.to_sentence}"
423
- prop_list
424
- end
425
-
426
- # Add debug event to debug array, if specified
427
- #
428
- # @param [String] message::
429
- def add_debug(message)
430
- STDERR.puts message if ::RDF::N3::debug?
431
- @debug << message if @debug.is_a?(Array)
432
- end
433
-
434
- # Returns indent string multiplied by the depth
435
- def indent(modifier = 0)
436
- INDENT_STRING * (@depth + modifier)
437
- end
438
-
439
- # Write text
440
- def write(text)
441
- @stream.write(text)
442
- end
443
-
444
- ##
445
- # Returns the N-Triples representation of a literal.
446
- #
447
- # @param [RDF::Literal, String, #to_s] literal
448
- # @param [Hash{Symbol => Object}] options
449
- # @return [String]
450
- def format_literal(literal, options = {})
451
- case literal
452
- when RDF::Literal
453
- text = quoted(literal.value)
454
- text << "@#{literal.language}" if literal.has_language?
455
- text << "^^#{format_uri(literal.datatype)}" if literal.has_datatype?
456
- text
457
- else
458
- quoted(literal.to_s)
459
- end
460
- end
461
-
462
- ##
463
- # Returns the Turtle/N3 representation of a URI reference.
464
- #
465
- # @param [RDF::URI] literal
466
- # @param [Hash{Symbol => Object}] options
467
- # @return [String]
468
- def format_uri(uri, options = {})
469
- get_qname(uri) || "<%s>" % uri_for(uri)
470
- end
471
-
472
- ##
473
- # Returns the Turtle/N3 representation of a blank node.
474
- #
475
- # @param [RDF::Node] node
476
- # @param [Hash{Symbol => Object}] options
477
- # @return [String]
478
- def format_node(node, options = {})
479
- "_:%s" % node.id
480
- end
481
540
  end
482
541
  end