rdf_context 0.5.0 → 0.5.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +15 -0
- data/README.rdoc +84 -8
- data/VERSION +1 -1
- data/bin/rdf_context +2 -6
- data/lib/rdf_context.rb +12 -2
- data/lib/rdf_context/bnode.rb +5 -1
- data/lib/rdf_context/graph.rb +97 -70
- data/lib/rdf_context/literal.rb +8 -3
- data/lib/rdf_context/parser.rb +4 -4
- data/lib/rdf_context/serializer/abstract_serializer.rb +26 -0
- data/lib/rdf_context/serializer/nt_serializer.rb +12 -0
- data/lib/rdf_context/serializer/recursive_serializer.rb +140 -0
- data/lib/rdf_context/serializer/turtle_serializer.rb +219 -0
- data/lib/rdf_context/serializer/xml_serializer.rb +236 -0
- data/lib/rdf_context/store/abstract_store.rb +1 -0
- data/lib/rdf_context/store/memory_store.rb +1 -1
- data/lib/rdf_context/string_hacks.rb +1 -1
- data/lib/rdf_context/uriref.rb +5 -2
- data/spec/bnode_spec.rb +1 -1
- data/spec/graph_spec.rb +77 -10
- data/spec/rdf_helper.rb +4 -1
- data/spec/turtle_serializer_spec.rb +227 -0
- data/spec/xml_serializer_spec.rb +378 -0
- metadata +12 -3
data/lib/rdf_context/literal.rb
CHANGED
@@ -379,6 +379,10 @@ module RdfContext
|
|
379
379
|
end
|
380
380
|
end
|
381
381
|
|
382
|
+
# Order this literal relative to +other+ by comparing the string
# forms (+to_s+) of both operands.
#
# @param other:: Object to compare against; only its +to_s+ is used
# @return:: Result of String#<=> on the two string forms
def <=>(other)
  mine, theirs = to_s, other.to_s
  mine <=> theirs
end
|
385
|
+
|
382
386
|
# Hash value derived from the literal's contents, encoding and language.
#
# @return:: Hash of the three-element array [@contents, @encoding, @lang]
def hash
  components = [@contents, @encoding, @lang]
  components.hash
end
|
@@ -425,10 +429,11 @@ module RdfContext
|
|
425
429
|
encoding.xml_args(@contents, @lang)
|
426
430
|
end
|
427
431
|
|
432
|
+
# True when this literal's encoding is the null encoding
def untyped?
  encoding == Encoding.the_null_encoding
end
|
433
|
+
# True when this literal's encoding is anything other than the null encoding
def typed?
  encoding != Encoding.the_null_encoding
end
|
434
|
+
|
428
435
|
# Is this an XMLLiteral?
|
429
|
-
def xmlliteral
|
430
|
-
encoding.is_a?(XMLLiteral)
|
431
|
-
end
|
436
|
+
# True when this literal's encoding equals Encoding.xmlliteral
def xmlliteral?
  encoding == Encoding.xmlliteral
end
|
432
437
|
|
433
438
|
# Output literal contents as a string
|
434
439
|
def to_s
|
data/lib/rdf_context/parser.rb
CHANGED
@@ -28,7 +28,7 @@ module RdfContext
|
|
28
28
|
# @param [String] uri:: the URI of the document
|
29
29
|
# @param [Hash] options:: Options from
|
30
30
|
# <em>options[:debug]</em>:: Array to place debug messages
|
31
|
-
# <em>options[:type]</em>:: One of _rdfxml_, _html_, or _n3_
|
31
|
+
# <em>options[:type]</em>:: One of _rdfxml_, _html_, or _n3_ (among others)
|
32
32
|
# <em>options[:strict]</em>:: Raise Error if true, continue with lax parsing, otherwise
|
33
33
|
# @return [Graph]:: Returns the graph containing parsed triples
|
34
34
|
# @raise [Error]:: Raises RdfError if _strict_
|
@@ -62,9 +62,9 @@ module RdfContext
|
|
62
62
|
|
63
63
|
# Create a delegate of a specific parser class
|
64
64
|
@delegate ||= case options[:type].to_s
|
65
|
-
when "n3", "ntriples", "turtle" then N3Parser.new(options)
|
66
|
-
when "rdfa", "html", "xhtml"
|
67
|
-
when "xml", "rdf", "rdfxml"
|
65
|
+
when "n3", "ntriples", "turtle", "ttl", "n3", "notation3" then N3Parser.new(options)
|
66
|
+
when "rdfa", "html", "xhtml" then RdfaParser.new(options)
|
67
|
+
when "xml", "rdf", "rdfxml" then RdfXmlParser.new(options)
|
68
68
|
else
|
69
69
|
RdfXmlParser.new(options)
|
70
70
|
# raise ParserException.new("type option must be one of :rdfxml, :html, or :n3")
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), '..', 'uriref')
|
2
|
+
|
3
|
+
module RdfContext
|
4
|
+
# Abstract serializer
#
# Holds the graph to be written and an optional base URI. Subclasses
# override +serialize+ with a format-specific implementation.
class AbstractSerializer
  attr_accessor :graph, :base

  # @param graph:: Graph to be serialized
  def initialize(graph)
    @graph = graph
    @base = nil
  end

  # Serialize the graph
  #
  # This implementation is empty; it only fixes the signature.
  #
  # @param [IO, StreamIO] stream:: Stream in which to place serialized graph
  # @param [Hash] options:: Options for parser
  # <em>options[:base]</em>:: Base URI of graph, used to shorten URI references
  def serialize(stream, options = {})
  end

  # Remove the base URI from the front of +uri+.
  #
  # The base is compared as a literal prefix rather than being interpolated
  # into a regular expression, so characters such as '.', '?' or '+' in the
  # base only match themselves.
  #
  # @param uri:: URI (anything responding to +to_s+) to shorten
  # @return [String]:: +uri+ without the leading base, or the unchanged
  #   string form of +uri+ when no base is set or it does not start with the base
  def relativize(uri)
    uri = uri.to_s
    return uri unless base

    prefix = base.to_s
    uri.start_with?(prefix) ? uri[prefix.length..-1] : uri
  end
end
|
26
|
+
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'abstract_serializer')
|
2
|
+
|
3
|
+
module RdfContext
|
4
|
+
# Serialize RDF graphs in NTriples format
class NTSerializer < AbstractSerializer
  # Write each triple of the graph to +stream+, one per line, using the
  # triple's own +to_ntriples+ rendering.
  #
  # @param [IO, StreamIO] stream:: Stream in which to place serialized graph
  # @param base:: Accepted but not used by this serializer
  # @return [Array]:: The value returned by +stream.write+ for each triple
  def serialize(stream, base = nil)
    @graph.triples.map { |triple| stream.write(triple.to_ntriples + "\n") }
  end
end
|
12
|
+
end
|
@@ -0,0 +1,140 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'abstract_serializer')
|
2
|
+
require File.join(File.dirname(__FILE__), '..', 'bnode')
|
3
|
+
require File.join(File.dirname(__FILE__), '..', 'literal')
|
4
|
+
|
5
|
+
module RdfContext
|
6
|
+
# Recursive serializer
#
# Shared bookkeeping for serializers that emit a graph subject by subject:
# reference counts, the set of subjects already written, collected
# namespaces, the current indentation depth and the output stream.
class RecursiveSerializer < AbstractSerializer
  MAX_DEPTH = 10
  INDENT_STRING = " "

  # @param graph:: Graph to be serialized
  def initialize(graph)
    super(graph)
    @stream = nil
    reset
  end

  # Classes whose instances order_subjects places first
  def top_classes; [RDFS_NS.Class]; end

  # Predicates that sort_properties places first, in this order
  def predicate_order; [RDF_TYPE, RDFS_NS.label, DC_NS.title]; end

  # Has this subject already been marked via subject_done?
  def is_done?(subject)
    @serialized.include?(subject)
  end

  # Mark a subject as done.
  def subject_done(subject)
    @serialized[subject] = true
  end

  # Build the list of subjects in output order: first the instances of each
  # top class (sorted, de-duplicated), then every remaining subject recorded
  # by preprocess.
  #
  # @return [Array]:: Ordered subjects
  def order_subjects
    seen = {}
    subjects = []

    top_classes.each do |class_uri|
      graph.triples(Triple.new(nil, RDF_TYPE, class_uri)).map {|t| t.subject}.sort.uniq.each do |subject|
        #puts "order_subjects: #{subject.inspect}"
        subjects << subject
        seen[subject] = @top_levels[subject] = true
      end
    end

    # Sort subjects by resources over bnodes, ref_counts and the subject URI itself
    recursable = @subjects.keys.
      select {|s| !seen.include?(s)}.
      map {|r| [r.is_a?(BNode) ? 1 : 0, ref_count(r), r]}.
      sort

    subjects += recursable.map {|r| r.last}
  end

  # Run preprocess_triple over every triple of the graph
  def preprocess
    @graph.triples.each {|t| preprocess_triple(t)}
  end

  # Record one triple: bump the reference count of its object and remember
  # its subject.
  def preprocess_triple(triple)
    #puts "preprocess: #{triple.inspect}"
    references = ref_count(triple.object) + 1
    @references[triple.object] = references
    @subjects[triple.subject] = true
  end

  # Return the number of times this node has been referenced in the object position
  def ref_count(node)
    @references.fetch(node, 0)
  end

  # Return a QName for the URI, or nil. Adds namespace of QName to defined namespaces
  #
  # Returns false when the graph raises RdfException for the URI or when the
  # QName contains a '.'; returns nil when +uri+ is not a URIRef.
  def get_qname(uri)
    if uri.is_a?(URIRef)
      begin
        qn = @graph.qname(uri)
      rescue RdfException
        return false # no namespace
      end
      # Local parts with . will mess up serialization
      return false if qn.index('.')

      add_namespace(uri.namespace)
      return qn
    end
  end

  # Remember a namespace under its prefix.
  def add_namespace(ns)
    # The memoized URI lookup is derived from @namespaces, so drop it
    # whenever the set of namespaces changes.
    @uri_binding = nil
    @namespaces[ns.prefix.to_s] = ns
  end

  # URI -> Namespace bindings (similar to graph) for looking up qnames
  def uri_binding
    @uri_binding ||= @namespaces.values.inject({}) {|hash, ns| hash[ns.uri.to_s] = ns; hash}
  end

  # Clear all per-run state so the serializer can be reused.
  def reset
    @depth = 0
    @lists = {}
    @namespaces = {}
    @references = {}
    @serialized = {}
    @subjects = {}
    @top_levels = {}
    # Without this the memoized bindings of a previous run would survive.
    @uri_binding = nil
  end

  # Take a hash from predicate uris to lists of values.
  # Sort the lists of values. Return a sorted list of properties.
  #
  # The value lists inside +properties+ are replaced by their sorted copies.
  # NOTE(review): values whose short_name looks like _N are compared via
  # to_i and all others via to_s; a list mixing both kinds would compare an
  # integer with a string -- confirm that cannot occur.
  def sort_properties(properties)
    properties.keys.each do |k|
      properties[k] = properties[k].sort do |a, b|
        a_li = a.is_a?(URIRef) && a.short_name =~ /^_\d+$/ ? a.to_i : a.to_s
        b_li = b.is_a?(URIRef) && b.short_name =~ /^_\d+$/ ? b.to_i : b.to_s

        a_li <=> b_li
      end
    end

    # Make sorted list of properties
    prop_list = []

    # Preferred predicates first, in predicate_order
    predicate_order.each do |prop|
      next unless properties[prop]
      prop_list << prop.to_s
    end

    # Then everything else in sorted key order
    properties.keys.sort.each do |prop|
      next if prop_list.include?(prop.to_s)
      prop_list << prop.to_s
    end

    puts "sort_properties: #{prop_list.to_sentence}" if $DEBUG
    prop_list
  end

  # Returns indent string multiplied by the depth
  def indent(modifier = 0)
    INDENT_STRING * (@depth + modifier)
  end

  # Write text
  def write(text)
    @stream.write(text)
  end
end
|
140
|
+
end
|
@@ -0,0 +1,219 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'recursive_serializer')
|
2
|
+
|
3
|
+
module RdfContext
|
4
|
+
# Serialize RDF graphs in Turtle format
class TurtleSerializer < RecursiveSerializer
  # Position of a node within a statement; SUBJECT nodes are written
  # without a leading space, the others with one.
  SUBJECT = 0
  VERB = 1
  OBJECT = 2

  # Clear per-run state, including the fields specific to this class
  def reset
    super
    @shortNames = {}
    @started = false
  end

  # Return a label for a URIRef: a <relative> reference when the URI starts
  # with the base, otherwise whatever the superclass returns. Returns nil
  # for anything that is not a URIRef.
  def get_qname(uri)
    if uri.is_a?(URIRef)
      # Escape the base so '.', '?' etc. in it only match themselves
      md = uri.to_s.match(/^#{Regexp.escape(@base.to_s)}(.*)$/) if @base
      return "<#{md[1]}>" if md

      super(uri)
    end
  end

  # Record a triple as the superclass does, and additionally collect
  # namespaces and count the predicate as a reference.
  def preprocess_triple(triple)
    super

    # Pre-fetch qnames, to fill namespaces
    get_qname(triple.subject)
    get_qname(triple.predicate)
    get_qname(triple.object)

    @references[triple.predicate] = ref_count(triple.predicate) + 1
  end

  # Text used for a node: its qname when available, else its N3 form
  def label(node)
    get_qname(node) || node.to_n3
  end

  # Write the @base directive (when a base is set) followed by one @prefix
  # directive per collected namespace, sorted by prefix.
  def start_document
    @started = true

    write("#{indent}@base <#{@base}> .\n") if @base

    ns_list = @namespaces.values.sort_by {|ns| ns.prefix}
    unless ns_list.empty?
      ns_str = ns_list.map do |ns|
        "#{indent}@prefix #{ns.prefix}: <#{ns.uri}> ."
      end.join("\n") + "\n"
      write(ns_str)
    end
  end

  def end_document; end

  # Checks if l is a valid RDF list, i.e. no nodes have other properties.
  def is_valid_list(l)
    props = @graph.properties(l)
    #puts "is_valid_list: #{props.inspect}" if $DEBUG
    return false unless props.has_key?(RDF_NS.first.to_s) || l == RDF_NS.nil
    visited = {}
    while l && l != RDF_NS.nil do
      # A rest chain that leads back to an earlier node never reaches
      # rdf:nil; reject it instead of looping forever.
      return false if visited[l]
      visited[l] = true
      #puts "is_valid_list(length): #{props.length}" if $DEBUG
      return false unless props.has_key?(RDF_NS.first.to_s) && props.has_key?(RDF_NS.rest.to_s)
      n = props[RDF_NS.rest.to_s]
      #puts "is_valid_list(n): #{n.inspect}" if $DEBUG
      return false unless n.is_a?(Array) && n.length == 1
      l = n.first
      props = @graph.properties(l)
    end
    #puts "is_valid_list: valid" if $DEBUG
    true
  end

  # Write the items of the list starting at l, following rdf:rest until
  # no further node is found. Each list node that has an item is marked done.
  def do_list(l)
    puts "do_list: #{l.inspect}" if $DEBUG
    position = SUBJECT
    while l do
      p = @graph.properties(l)
      item = p.fetch(RDF_NS.first.to_s, []).first
      if item
        path(item, position)
        subject_done(l)
        position = OBJECT
      end
      l = p.fetch(RDF_NS.rest.to_s, []).first
    end
  end

  # Write node as a parenthesised list when it is a valid RDF list.
  #
  # @return [Boolean]:: false when node is not a valid list, true once written
  def p_list(node, position)
    return false unless is_valid_list(node)
    #puts "p_list: #{node.inspect}, #{position}" if $DEBUG

    write(position == SUBJECT ? "(" : " (")
    @depth += 2
    do_list(node)
    @depth -= 2
    write(')')

    # path relies on a truthy result; do not depend on what write returns
    true
  end

  # Can node be written inline in square brackets? True for a BNode that
  # has not been serialized yet and is referenced at most once.
  def p_squared?(node, position)
    node.is_a?(BNode) &&
      !@serialized.has_key?(node) &&
      ref_count(node) <= 1
  end

  # Write node inline as [ predicate-list ] when p_squared? allows it.
  #
  # @return [Boolean]:: false when not applicable, true once written
  def p_squared(node, position)
    return false unless p_squared?(node, position)

    #puts "p_squared: #{node.inspect}, #{position}" if $DEBUG
    subject_done(node)
    write(position == SUBJECT ? '[' : ' [')
    @depth += 2
    predicate_list(node)
    @depth -= 2
    write(']')

    true
  end

  # Write the plain label of node, preceded by a space unless in subject position.
  #
  # @return [Boolean]:: always true
  def p_default(node, position)
    #puts "p_default: #{node.inspect}, #{position}" if $DEBUG
    l = (position == SUBJECT ? "" : " ") + label(node)
    write(l)

    # path relies on a truthy result; do not depend on what write returns
    true
  end

  # Write node using the first form that applies: list, inline bnode, or label.
  def path(node, position)
    puts "path: #{node.inspect}, pos: #{position}, []: #{is_valid_list(node)}, p2?: #{p_squared?(node, position)}, rc: #{ref_count(node)}" if $DEBUG
    raise RdfException, "Cannot serialize node '#{node}'" unless p_list(node, position) || p_squared(node, position) || p_default(node, position)
  end

  # Write a predicate, using the 'a' shorthand for rdf:type
  def verb(node)
    puts "verb: #{node.inspect}" if $DEBUG
    if node == RDF_TYPE
      write(" a")
    else
      path(node, VERB)
    end
  end

  # Write the objects of one predicate, separated by commas
  def object_list(objects)
    puts "object_list: #{objects.inspect}" if $DEBUG
    return if objects.empty?

    objects.each_with_index do |obj, i|
      write(",\n#{indent(2)}") if i > 0
      path(obj, OBJECT)
    end
  end

  # Write all predicates of subject with their objects, separated by
  # semicolons; rdf:first and rdf:rest are left out.
  def predicate_list(subject)
    properties = @graph.properties(subject)
    prop_list = sort_properties(properties) - [RDF_NS.first.to_s, RDF_NS.rest.to_s]
    puts "predicate_list: #{prop_list.inspect}" if $DEBUG
    return if prop_list.empty?

    prop_list.each_with_index do |prop, i|
      write(";\n#{indent(2)}") if i > 0
      verb(URIRef.new(prop))
      object_list(properties[prop])
    end
  end

  # Should subject be written as a top-level [ ... ] block? True for an
  # unreferenced BNode that is not a valid list.
  def s_squared?(subject)
    ref_count(subject) == 0 && subject.is_a?(BNode) && !is_valid_list(subject)
  end

  # Write subject as a top-level [ ... ] . block when s_squared? allows it.
  #
  # @return [Boolean]:: false when not applicable, true once written
  def s_squared(subject)
    return false unless s_squared?(subject)

    write("\n#{indent} [")
    @depth += 1
    predicate_list(subject)
    @depth -= 1
    write("] .")
    true
  end

  # Write subject followed by its predicate list and a terminating ' .'
  def s_default(subject)
    write("\n#{indent}")
    path(subject, SUBJECT)
    predicate_list(subject)
    write(" .")
    true
  end

  # Mark subject as done and write its statement
  def statement(subject)
    # Use the predicate here: s_squared itself writes to the stream, so
    # calling it for the debug message would emit the block twice.
    puts "statement: #{subject.inspect}, s2?: #{s_squared?(subject)}" if $DEBUG
    subject_done(subject)
    s_squared(subject) || s_default(subject)
  end

  # Serialize the graph
  #
  # @param [IO, StreamIO] stream:: Stream in which to place serialized graph
  # @param [Hash] options:: Options for parser
  # <em>options[:base]</em>:: Base URI of graph, used to shorten URI references
  def serialize(stream, options = {})
    puts "\nserialize: #{@graph.inspect}" if $DEBUG
    reset
    @stream = stream
    @base = options[:base]

    @graph.bind(RDF_NS)
    @graph.bind(RDFS_NS)

    preprocess
    start_document

    order_subjects.each do |subject|
      #puts "subj: #{subject.inspect}"
      unless is_done?(subject)
        statement(subject)
      end
    end

    end_document
  end
end
|
219
|
+
end
|