rdf_context 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +15 -0
- data/README.rdoc +84 -8
- data/VERSION +1 -1
- data/bin/rdf_context +2 -6
- data/lib/rdf_context.rb +12 -2
- data/lib/rdf_context/bnode.rb +5 -1
- data/lib/rdf_context/graph.rb +97 -70
- data/lib/rdf_context/literal.rb +8 -3
- data/lib/rdf_context/parser.rb +4 -4
- data/lib/rdf_context/serializer/abstract_serializer.rb +26 -0
- data/lib/rdf_context/serializer/nt_serializer.rb +12 -0
- data/lib/rdf_context/serializer/recursive_serializer.rb +140 -0
- data/lib/rdf_context/serializer/turtle_serializer.rb +219 -0
- data/lib/rdf_context/serializer/xml_serializer.rb +236 -0
- data/lib/rdf_context/store/abstract_store.rb +1 -0
- data/lib/rdf_context/store/memory_store.rb +1 -1
- data/lib/rdf_context/string_hacks.rb +1 -1
- data/lib/rdf_context/uriref.rb +5 -2
- data/spec/bnode_spec.rb +1 -1
- data/spec/graph_spec.rb +77 -10
- data/spec/rdf_helper.rb +4 -1
- data/spec/turtle_serializer_spec.rb +227 -0
- data/spec/xml_serializer_spec.rb +378 -0
- metadata +12 -3
data/lib/rdf_context/literal.rb
CHANGED
@@ -379,6 +379,10 @@ module RdfContext
|
|
379
379
|
end
|
380
380
|
end
|
381
381
|
|
382
|
+
def <=>(other)
|
383
|
+
self.to_s <=> other.to_s
|
384
|
+
end
|
385
|
+
|
382
386
|
def hash
|
383
387
|
[@contents, @encoding, @lang].hash
|
384
388
|
end
|
@@ -425,10 +429,11 @@ module RdfContext
|
|
425
429
|
encoding.xml_args(@contents, @lang)
|
426
430
|
end
|
427
431
|
|
432
|
+
def untyped?; encoding == Encoding.the_null_encoding; end
|
433
|
+
def typed?; encoding != Encoding.the_null_encoding; end
|
434
|
+
|
428
435
|
# Is this an XMLLiteral?
|
429
|
-
def xmlliteral
|
430
|
-
encoding.is_a?(XMLLiteral)
|
431
|
-
end
|
436
|
+
def xmlliteral?; encoding == Encoding.xmlliteral; end
|
432
437
|
|
433
438
|
# Output literal contents as a string
|
434
439
|
def to_s
|
data/lib/rdf_context/parser.rb
CHANGED
@@ -28,7 +28,7 @@ module RdfContext
|
|
28
28
|
# @param [String] uri:: the URI of the document
|
29
29
|
# @param [Hash] options:: Options from
|
30
30
|
# <em>options[:debug]</em>:: Array to place debug messages
|
31
|
-
# <em>options[:type]</em>:: One of _rdfxml_, _html_, or _n3_
|
31
|
+
# <em>options[:type]</em>:: One of _rdfxml_, _html_, or _n3_ (among others)
|
32
32
|
# <em>options[:strict]</em>:: Raise Error if true, continue with lax parsing, otherwise
|
33
33
|
# @return [Graph]:: Returns the graph containing parsed triples
|
34
34
|
# @raise [Error]:: Raises RdfError if _strict_
|
@@ -62,9 +62,9 @@ module RdfContext
|
|
62
62
|
|
63
63
|
# Create a delegate of a specific parser class
|
64
64
|
@delegate ||= case options[:type].to_s
|
65
|
-
when "n3", "ntriples", "turtle" then N3Parser.new(options)
|
66
|
-
when "rdfa", "html", "xhtml"
|
67
|
-
when "xml", "rdf", "rdfxml"
|
65
|
+
when "n3", "ntriples", "turtle", "ttl", "n3", "notation3" then N3Parser.new(options)
|
66
|
+
when "rdfa", "html", "xhtml" then RdfaParser.new(options)
|
67
|
+
when "xml", "rdf", "rdfxml" then RdfXmlParser.new(options)
|
68
68
|
else
|
69
69
|
RdfXmlParser.new(options)
|
70
70
|
# raise ParserException.new("type option must be one of :rdfxml, :html, or :n3")
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), '..', 'uriref')
|
2
|
+
|
3
|
+
module RdfContext
|
4
|
+
# Abstract serializer
|
5
|
+
class AbstractSerializer
|
6
|
+
attr_accessor :graph, :base
|
7
|
+
|
8
|
+
def initialize(graph)
|
9
|
+
@graph = graph
|
10
|
+
@base = nil
|
11
|
+
end
|
12
|
+
|
13
|
+
# Serialize the graph
|
14
|
+
#
|
15
|
+
# @param [IO, StreamIO] stream:: Stream in which to place serialized graph
|
16
|
+
# @param [Hash] options:: Options for serializer
|
17
|
+
# <em>options[:base]</em>:: Base URI of graph, used to shorten URI references
|
18
|
+
def serialize(stream, options = {})
|
19
|
+
end
|
20
|
+
|
21
|
+
def relativize(uri)
|
22
|
+
uri = uri.to_s
|
23
|
+
self.base ? uri.sub(/^#{Regexp.escape(self.base.to_s)}/, "") : uri # escape base so regexp metacharacters in the URI ('.', '?', '+') match literally
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'abstract_serializer')
|
2
|
+
|
3
|
+
module RdfContext
|
4
|
+
# Serialize RDF graphs in NTriples format
|
5
|
+
class NTSerializer < AbstractSerializer
|
6
|
+
def serialize(stream, base = nil)
|
7
|
+
@graph.triples.collect do |t|
|
8
|
+
stream.write(t.to_ntriples + "\n")
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|
@@ -0,0 +1,140 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'abstract_serializer')
|
2
|
+
require File.join(File.dirname(__FILE__), '..', 'bnode')
|
3
|
+
require File.join(File.dirname(__FILE__), '..', 'literal')
|
4
|
+
|
5
|
+
module RdfContext
|
6
|
+
# Recursive serializer
|
7
|
+
class RecursiveSerializer < AbstractSerializer
|
8
|
+
MAX_DEPTH = 10
|
9
|
+
INDENT_STRING = " "
|
10
|
+
|
11
|
+
def initialize(graph)
|
12
|
+
super(graph)
|
13
|
+
@stream = nil
|
14
|
+
self.reset
|
15
|
+
end
|
16
|
+
|
17
|
+
def top_classes; [RDFS_NS.Class]; end
|
18
|
+
def predicate_order; [RDF_TYPE, RDFS_NS.label, DC_NS.title]; end
|
19
|
+
|
20
|
+
def is_done?(subject)
|
21
|
+
@serialized.include?(subject)
|
22
|
+
end
|
23
|
+
|
24
|
+
# Mark a subject as done.
|
25
|
+
def subject_done(subject)
|
26
|
+
@serialized[subject] = true
|
27
|
+
end
|
28
|
+
|
29
|
+
def order_subjects
|
30
|
+
seen = {}
|
31
|
+
subjects = []
|
32
|
+
|
33
|
+
top_classes.each do |class_uri|
|
34
|
+
graph.triples(Triple.new(nil, RDF_TYPE, class_uri)).map {|t| t.subject}.sort.uniq.each do |subject|
|
35
|
+
#puts "order_subjects: #{subject.inspect}"
|
36
|
+
subjects << subject
|
37
|
+
seen[subject] = @top_levels[subject] = true
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
# Sort subjects by resources over bnodes, ref_counts and the subject URI itself
|
42
|
+
recursable = @subjects.keys.
|
43
|
+
select {|s| !seen.include?(s)}.
|
44
|
+
map {|r| [r.is_a?(BNode) ? 1 : 0, ref_count(r), r]}.
|
45
|
+
sort
|
46
|
+
|
47
|
+
subjects += recursable.map{|r| r.last}
|
48
|
+
end
|
49
|
+
|
50
|
+
def preprocess
|
51
|
+
@graph.triples.each {|t| preprocess_triple(t)}
|
52
|
+
end
|
53
|
+
|
54
|
+
def preprocess_triple(triple)
|
55
|
+
#puts "preprocess: #{triple.inspect}"
|
56
|
+
references = ref_count(triple.object) + 1
|
57
|
+
@references[triple.object] = references
|
58
|
+
@subjects[triple.subject] = true
|
59
|
+
end
|
60
|
+
|
61
|
+
# Return the number of times this node has been referenced in the object position
|
62
|
+
def ref_count(node)
|
63
|
+
@references.fetch(node, 0)
|
64
|
+
end
|
65
|
+
|
66
|
+
# Return a QName for the URI, or a false value (false or nil) when no usable QName exists. Adds namespace of QName to defined namespaces
|
67
|
+
def get_qname(uri)
|
68
|
+
if uri.is_a?(URIRef)
|
69
|
+
begin
|
70
|
+
qn = @graph.qname(uri)
|
71
|
+
rescue RdfException
|
72
|
+
return false # no namespace
|
73
|
+
end
|
74
|
+
# Local parts with . will mess up serialization
|
75
|
+
return false if qn.index('.')
|
76
|
+
|
77
|
+
add_namespace(uri.namespace)
|
78
|
+
return qn
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
def add_namespace(ns)
|
83
|
+
@namespaces[ns.prefix.to_s] = ns
|
84
|
+
end
|
85
|
+
|
86
|
+
# URI -> Namespace bindings (similar to graph) for looking up qnames
|
87
|
+
def uri_binding
|
88
|
+
@uri_binding ||= @namespaces.values.inject({}) {|hash, ns| hash[ns.uri.to_s] = ns; hash}
|
89
|
+
end
|
90
|
+
|
91
|
+
def reset
|
92
|
+
@depth = 0
|
93
|
+
@lists = {}
|
94
|
+
@namespaces = {}
|
95
|
+
@references = {}
|
96
|
+
@serialized = {}
|
97
|
+
@subjects = {}
|
98
|
+
@top_levels = {}
@uri_binding = nil # drop the memoized URI -> Namespace map so uri_binding is rebuilt from the fresh @namespaces
|
99
|
+
end
|
100
|
+
|
101
|
+
# Take a hash from predicate uris to lists of values.
|
102
|
+
# Sort the lists of values. Return a sorted list of properties.
|
103
|
+
def sort_properties(properties)
|
104
|
+
properties.keys.each do |k|
|
105
|
+
properties[k] = properties[k].sort do |a, b|
|
106
|
+
a_li = a.is_a?(URIRef) && a.short_name =~ /^_\d+$/ ? a.to_i : a.to_s
|
107
|
+
b_li = b.is_a?(URIRef) && b.short_name =~ /^_\d+$/ ? b.to_i : b.to_s
|
108
|
+
|
109
|
+
a_li <=> b_li
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
# Make sorted list of properties
|
114
|
+
prop_list = []
|
115
|
+
|
116
|
+
predicate_order.each do |prop|
|
117
|
+
next unless properties[prop]
|
118
|
+
prop_list << prop.to_s
|
119
|
+
end
|
120
|
+
|
121
|
+
properties.keys.sort.each do |prop|
|
122
|
+
next if prop_list.include?(prop.to_s)
|
123
|
+
prop_list << prop.to_s
|
124
|
+
end
|
125
|
+
|
126
|
+
puts "sort_properties: #{prop_list.to_sentence}" if $DEBUG
|
127
|
+
prop_list
|
128
|
+
end
|
129
|
+
|
130
|
+
# Returns indent string multiplied by the depth
|
131
|
+
def indent(modifier = 0)
|
132
|
+
INDENT_STRING * (@depth + modifier)
|
133
|
+
end
|
134
|
+
|
135
|
+
# Write text
|
136
|
+
def write(text)
|
137
|
+
@stream.write(text)
|
138
|
+
end
|
139
|
+
end
|
140
|
+
end
|
@@ -0,0 +1,219 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'recursive_serializer')
|
2
|
+
|
3
|
+
module RdfContext
|
4
|
+
# Serialize RDF graphs in Turtle format
|
5
|
+
class TurtleSerializer < RecursiveSerializer
|
6
|
+
SUBJECT = 0
|
7
|
+
VERB = 1
|
8
|
+
OBJECT = 2
|
9
|
+
|
10
|
+
def reset
|
11
|
+
super
|
12
|
+
@shortNames = {}
|
13
|
+
@started = false
|
14
|
+
end
|
15
|
+
|
16
|
+
def get_qname(uri)
|
17
|
+
if uri.is_a?(URIRef)
|
18
|
+
md = uri.to_s.match(/^#{Regexp.escape(@base.to_s)}(.*)$/) if @base # escape base so regexp metacharacters in the URI match literally
|
19
|
+
return "<#{md[1]}>" if md
|
20
|
+
|
21
|
+
super(uri)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def preprocess_triple(triple)
|
26
|
+
super
|
27
|
+
|
28
|
+
# Pre-fetch qnames, to fill namespaces
|
29
|
+
get_qname(triple.subject)
|
30
|
+
get_qname(triple.predicate)
|
31
|
+
get_qname(triple.object)
|
32
|
+
|
33
|
+
@references[triple.predicate] = ref_count(triple.predicate) + 1
|
34
|
+
end
|
35
|
+
|
36
|
+
def label(node)
|
37
|
+
get_qname(node) || node.to_n3
|
38
|
+
end
|
39
|
+
|
40
|
+
def start_document
|
41
|
+
@started = true
|
42
|
+
|
43
|
+
write("#{indent}@base <#{@base}> .\n") if @base
|
44
|
+
|
45
|
+
ns_list = @namespaces.values.sort_by {|ns| ns.prefix}
|
46
|
+
unless ns_list.empty?
|
47
|
+
ns_str = ns_list.map do |ns|
|
48
|
+
"#{indent}@prefix #{ns.prefix}: <#{ns.uri}> ."
|
49
|
+
end.join("\n") + "\n"
|
50
|
+
write(ns_str)
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
def end_document; end
|
55
|
+
|
56
|
+
# Checks if l is a valid RDF list, i.e. l is rdf:nil or every node in the chain has rdf:first and exactly one rdf:rest value.
|
57
|
+
def is_valid_list(l)
|
58
|
+
props = @graph.properties(l)
|
59
|
+
#puts "is_valid_list: #{props.inspect}" if $DEBUG
|
60
|
+
return false unless props.has_key?(RDF_NS.first.to_s) || l == RDF_NS.nil
|
61
|
+
while l && l != RDF_NS.nil do
|
62
|
+
#puts "is_valid_list(length): #{props.length}" if $DEBUG
|
63
|
+
return false unless props.has_key?(RDF_NS.first.to_s) && props.has_key?(RDF_NS.rest.to_s)
|
64
|
+
n = props[RDF_NS.rest.to_s]
|
65
|
+
#puts "is_valid_list(n): #{n.inspect}" if $DEBUG
|
66
|
+
return false unless n.is_a?(Array) && n.length == 1
|
67
|
+
l = n.first
|
68
|
+
props = @graph.properties(l)
|
69
|
+
end
|
70
|
+
#puts "is_valid_list: valid" if $DEBUG
|
71
|
+
true
|
72
|
+
end
|
73
|
+
|
74
|
+
def do_list(l)
|
75
|
+
puts "do_list: #{l.inspect}" if $DEBUG
|
76
|
+
position = SUBJECT
|
77
|
+
while l do
|
78
|
+
p = @graph.properties(l)
|
79
|
+
item = p.fetch(RDF_NS.first.to_s, []).first
|
80
|
+
if item
|
81
|
+
path(item, position)
|
82
|
+
subject_done(l)
|
83
|
+
position = OBJECT
|
84
|
+
end
|
85
|
+
l = p.fetch(RDF_NS.rest.to_s, []).first
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
def p_list(node, position)
|
90
|
+
return false if !is_valid_list(node)
|
91
|
+
#puts "p_list: #{node.inspect}, #{position}" if $DEBUG
|
92
|
+
|
93
|
+
write(position == SUBJECT ? "(" : " (")
|
94
|
+
@depth += 2
|
95
|
+
do_list(node)
|
96
|
+
@depth -= 2
|
97
|
+
write(')')
|
98
|
+
end
|
99
|
+
|
100
|
+
def p_squared?(node, position)
|
101
|
+
node.is_a?(BNode) &&
|
102
|
+
!@serialized.has_key?(node) &&
|
103
|
+
ref_count(node) <= 1
|
104
|
+
end
|
105
|
+
|
106
|
+
def p_squared(node, position)
|
107
|
+
return false unless p_squared?(node, position)
|
108
|
+
|
109
|
+
#puts "p_squared: #{node.inspect}, #{position}" if $DEBUG
|
110
|
+
subject_done(node)
|
111
|
+
write(position == SUBJECT ? '[' : ' [')
|
112
|
+
@depth += 2
|
113
|
+
predicate_list(node)
|
114
|
+
@depth -= 2
|
115
|
+
write(']')
|
116
|
+
|
117
|
+
true
|
118
|
+
end
|
119
|
+
|
120
|
+
def p_default(node, position)
|
121
|
+
#puts "p_default: #{node.inspect}, #{position}" if $DEBUG
|
122
|
+
l = (position == SUBJECT ? "" : " ") + label(node)
|
123
|
+
write(l)
|
124
|
+
end
|
125
|
+
|
126
|
+
def path(node, position)
|
127
|
+
puts "path: #{node.inspect}, pos: #{position}, []: #{is_valid_list(node)}, p2?: #{p_squared?(node, position)}, rc: #{ref_count(node)}" if $DEBUG
|
128
|
+
raise RdfException, "Cannot serialize node '#{node}'" unless p_list(node, position) || p_squared(node, position) || p_default(node, position)
|
129
|
+
end
|
130
|
+
|
131
|
+
def verb(node)
|
132
|
+
puts "verb: #{node.inspect}" if $DEBUG
|
133
|
+
if node == RDF_TYPE
|
134
|
+
write(" a")
|
135
|
+
else
|
136
|
+
path(node, VERB)
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
140
|
+
def object_list(objects)
|
141
|
+
puts "object_list: #{objects.inspect}" if $DEBUG
|
142
|
+
return if objects.empty?
|
143
|
+
|
144
|
+
objects.each_with_index do |obj, i|
|
145
|
+
write(",\n#{indent(2)}") if i > 0
|
146
|
+
path(obj, OBJECT)
|
147
|
+
end
|
148
|
+
end
|
149
|
+
|
150
|
+
def predicate_list(subject)
|
151
|
+
properties = @graph.properties(subject)
|
152
|
+
prop_list = sort_properties(properties) - [RDF_NS.first.to_s, RDF_NS.rest.to_s]
|
153
|
+
puts "predicate_list: #{prop_list.inspect}" if $DEBUG
|
154
|
+
return if prop_list.empty?
|
155
|
+
|
156
|
+
prop_list.each_with_index do |prop, i|
|
157
|
+
write(";\n#{indent(2)}") if i > 0
|
158
|
+
verb(URIRef.new(prop))
|
159
|
+
object_list(properties[prop])
|
160
|
+
end
|
161
|
+
end
|
162
|
+
|
163
|
+
def s_squared?(subject)
|
164
|
+
ref_count(subject) == 0 && subject.is_a?(BNode) && !is_valid_list(subject)
|
165
|
+
end
|
166
|
+
|
167
|
+
def s_squared(subject)
|
168
|
+
return false unless s_squared?(subject)
|
169
|
+
|
170
|
+
write("\n#{indent} [")
|
171
|
+
@depth += 1
|
172
|
+
predicate_list(subject)
|
173
|
+
@depth -= 1
|
174
|
+
write("] .")
|
175
|
+
true
|
176
|
+
end
|
177
|
+
|
178
|
+
def s_default(subject)
|
179
|
+
write("\n#{indent}")
|
180
|
+
path(subject, SUBJECT)
|
181
|
+
predicate_list(subject)
|
182
|
+
write(" .")
|
183
|
+
true
|
184
|
+
end
|
185
|
+
|
186
|
+
def statement(subject)
|
187
|
+
puts "statement: #{subject.inspect}, s2?: #{s_squared?(subject)}" if $DEBUG # use the side-effect-free predicate; s_squared itself writes to the stream
|
188
|
+
subject_done(subject)
|
189
|
+
s_squared(subject) || s_default(subject)
|
190
|
+
end
|
191
|
+
|
192
|
+
# Serialize the graph
|
193
|
+
#
|
194
|
+
# @param [IO, StreamIO] stream:: Stream in which to place serialized graph
|
195
|
+
# @param [Hash] options:: Options for serializer
|
196
|
+
# <em>options[:base]</em>:: Base URI of graph, used to shorten URI references
|
197
|
+
def serialize(stream, options = {})
|
198
|
+
puts "\nserialize: #{@graph.inspect}" if $DEBUG
|
199
|
+
reset
|
200
|
+
@stream = stream
|
201
|
+
@base = options[:base]
|
202
|
+
|
203
|
+
@graph.bind(RDF_NS)
|
204
|
+
@graph.bind(RDFS_NS)
|
205
|
+
|
206
|
+
preprocess
|
207
|
+
start_document
|
208
|
+
|
209
|
+
order_subjects.each do |subject|
|
210
|
+
#puts "subj: #{subject.inspect}"
|
211
|
+
unless is_done?(subject)
|
212
|
+
statement(subject)
|
213
|
+
end
|
214
|
+
end
|
215
|
+
|
216
|
+
end_document
|
217
|
+
end
|
218
|
+
end
|
219
|
+
end
|