activerdf_rdflite 1.0 → 1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +1 -1
- data/lib/activerdf_rdflite/fetching.rb +43 -0
- data/lib/activerdf_rdflite/init.rb +2 -0
- data/lib/activerdf_rdflite/rdflite.rb +182 -114
- data/lib/activerdf_rdflite/suggesting.rb +73 -0
- data/test/test_data.nt +3 -0
- data/test/test_rdflite.rb +52 -5
- metadata +4 -2
data/Rakefile
CHANGED
@@ -14,7 +14,7 @@ desc "test and package gem"
|
|
14
14
|
task :default => [:test, :package]
|
15
15
|
|
16
16
|
# get ActiveRdfVersion from commandline
|
17
|
-
ActiveRdfVersion = ENV['REL'] || '0
|
17
|
+
ActiveRdfVersion = ENV['REL'] || '1.0'
|
18
18
|
NAME="activerdf_rdflite"
|
19
19
|
GEMNAME="#{NAME}-#{ActiveRdfVersion}.gem"
|
20
20
|
|
@@ -0,0 +1,43 @@
|
|
1
|
+
#class String
|
2
|
+
# alias _match match
|
3
|
+
# def match(*args)
|
4
|
+
# m = _match(args.first)
|
5
|
+
# if m && m.length > 1
|
6
|
+
# args[1..-1].each_with_index do |name, index|
|
7
|
+
# m.instance_eval "def #{name}; self[#{index+1}] end"
|
8
|
+
# end
|
9
|
+
# end
|
10
|
+
# m
|
11
|
+
# end
|
12
|
+
#end
|
13
|
+
|
14
|
+
# RDFLite adapter that can additionally pull RDF data from the web into the
# datastore. Registers itself with the ConnectionPool as the :fetching adapter.
class FetchingAdapter < RDFLite
  ConnectionPool.register_adapter(:fetching,self)

  # fetches RDF/XML data from given url and adds it to the datastore, using the
  # source url as context identifier.
  #
  # url:: String; ignored (method returns nil) unless it contains "http://"
  #
  # Returns whatever add_ntriples returns. Relies on the external `rapper`
  # command being available on the PATH.
  def fetch url
    return unless url.match(/http:\/\/(.*)/)

    # remove fragment identifier from url
    hashidx = url.rindex('#')
    url = url[0..hashidx-1] unless hashidx.nil?

    $activerdflog.debug "fetching from #{url}"

    #model = Redland::Model.new
    #parser = Redland::Parser.new('rdfxml')
    #scan = Redland::Uri.new('http://feature.librdf.org/raptor-scanForRDF')
    #enable = Redland::Literal.new('1')
    #Redland::librdf_parser_set_feature(parser, scan.uri, enable.node)
    #parser.parse_into_model(model, url)
    #triples = Redland::Serializer.ntriples.model_to_string(nil, model)

    # The url comes from outside, so it must never reach a shell: passing the
    # command as an array makes IO.popen exec rapper directly, with the url as
    # one literal argument (quotes, backticks and $(...) are not interpreted).
    begin
      triples = IO.popen(['rapper', '--scan', url]) { |io| io.read }
    rescue Errno::ENOENT
      # rapper is not installed; behave like an empty fetch instead of raising
      $activerdflog.info "could not run rapper, nothing fetched from #{url}"
      triples = ''
    end

    lines = triples.split($/)
    $activerdflog.debug "found #{lines.size} triples"

    # hand over the individual lines (not the raw string) so that the triples
    # can be iterated with #each
    context = RDFS::Resource.new(url)
    add_ntriples(lines, context)
  end
end
|
@@ -1,7 +1,3 @@
|
|
1
|
-
# RDFLite is a lightweight RDF database on top of sqlite3. It can act as adapter
|
2
|
-
# in ActiveRDF. It supports on-disk and in-memory usage, and allows keyword
|
3
|
-
# search if ferret is installed.
|
4
|
-
#
|
5
1
|
# Author:: Eyal Oren
|
6
2
|
# Copyright:: (c) 2005-2006 Eyal Oren
|
7
3
|
# License:: LGPL
|
@@ -10,20 +6,23 @@ require 'sqlite3'
|
|
10
6
|
require 'active_rdf'
|
11
7
|
require 'federation/connection_pool'
|
12
8
|
|
13
|
-
$
|
9
|
+
$activerdflog.info "loading RDFLite adapter"
|
14
10
|
|
15
11
|
begin
|
16
12
|
require 'ferret'
|
17
13
|
@@have_ferret = true
|
18
14
|
rescue LoadError
|
19
|
-
$
|
15
|
+
$activerdflog.info "Keyword search is disabled since we could not load Ferret. To
|
20
16
|
enable, please do \"gem install ferret\""
|
21
17
|
@@have_ferret = false
|
22
18
|
end
|
23
19
|
|
20
|
+
# RDFLite is a lightweight RDF database on top of sqlite3. It can act as adapter
|
21
|
+
# in ActiveRDF. It supports on-disk and in-memory usage, and allows keyword
|
22
|
+
# search if ferret is installed.
|
24
23
|
class RDFLite < ActiveRdfAdapter
|
25
24
|
ConnectionPool.register_adapter(:rdflite,self)
|
26
|
-
bool_accessor :keyword_search
|
25
|
+
bool_accessor :keyword_search, :reasoning
|
27
26
|
|
28
27
|
# instantiates RDFLite database
|
29
28
|
# available parameters:
|
@@ -31,7 +30,7 @@ class RDFLite < ActiveRdfAdapter
|
|
31
30
|
# * :keyword => true/false (defaults to true)
|
32
31
|
# * :pidx, :oidx, etc. => true/false (enable/disable these indices)
|
33
32
|
def initialize(params = {})
|
34
|
-
$
|
33
|
+
$activerdflog.info "initialised rdflite with params #{params.to_s}"
|
35
34
|
|
36
35
|
@reads = true
|
37
36
|
@writes = true
|
@@ -40,22 +39,17 @@ class RDFLite < ActiveRdfAdapter
|
|
40
39
|
file = params[:location] || ':memory:'
|
41
40
|
@db = SQLite3::Database.new(file)
|
42
41
|
|
43
|
-
#
|
44
|
-
@keyword_search =
|
45
|
-
true
|
46
|
-
else
|
47
|
-
params[:keyword]
|
48
|
-
end
|
49
|
-
|
50
|
-
# we can only do keyword search if ferret is found
|
42
|
+
# enable keyword search by default, but only if ferret is found
|
43
|
+
@keyword_search = params[:keyword].nil? ? true : params[:keyword]
|
51
44
|
@keyword_search &= @@have_ferret
|
52
|
-
|
45
|
+
|
46
|
+
@reasoning = params[:reasoning] || false
|
53
47
|
|
54
48
|
if keyword_search?
|
55
49
|
# we initialise the ferret index, either as a file or in memory
|
50
|
+
infos = Ferret::Index::FieldInfos.new
|
56
51
|
|
57
52
|
# we setup the fields not to store object's contents
|
58
|
-
infos = Ferret::Index::FieldInfos.new
|
59
53
|
infos.add_field(:subject, :store => :yes, :index => :no, :term_vector => :no)
|
60
54
|
infos.add_field(:object, :store => :no) #, :index => :omit_norms)
|
61
55
|
|
@@ -69,13 +63,11 @@ class RDFLite < ActiveRdfAdapter
|
|
69
63
|
# turn off filesystem synchronisation for speed
|
70
64
|
@db.synchronous = 'off'
|
71
65
|
|
72
|
-
# create triples table.
|
73
|
-
@db.execute('create table if not exists triple(s,p,o, unique(s,p,o) on conflict ignore)')
|
66
|
+
# create triples table. ignores duplicated triples
|
67
|
+
@db.execute('create table if not exists triple(s,p,o,c, unique(s,p,o,c) on conflict ignore)')
|
74
68
|
|
75
69
|
create_indices(params)
|
76
|
-
|
77
|
-
$log.debug("opened connection to #{file}")
|
78
|
-
$log.debug("database contains #{size} triples")
|
70
|
+
@db
|
79
71
|
end
|
80
72
|
|
81
73
|
# returns the number of triples in the datastore (incl. possible duplicates)
|
@@ -85,8 +77,8 @@ class RDFLite < ActiveRdfAdapter
|
|
85
77
|
|
86
78
|
# returns all quads in the datastore as an array of strings, one string per
# row, with subject, predicate, object and context separated by single spaces
def dump
  rows = @db.execute('select s,p,o,c from triple')
  rows.collect { |subj, pred, obj, ctx| [subj, pred, obj, ctx].join(' ') }
end
|
92
84
|
|
@@ -95,37 +87,23 @@ class RDFLite < ActiveRdfAdapter
|
|
95
87
|
@db.execute('delete from triple')
|
96
88
|
end
|
97
89
|
|
98
|
-
# deletes triple(s,p,o) from datastore
|
99
|
-
#
|
100
|
-
|
90
|
+
# deletes triple(s,p,o,c) from datastore
|
91
|
+
# symbol parameters match anything: delete(:s,:p,:o) will delete all triples
|
92
|
+
# you can specify a context to limit deletion to that context:
|
93
|
+
# delete(:s,:p,:o, 'http://context') will delete all triples with that context
|
94
|
+
def delete(s,p,o,c=nil)
|
101
95
|
# convert input to internal format
|
102
|
-
|
103
|
-
s = "<#{s.uri}>" unless s.nil?
|
104
|
-
p = "<#{p.uri}>" unless p.nil?
|
105
|
-
o = case o
|
106
|
-
when RDFS::Resource
|
107
|
-
"<#{o.uri}>"
|
108
|
-
else
|
109
|
-
"\"#{o.to_s}\""
|
110
|
-
end unless o.nil?
|
111
|
-
|
112
|
-
# construct where clause for deletion (for all non-nil input)
|
113
|
-
where_clauses = []
|
114
|
-
conditions = []
|
115
|
-
unless s.nil?
|
116
|
-
conditions << s
|
117
|
-
where_clauses << 's = ?'
|
118
|
-
end
|
96
|
+
quad = [s,p,o,c].collect {|r| internalise(r) }
|
119
97
|
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
98
|
+
# construct where clause for deletion (for all non-nil input)
|
99
|
+
where_clauses = []
|
100
|
+
conditions = []
|
101
|
+
quad.each_with_index do |r,i|
|
102
|
+
unless r.nil?
|
103
|
+
conditions << r
|
104
|
+
where_clauses << "#{SPOC[i]} = ?"
|
105
|
+
end
|
106
|
+
end
|
129
107
|
|
130
108
|
# construct delete string
|
131
109
|
ds = 'delete from triple'
|
@@ -133,28 +111,29 @@ class RDFLite < ActiveRdfAdapter
|
|
133
111
|
|
134
112
|
# execute delete string with possible deletion conditions (for each
|
135
113
|
# non-empty where clause)
|
114
|
+
$activerdflog.debug("deleting #{[s,p,o,c].join(' ')}")
|
136
115
|
@db.execute(ds, *conditions)
|
137
|
-
$log.debug(sprintf("sending delete query: #{ds}", *conditions))
|
138
116
|
|
139
117
|
# delete literal from ferret index
|
140
118
|
@ferret.search_each("subject:\"#{s}\", object:\"#{o}\"") do |idx, score|
|
141
|
-
$log.debug "deleting #{o} => #{s} from ferret index"
|
142
119
|
@ferret.delete(idx)
|
143
120
|
end if keyword_search?
|
121
|
+
|
122
|
+
@db
|
144
123
|
end
|
145
124
|
|
146
125
|
# adds triple(s,p,o) to datastore, optionally tagged with context c
# s,p must be resources (respond to :uri), o can be primitive data or resource
# raises ActiveRdfError when s or p is not a resource
def add(s,p,o,c=nil)
  # reject illegal input: subject first, then predicate
  [s, p].each do |node|
    raise(ActiveRdfError, "adding non-resource #{node}") unless node.respond_to?(:uri)
  end

  # convert all four positions to internal representation and store them
  add_internal(@db, *[s,p,o,c].collect { |node| internalise(node) })
end
|
159
138
|
|
160
139
|
# flushes outstanding changes to underlying sqlite3
|
@@ -167,38 +146,70 @@ class RDFLite < ActiveRdfAdapter
|
|
167
146
|
# loads triples from file in ntriples format, using the file location
# (file:<path>) as context of all loaded triples
def load(file)
  ntriples = File.readlines(file)
  $activerdflog.debug "read #{ntriples.size} triples from file #{file}"

  # Pass the context as a resource, not as a pre-formatted "<file:...>" string:
  # add_ntriples internalises its context argument, and a plain string would
  # be wrapped in literal quotes on top of the angle brackets, so the stored
  # context would never equal the <file:...> form used when querying by
  # resource.
  context = RDFS::Resource.new("file:#{file}")
  add_ntriples(ntriples, context)
end
|
154
|
+
|
155
|
+
# adds ntriples from given context to database
#
# ntriples:: either one String holding several lines, or an enumerable of
#            single-line strings (e.g. the result of File.readlines)
# context::  resource (or nil) stored as fourth column of every added triple
#
# Lines that do not contain three nodes (blank lines, comments) are skipped.
# Returns the database handle.
def add_ntriples(ntriples, context=nil)
  # convert context to internal format if RDFS::Resource
  context = internalise(context)

  # need unique identifier for this batch of triples (to detect occurrence of
  # same bnodes _:#1); strip the newline uuidgen prints so that it does not
  # end up inside the generated uris
  uuid = `uuidgen`.strip
  bnode_uri = lambda { |node| "<http://www.activerdf.org/bnode/#{node[2..-1]}/#{uuid}>" }

  # accept a multi-line string as well as a list of lines
  lines = ntriples.is_a?(String) ? ntriples.split($/) : ntriples

  # add each triple to db
  @db.transaction do |tr|
    lines.each do |triple|
      subject, predicate, object = triple.scan(Node)

      # not a triple: inserting it would only create an all-NULL row
      next if object.nil?

      # The node kind is decided by its first characters (_: <...> "..."),
      # not by searching the whole node, so that a literal which merely
      # contains "_:" is not mistaken for a bnode.

      # handle bnodes if necessary (bnodes need to have uri generated)
      subject = bnode_uri.call(subject) if subject[0, 2] == '_:'

      # handle bnodes and literals if necessary (literals need unicode fixing)
      if object[0, 2] == '_:'
        object = bnode_uri.call(object)
      elsif object[0, 1] == '"'
        object = fix_unicode(object)
      end

      add_internal(tr, subject, predicate, object, context)
    end
  end

  @db
end
|
181
195
|
|
182
196
|
# executes ActiveRDF query on datastore
|
183
197
|
def query(query)
|
184
|
-
# log received query
|
185
|
-
$log.debug "received query: #{query.to_sp}"
|
186
|
-
|
187
198
|
# construct query clauses
|
188
|
-
sql = translate(query)
|
199
|
+
sql, conditions = translate(query)
|
189
200
|
|
190
201
|
# executing query, passing all where-clause values as parameters (so that
|
191
202
|
# sqlite will encode quotes correctly)
|
192
|
-
constraints =
|
193
|
-
|
194
|
-
$log.debug format("executing: #{sql.gsub('?','"%s"')}", *constraints)
|
203
|
+
#constraints = right_hand_sides.collect { |value| value.to_s }
|
195
204
|
|
196
205
|
# executing query
|
197
|
-
results = @db.execute(sql, *
|
206
|
+
results = @db.execute(sql, *conditions)
|
198
207
|
|
199
208
|
# if ASK query, we check whether we received a positive result count
|
200
209
|
if query.ask?
|
201
|
-
return [results[0][0].to_i > 0]
|
210
|
+
return [[results[0][0].to_i > 0]]
|
211
|
+
elsif query.count?
|
212
|
+
return [[results[0][0].to_i]]
|
202
213
|
else
|
203
214
|
# otherwise we convert results to ActiveRDF nodes and return them
|
204
215
|
return wrap(query, results)
|
@@ -207,22 +218,23 @@ class RDFLite < ActiveRdfAdapter
|
|
207
218
|
|
208
219
|
# translates ActiveRDF query into internal sqlite query
# returns a pair: the sql string (select, join, where, sort and limit parts
# concatenated in that order) and the list of values for its ? placeholders
def translate(query)
  where_sql, bindings = construct_where(query)

  select_sql = construct_select(query)
  join_sql = construct_join(query)
  sort_sql = construct_sort(query)
  limit_sql = construct_limit(query)

  [select_sql + join_sql + where_sql + sort_sql + limit_sql, bindings]
end
|
213
224
|
|
214
225
|
private
|
215
226
|
# constants for extracting resources/literals from sql results
|
227
|
+
BNode = /_:(\S*)/
|
216
228
|
Resource = /<([^>]*)>/
|
217
229
|
Literal = /"([^"]*)"/
|
218
|
-
Node = Regexp.union(
|
219
|
-
|
230
|
+
Node = Regexp.union(/_:\S*/,/<[^>]*>/,/"[^"]*"/)
|
231
|
+
SPOC = ['s','p','o','c']
|
220
232
|
|
221
233
|
# adds s,p,o into sqlite and ferret
|
222
234
|
# s,p,o should be in internal format: <uri> and "literal"
|
223
|
-
def add_internal(s,p,o)
|
235
|
+
def add_internal(db, s, p, o, c)
|
224
236
|
# insert the triple into the datastore
|
225
|
-
|
237
|
+
db.execute('insert into triple values (?,?,?,?)', s,p,o,c)
|
226
238
|
|
227
239
|
# if keyword-search available, insert the object into keyword search
|
228
240
|
@ferret << {:subject => s, :object => o} if keyword_search?
|
@@ -262,6 +274,16 @@ class RDFLite < ActiveRdfAdapter
|
|
262
274
|
clause
|
263
275
|
end
|
264
276
|
|
277
|
+
# sort query results on variable clause (optionally)
# returns "" when the query has no sort clauses, otherwise an " order by ..."
# fragment listing the column of every sort variable
def construct_sort(query)
  return "" if query.sort_clauses.empty?

  columns = query.sort_clauses.collect do |term|
    variable_name(query, term)
  end

  # no parentheses around the list: "order by (a,b)" is a single row-value
  # expression rather than a list of sort keys
  " order by #{columns.join(',')}"
end
|
286
|
+
|
265
287
|
# construct join clause
|
266
288
|
# TODO: joins don't work this way, they have to be linear (in one direction
|
267
289
|
# only, and we should only alias tables we didn't alias yet)
|
@@ -298,8 +320,8 @@ class RDFLite < ActiveRdfAdapter
|
|
298
320
|
|
299
321
|
# construct t0,t1,... as aliases for term
|
300
322
|
# and construct join condition, e.g. t0.s
|
301
|
-
termalias = "t#{index /
|
302
|
-
termjoin = "#{termalias}.#{
|
323
|
+
termalias = "t#{index / 4}"
|
324
|
+
termjoin = "#{termalias}.#{SPOC[index % 4]}"
|
303
325
|
|
304
326
|
join = if join_stmt.include?(termalias)
|
305
327
|
""
|
@@ -313,8 +335,8 @@ class RDFLite < ActiveRdfAdapter
|
|
313
335
|
|
314
336
|
# construct t0,t1, etc. as aliases for buddy,
|
315
337
|
# and construct join condition, e.g. t0.s = t1.p
|
316
|
-
buddyalias = "t#{i/
|
317
|
-
buddyjoin = "#{buddyalias}.#{
|
338
|
+
buddyalias = "t#{i/4}"
|
339
|
+
buddyjoin = "#{buddyalias}.#{SPOC[i%4]}"
|
318
340
|
|
319
341
|
# TODO: fix reuse of same table names as aliases, e.g.
|
320
342
|
# "from triple as t1 join triple as t2 on ... join t1 on ..."
|
@@ -345,20 +367,22 @@ class RDFLite < ActiveRdfAdapter
|
|
345
367
|
# collecting all the right-hand sides of where clauses (e.g. where name =
|
346
368
|
# 'abc'), to add to query string later using ?-notation, because then
|
347
369
|
# sqlite will automatically encode quoted literals correctly
|
348
|
-
|
370
|
+
right_hand_sides = []
|
349
371
|
|
350
372
|
# convert each where clause to SQL:
|
351
373
|
# add where clause for each subclause, except if it's a variable
|
352
374
|
query.where_clauses.each_with_index do |clause,level|
|
375
|
+
raise ActiveRdfError, "where clause #{clause} is not a triple" unless clause.is_a?(Array)
|
353
376
|
clause.each_with_index do |subclause, i|
|
354
377
|
# don't add where clause for variables
|
355
378
|
unless subclause.is_a?(Symbol)
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
379
|
+
conditions = compute_where_condition(i, subclause, query.reasoning? && reasoning?)
|
380
|
+
if conditions.size == 1
|
381
|
+
where << "t#{level}.#{SPOC[i]} = ?"
|
382
|
+
right_hand_sides << conditions.first
|
360
383
|
else
|
361
|
-
|
384
|
+
conditions = conditions.collect {|c| "'#{c}'"}
|
385
|
+
where << "t#{level}.#{SPOC[i]} in (#{conditions.join(',')})"
|
362
386
|
end
|
363
387
|
end
|
364
388
|
end
|
@@ -371,18 +395,53 @@ class RDFLite < ActiveRdfAdapter
|
|
371
395
|
raise ActiveRdfError, "cannot do keyword search over multiple subjects" if select_subject.size > 1
|
372
396
|
|
373
397
|
keywords = query.keywords.collect {|subj,key| key}
|
374
|
-
@ferret.search_each("object
|
398
|
+
@ferret.search_each("object:#{keywords}") do |idx,score|
|
375
399
|
subjects << @ferret[idx][:subject]
|
376
400
|
end
|
377
401
|
subjects.uniq! if query.distinct?
|
378
402
|
where << "#{variable_name(query,select_subject.first)} in (#{subjects.collect {'?'}.join(',')})"
|
379
|
-
|
403
|
+
right_hand_sides += subjects
|
380
404
|
end
|
381
405
|
|
382
406
|
if where.empty?
|
383
|
-
''
|
407
|
+
['',[]]
|
408
|
+
else
|
409
|
+
["where " + where.join(' and '), right_hand_sides]
|
410
|
+
end
|
411
|
+
end
|
412
|
+
|
413
|
+
# computes the values that one position of a where clause may take
#
# index::     position inside the clause (0 subject, 1 predicate, 2 object,
#             3 context)
# subclause:: the (non-variable) term found at that position
# reasoning:: whether rdfs reasoning should be applied
#
# Returns an array of strings in internal format: "<uri>" for everything that
# responds to :uri, and the plain to_s value for everything else.
def compute_where_condition(index, subclause, reasoning)
  conditions = [subclause]

  # expand conditions with rdfs rules if reasoning enabled: the only rule is
  # that a property also matches all of its subproperties; there are no rules
  # for subjects, objects or contexts
  if reasoning && index == 1 && subclause.respond_to?(:uri)
    conditions = subproperties(subclause)
  end

  # convert conditions into internal format
  conditions.collect { |c| c.respond_to?(:uri) ? "<#{c.uri}>" : c.to_s }
end
|
434
|
+
|
435
|
+
# returns the given property followed by all its direct and indirect
# rdfs:subPropertyOf children (each property listed once, parents before
# their children)
#
# The hierarchy is walked with an explicit work list and every property is
# expanded only once, so a cyclic subPropertyOf graph (e.g. a property
# declared as subproperty of itself) terminates instead of recursing forever.
def subproperties(resource)
  subproperty = Namespace.lookup(:rdfs,:subPropertyOf)

  found = []
  pending = [resource]

  until pending.empty?
    current = pending.shift
    next if current.nil? || found.include?(current)
    found << current

    # look up direct children without reasoning, otherwise answering this
    # query would need the subproperties it is trying to compute
    children_query = Query.new.distinct(:sub).where(:sub, subproperty, current)
    children_query.reasoning = false
    children = children_query.execute

    # children go to the front so that they are listed right after their parent
    pending = children.to_a + pending
  end

  found
end
|
388
447
|
|
@@ -412,28 +471,31 @@ class RDFLite < ActiveRdfAdapter
|
|
412
471
|
end
|
413
472
|
end
|
414
473
|
|
415
|
-
termtable = "t#{index /
|
416
|
-
termspo =
|
474
|
+
termtable = "t#{index / 4}"
|
475
|
+
termspo = SPOC[index % 4]
|
417
476
|
return "#{termtable}.#{termspo}"
|
418
477
|
end
|
419
478
|
|
420
479
|
# wrap resources into ActiveRDF resources, literals into Strings
# every value of every result row is converted with parse; the row structure
# is kept as it is
def wrap(query, results)
  results.collect { |row| row.collect { |value| parse(value) } }
end
|
485
|
+
|
486
|
+
# converts one value of a sql result into its ActiveRDF counterpart:
# a value matching <...> becomes an RDFS::Resource, a value matching "..."
# becomes a String, anything else is returned unchanged
def parse(result)
  # Regexp#=== sets the last match, so $1 holds the captured uri / text
  return RDFS::Resource.new($1) if Resource === result
  return String.new($1) if Literal === result

  # when we do a count(*) query we get a number, not a resource/literal
  result
end
|
436
497
|
|
498
|
+
|
437
499
|
def create_indices(params)
|
438
500
|
sidx = params[:sidx] || false
|
439
501
|
pidx = params[:pidx] || false
|
@@ -457,15 +519,21 @@ class RDFLite < ActiveRdfAdapter
|
|
457
519
|
|
458
520
|
# transforms a single node into internal format: <uri> or "literal"
# * anything responding to :uri becomes "<uri>"
# * symbols (query variables) become nil
# * everything else becomes its to_s value wrapped in double quotes
#   (so nil becomes the empty literal "")
def internalise(r)
  return "<#{r.uri}>" if r.respond_to?(:uri)
  return nil if r.is_a?(Symbol)

  "\"#{r.to_s}\""
end
|
531
|
+
|
532
|
+
# fixes unicode characters in literals: replaces the ntriples escape
# sequences \uXXXX (4 hex digits) and \UXXXXXXXX (8 hex digits) by the
# character they denote, encoded as UTF-8
#
# The replacement is done in one pass, so text that merely looks like
# "U+0041" is left untouched. Escapes above the unicode range (> 10FFFF) are
# left as they are.
def fix_unicode(str)
  str.gsub(/\\u([0-9a-fA-F]{4})|\\U([0-9a-fA-F]{8})/) do |escape|
    codepoint = ($1 || $2).hex
    codepoint > 0x10FFFF ? escape : [codepoint].pack('U')
  end
end
|
537
|
+
|
538
|
+
public :subproperties
|
471
539
|
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
require 'pp'
|
2
|
+
# Adapter that can suggest predicates for a resource, based on how often
# predicates occur together on the same subject in the datastore. Registers
# itself with the ConnectionPool as the :suggesting adapter.
class SuggestingAdapter < FetchingAdapter
  ConnectionPool.register_adapter(:suggesting,self)

  # sets up the datastore as the parent adapter does and (re)creates two views:
  # * occurrence: per predicate, the number of distinct subjects using it
  # * cooccurrence: per pair of different predicates, the number of distinct
  #   subjects using both
  def initialize(params = {})
    super(params)
    @db.execute('drop view if exists occurrence')
    @db.execute('create view occurrence as select p, count(distinct s) as count from triple group by p')

    @db.execute('drop view if exists cooccurrence')
    @db.execute('create view cooccurrence as select t0.p as p1,t1.p as p2, count(distinct t0.s) as count from triple as t0 join triple as t1 on t0.s=t1.s and t0.p!=t1.p group by t0.p, t1.p')
  end

  # suggests predicates for the given resource
  #
  # Looks at the predicates the resource already has (only those used by more
  # than one subject), collects the predicates that co-occur with all of
  # them, and removes the ones the resource already has.
  #
  # Returns an array of [predicate, score] pairs, where score is the product
  # over the considered predicates p of cooccurrence(candidate, p) /
  # occurrence(p). Returns [] when none of the resource's predicates is used
  # by more than one subject.
  def suggest(resource)
    $activerdflog.debug "starting suggestions for #{size} triples"
    time = Time.now

    own_predicates = resource.direct_predicates

    construct_occurrence_matrix
    construct_cooccurrence_matrix

    predicates = own_predicates.select { |p| occurrence(p) > 1 }

    # fetch all predicates co-occurring with our predicates
    candidates = predicates.collect { |p| cooccurring(p) }

    # perform set intersection; inject yields nil for an empty list, which
    # means that there is nothing to intersect and hence no candidate
    candidates = (candidates.inject { |intersect, n| intersect & n } || []).flatten
    candidates = candidates - own_predicates

    suggestions = candidates.collect do |candidate|
      score = predicates.inject(1.0) do |product, p|
        product * cooccurrence(candidate, p) / occurrence(p)
      end
      [candidate, score]
    end
    $activerdflog.debug "suggestions for #{resource} took #{Time.now-time}s"
    suggestions
  end

  # loads into @occurrence, for every predicate used by more than one
  # subject, the number of subjects using it
  # NOTE(review): the parsed predicates are used as hash keys, which assumes
  # that equal resources have equal #hash / #eql? -- confirm
  def construct_occurrence_matrix
    @occurrence = {}
    @db.execute('select * from occurrence where count > 1') do |p,count|
      @occurrence[parse(p)] = count.to_i
    end
  end

  # loads into @cooccurrence a nested hash: predicate => { other predicate =>
  # number of subjects using both }
  def construct_cooccurrence_matrix
    @cooccurrence = {}
    @db.execute('select * from cooccurrence') do |p1, p2, count|
      @cooccurrence[parse(p1)] ||= {}
      @cooccurrence[parse(p1)][parse(p2)] = count.to_i
    end
  end

  # number of subjects using predicate (0 if not in the occurrence matrix)
  def occurrence(predicate)
    @occurrence[predicate] || 0
  end

  # number of subjects using both p1 and p2 (0 if they never occur together)
  def cooccurrence(p1, p2)
    (@cooccurrence[p1] || {})[p2] || 0
  end

  # all predicates occurring together with predicate ([] if there are none)
  def cooccurring(predicate)
    (@cooccurrence[predicate] || {}).keys
  end
end
|
data/test/test_data.nt
CHANGED
@@ -27,3 +27,6 @@
|
|
27
27
|
<http://www.w3.org/1999/02/22-rdf-syntax-ns#Property> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2000/01/rdf-schema#Resource> .
|
28
28
|
<http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2000/01/rdf-schema#domain> <http://www.w3.org/2000/01/rdf-schema#Resource> .
|
29
29
|
<http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2000/01/rdf-schema#range> <http://www.w3.org/2000/01/rdf-schema#Class> .
|
30
|
+
_:#1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://activerdf.org/test/Person> .
|
31
|
+
_:#1 <http://activerdf.org/test/age> "29" .
|
32
|
+
_:#1 <http://activerdf.org/test/name> "Another Person" .
|
data/test/test_rdflite.rb
CHANGED
@@ -92,8 +92,45 @@ class TestRdfLiteAdapter < Test::Unit::TestCase
|
|
92
92
|
# loading the ntriples fixture must store all 32 triples of the file
def test_loading_data
  fixture = File.dirname(File.expand_path(__FILE__)) + '/test_data.nt'

  adapter = ConnectionPool.add_data_source :type => :rdflite
  adapter.load(fixture)

  assert_equal 32, adapter.size
end
|
97
|
+
|
98
|
+
# a count query over all triples must return the number of loaded triples as
# a plain integer
def test_count_query
  adapter = ConnectionPool.add_data_source :type => :rdflite
  adapter.load(File.dirname(File.expand_path(__FILE__)) + '/test_data.nt')

  # Integer instead of Fixnum: Fixnum is a subclass of Integer on old rubies
  # and does not exist any more on recent ones, Integer works on all of them
  assert_kind_of Integer, Query.new.count(:s).where(:s,:p,:o).execute
  assert_equal 32, Query.new.count(:s).where(:s,:p,:o).execute
end
|
104
|
+
|
105
|
+
# after loading one file, the only context in the store must be the
# file:<path> resource of that file
def test_single_context
  fixture = File.dirname(File.expand_path(__FILE__)) + '/test_data.nt'

  adapter = ConnectionPool.add_data_source :type => :rdflite
  adapter.load(fixture)

  found = Query.new.distinct(:c).where(:s,:p,:o,:c).execute
  assert_instance_of RDFS::Resource, found
  assert_equal RDFS::Resource.new("file:#{fixture}"), found
end
|
114
|
+
|
115
|
+
# triples loaded from a file carry the file context, a triple added without
# context carries the empty context; both contexts can be used to restrict
# a query
def test_multiple_context
  adapter = ConnectionPool.add_data_source :type => :rdflite
  fixture = File.dirname(File.expand_path(__FILE__)) + '/test_data.nt'
  adapter.load(fixture)
  file_context = RDFS::Resource.new("file:#{fixture}")

  # one extra triple without explicit context
  subject = RDFS::Resource.new 'eyaloren.org'
  predicate = RDFS::Resource.new 'foaf:age'
  object = RDFS::Resource.new 'test'
  adapter.add(subject, predicate, object)

  contexts = Query.new.distinct(:c).where(:s,:p,:o,:c).execute
  assert_equal file_context, contexts[0]
  assert_equal '', contexts[1]

  without_context = Query.new.distinct(:s).where(:s,:p,:o,'').execute(:flatten => false)
  within_file = Query.new.distinct(:s).where(:s,:p,:o,file_context).execute(:flatten => false)
  assert_equal 1, without_context.size
  assert_equal 9, within_file.size
end
|
98
135
|
|
99
136
|
def test_person_data
|
@@ -119,11 +156,11 @@ class TestRdfLiteAdapter < Test::Unit::TestCase
|
|
119
156
|
def test_delete_data
|
120
157
|
adapter = ConnectionPool.add_data_source :type => :rdflite
|
121
158
|
adapter.load(File.dirname(File.expand_path(__FILE__)) + '/test_data.nt')
|
122
|
-
assert_equal
|
159
|
+
assert_equal 32, adapter.size
|
123
160
|
|
124
161
|
eyal = RDFS::Resource.new('http://activerdf.org/test/eyal')
|
125
162
|
adapter.delete(eyal, nil, nil)
|
126
|
-
assert_equal
|
163
|
+
assert_equal 27, adapter.size
|
127
164
|
|
128
165
|
adapter.delete(nil,nil,nil)
|
129
166
|
assert_equal 0, adapter.size
|
@@ -136,7 +173,17 @@ class TestRdfLiteAdapter < Test::Unit::TestCase
|
|
136
173
|
eyal = RDFS::Resource.new('http://activerdf.org/test/eyal')
|
137
174
|
assert_equal eyal, Query.new.distinct(:s).where(:s,:keyword,"blue").execute
|
138
175
|
assert_equal eyal, Query.new.distinct(:s).where(:s,:keyword,"27").execute
|
139
|
-
|
140
176
|
assert_equal eyal, Query.new.distinct(:s).where(:s,:keyword,"eyal oren").execute
|
141
177
|
end
|
178
|
+
|
179
|
+
# the blank node of the fixture (_:#1) must show up as second TEST::Person,
# with its own age and name
def test_bnodes
  fixture = File.dirname(File.expand_path(__FILE__)) + '/test_data.nt'
  adapter = ConnectionPool.add_data_source :type => :rdflite
  adapter.load(fixture)

  Namespace.register(:test, 'http://activerdf.org/test/')
  ObjectManager.construct_classes

  assert_equal 2, TEST::Person.find_all.size
  assert_equal 29, TEST::Person.find_all[1].age.to_i
  assert_equal "Another Person", TEST::Person.find_all[1].name
end
|
142
189
|
end
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
|
|
3
3
|
specification_version: 1
|
4
4
|
name: activerdf_rdflite
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: "1.
|
7
|
-
date: 2006-
|
6
|
+
version: "1.1"
|
7
|
+
date: 2006-12-08 00:00:00 +00:00
|
8
8
|
summary: an RDF database for usage in ActiveRDF (based on sqlite3)
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -37,6 +37,8 @@ files:
|
|
37
37
|
- lib/activerdf_rdflite
|
38
38
|
- lib/activerdf_rdflite/init.rb
|
39
39
|
- lib/activerdf_rdflite/rdflite.rb
|
40
|
+
- lib/activerdf_rdflite/fetching.rb
|
41
|
+
- lib/activerdf_rdflite/suggesting.rb
|
40
42
|
test_files: []
|
41
43
|
|
42
44
|
rdoc_options: []
|