activerdf_rdflite 1.0 → 1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +1 -1
- data/lib/activerdf_rdflite/fetching.rb +43 -0
- data/lib/activerdf_rdflite/init.rb +2 -0
- data/lib/activerdf_rdflite/rdflite.rb +182 -114
- data/lib/activerdf_rdflite/suggesting.rb +73 -0
- data/test/test_data.nt +3 -0
- data/test/test_rdflite.rb +52 -5
- metadata +4 -2
data/Rakefile
CHANGED
@@ -14,7 +14,7 @@ desc "test and package gem"
|
|
14
14
|
task :default => [:test, :package]
|
15
15
|
|
16
16
|
# get ActiveRdfVersion from commandline
|
17
|
-
ActiveRdfVersion = ENV['REL'] || '0
|
17
|
+
ActiveRdfVersion = ENV['REL'] || '1.0'
|
18
18
|
NAME="activerdf_rdflite"
|
19
19
|
GEMNAME="#{NAME}-#{ActiveRdfVersion}.gem"
|
20
20
|
|
@@ -0,0 +1,43 @@
|
|
1
|
+
#class String
|
2
|
+
# alias _match match
|
3
|
+
# def match(*args)
|
4
|
+
# m = _match(args.first)
|
5
|
+
# if m && m.length > 1
|
6
|
+
# args[1..-1].each_with_index do |name, index|
|
7
|
+
# m.instance_eval "def #{name}; self[#{index+1}] end"
|
8
|
+
# end
|
9
|
+
# end
|
10
|
+
# m
|
11
|
+
# end
|
12
|
+
#end
|
13
|
+
|
14
|
+
# Adapter that extends RDFLite with the ability to fetch a remote RDF document
# and store its triples, using the source url as context.
class FetchingAdapter < RDFLite
  ConnectionPool.register_adapter(:fetching,self)

  # fetches RDF/XML data from given url and adds it to the datastore, using the
  # source url as context identifier.
  #
  # url:: string; only urls that start with http:// are fetched
  #
  # returns nil (without fetching anything) for any other url, otherwise the
  # result of add_ntriples
  def fetch url
    # the scheme check is anchored at the start of the string: an unanchored
    # match would accept e.g. "file:///etc/passwd#http://x", which turns into
    # a local file url once the fragment is stripped below
    return unless url.match(/\Ahttp:\/\//)

    # remove fragment identifier from url
    hashidx = url.rindex('#')
    url = url[0...hashidx] unless hashidx.nil?

    $activerdflog.debug "fetching from #{url}"

    #model = Redland::Model.new
    #parser = Redland::Parser.new('rdfxml')
    #scan = Redland::Uri.new('http://feature.librdf.org/raptor-scanForRDF')
    #enable = Redland::Literal.new('1')
    #Redland::librdf_parser_set_feature(parser, scan.uri, enable.node)
    #parser.parse_into_model(model, url)
    #triples = Redland::Serializer.ntriples.model_to_string(nil, model)

    # run rapper directly (argument array, no shell) so that quotes, backticks
    # or $(...) inside the url are never interpreted as shell syntax
    triples = IO.popen(['rapper', '--scan', url]) { |io| io.read }
    lines = triples.split($/)
    $activerdflog.debug "found #{lines.size} triples"

    # hand over the individual lines: add_ntriples iterates with each, which
    # an Array provides on every ruby version
    context = RDFS::Resource.new(url)
    add_ntriples(lines, context)
  end
end
|
@@ -1,7 +1,3 @@
|
|
1
|
-
# RDFLite is a lightweight RDF database on top of sqlite3. It can act as adapter
|
2
|
-
# in ActiveRDF. It supports on-disk and in-memory usage, and allows keyword
|
3
|
-
# search if ferret is installed.
|
4
|
-
#
|
5
1
|
# Author:: Eyal Oren
|
6
2
|
# Copyright:: (c) 2005-2006 Eyal Oren
|
7
3
|
# License:: LGPL
|
@@ -10,20 +6,23 @@ require 'sqlite3'
|
|
10
6
|
require 'active_rdf'
|
11
7
|
require 'federation/connection_pool'
|
12
8
|
|
13
|
-
$
|
9
|
+
$activerdflog.info "loading RDFLite adapter"
|
14
10
|
|
15
11
|
begin
|
16
12
|
require 'ferret'
|
17
13
|
@@have_ferret = true
|
18
14
|
rescue LoadError
|
19
|
-
$
|
15
|
+
$activerdflog.info "Keyword search is disabled since we could not load Ferret. To
|
20
16
|
enable, please do \"gem install ferret\""
|
21
17
|
@@have_ferret = false
|
22
18
|
end
|
23
19
|
|
20
|
+
# RDFLite is a lightweight RDF database on top of sqlite3. It can act as adapter
|
21
|
+
# in ActiveRDF. It supports on-disk and in-memory usage, and allows keyword
|
22
|
+
# search if ferret is installed.
|
24
23
|
class RDFLite < ActiveRdfAdapter
|
25
24
|
ConnectionPool.register_adapter(:rdflite,self)
|
26
|
-
bool_accessor :keyword_search
|
25
|
+
bool_accessor :keyword_search, :reasoning
|
27
26
|
|
28
27
|
# instantiates RDFLite database
|
29
28
|
# available parameters:
|
@@ -31,7 +30,7 @@ class RDFLite < ActiveRdfAdapter
|
|
31
30
|
# * :keyword => true/false (defaults to true)
|
32
31
|
# * :pidx, :oidx, etc. => true/false (enable/disable these indices)
|
33
32
|
def initialize(params = {})
|
34
|
-
$
|
33
|
+
$activerdflog.info "initialised rdflite with params #{params.to_s}"
|
35
34
|
|
36
35
|
@reads = true
|
37
36
|
@writes = true
|
@@ -40,22 +39,17 @@ class RDFLite < ActiveRdfAdapter
|
|
40
39
|
file = params[:location] || ':memory:'
|
41
40
|
@db = SQLite3::Database.new(file)
|
42
41
|
|
43
|
-
#
|
44
|
-
@keyword_search =
|
45
|
-
true
|
46
|
-
else
|
47
|
-
params[:keyword]
|
48
|
-
end
|
49
|
-
|
50
|
-
# we can only do keyword search if ferret is found
|
42
|
+
# enable keyword search by default, but only if ferret is found
|
43
|
+
@keyword_search = params[:keyword].nil? ? true : params[:keyword]
|
51
44
|
@keyword_search &= @@have_ferret
|
52
|
-
|
45
|
+
|
46
|
+
@reasoning = params[:reasoning] || false
|
53
47
|
|
54
48
|
if keyword_search?
|
55
49
|
# we initialise the ferret index, either as a file or in memory
|
50
|
+
infos = Ferret::Index::FieldInfos.new
|
56
51
|
|
57
52
|
# we setup the fields not to store object's contents
|
58
|
-
infos = Ferret::Index::FieldInfos.new
|
59
53
|
infos.add_field(:subject, :store => :yes, :index => :no, :term_vector => :no)
|
60
54
|
infos.add_field(:object, :store => :no) #, :index => :omit_norms)
|
61
55
|
|
@@ -69,13 +63,11 @@ class RDFLite < ActiveRdfAdapter
|
|
69
63
|
# turn off filesystem synchronisation for speed
|
70
64
|
@db.synchronous = 'off'
|
71
65
|
|
72
|
-
# create triples table.
|
73
|
-
@db.execute('create table if not exists triple(s,p,o, unique(s,p,o) on conflict ignore)')
|
66
|
+
# create triples table. ignores duplicated triples
|
67
|
+
@db.execute('create table if not exists triple(s,p,o,c, unique(s,p,o,c) on conflict ignore)')
|
74
68
|
|
75
69
|
create_indices(params)
|
76
|
-
|
77
|
-
$log.debug("opened connection to #{file}")
|
78
|
-
$log.debug("database contains #{size} triples")
|
70
|
+
@db
|
79
71
|
end
|
80
72
|
|
81
73
|
# returns the number of triples in the datastore (incl. possible duplicates)
|
@@ -85,8 +77,8 @@ class RDFLite < ActiveRdfAdapter
|
|
85
77
|
|
86
78
|
# returns all rows of the triple table, each rendered as one string with
# subject, predicate, object and context separated by single spaces
def dump
  rows = @db.execute('select s,p,o,c from triple')
  rows.collect { |quad| quad.join(' ') }
end
|
92
84
|
|
@@ -95,37 +87,23 @@ class RDFLite < ActiveRdfAdapter
|
|
95
87
|
@db.execute('delete from triple')
|
96
88
|
end
|
97
89
|
|
98
|
-
# deletes triple(s,p,o) from datastore
|
99
|
-
#
|
100
|
-
|
90
|
+
# deletes triple(s,p,o,c) from datastore
|
91
|
+
# symbol parameters match anything: delete(:s,:p,:o) will delete all triples
|
92
|
+
# you can specify a context to limit deletion to that context:
|
93
|
+
# delete(:s,:p,:o, 'http://context') will delete all triples with that context
|
94
|
+
def delete(s,p,o,c=nil)
|
101
95
|
# convert input to internal format
|
102
|
-
|
103
|
-
s = "<#{s.uri}>" unless s.nil?
|
104
|
-
p = "<#{p.uri}>" unless p.nil?
|
105
|
-
o = case o
|
106
|
-
when RDFS::Resource
|
107
|
-
"<#{o.uri}>"
|
108
|
-
else
|
109
|
-
"\"#{o.to_s}\""
|
110
|
-
end unless o.nil?
|
111
|
-
|
112
|
-
# construct where clause for deletion (for all non-nil input)
|
113
|
-
where_clauses = []
|
114
|
-
conditions = []
|
115
|
-
unless s.nil?
|
116
|
-
conditions << s
|
117
|
-
where_clauses << 's = ?'
|
118
|
-
end
|
96
|
+
quad = [s,p,o,c].collect {|r| internalise(r) }
|
119
97
|
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
98
|
+
# construct where clause for deletion (for all non-nil input)
|
99
|
+
where_clauses = []
|
100
|
+
conditions = []
|
101
|
+
quad.each_with_index do |r,i|
|
102
|
+
unless r.nil?
|
103
|
+
conditions << r
|
104
|
+
where_clauses << "#{SPOC[i]} = ?"
|
105
|
+
end
|
106
|
+
end
|
129
107
|
|
130
108
|
# construct delete string
|
131
109
|
ds = 'delete from triple'
|
@@ -133,28 +111,29 @@ class RDFLite < ActiveRdfAdapter
|
|
133
111
|
|
134
112
|
# execute delete string with possible deletion conditions (for each
|
135
113
|
# non-empty where clause)
|
114
|
+
$activerdflog.debug("deleting #{[s,p,o,c].join(' ')}")
|
136
115
|
@db.execute(ds, *conditions)
|
137
|
-
$log.debug(sprintf("sending delete query: #{ds}", *conditions))
|
138
116
|
|
139
117
|
# delete literal from ferret index
|
140
118
|
@ferret.search_each("subject:\"#{s}\", object:\"#{o}\"") do |idx, score|
|
141
|
-
$log.debug "deleting #{o} => #{s} from ferret index"
|
142
119
|
@ferret.delete(idx)
|
143
120
|
end if keyword_search?
|
121
|
+
|
122
|
+
@db
|
144
123
|
end
|
145
124
|
|
146
125
|
# adds triple(s,p,o) to datastore, optionally within context c
# s,p must be resources (respond to uri), o can be primitive data or resource
# raises ActiveRdfError if s or p is not a resource
def add(s,p,o,c=nil)
  # check illegal input: subject first, then predicate
  [s, p].each do |node|
    raise(ActiveRdfError, "adding non-resource #{node}") unless node.respond_to?(:uri)
  end

  # convert all four terms to internal representation and store them
  internal = [s,p,o,c].collect { |term| internalise(term) }
  add_internal(@db, *internal)
end
|
159
138
|
|
160
139
|
# flushes outstanding changes to underlying sqlite3
|
@@ -167,38 +146,70 @@ class RDFLite < ActiveRdfAdapter
|
|
167
146
|
# loads triples from file in ntriples format
# the string "<file:FILENAME>" is handed to add_ntriples as context
def load(file)
  ntriples = File.readlines(file)
  $activerdflog.debug "read #{ntriples.size} triples from file #{file}"

  add_ntriples(ntriples, "<file:#{file}>")
end
|
154
|
+
|
155
|
+
# adds ntriples from given context to database
#
# ntriples:: collection whose each yields one ntriples line at a time
# context:: source of the triples; converted to internal format with internalise
#
# returns the database handle
def add_ntriples(ntriples, context=nil)
  # convert context to internal format if RDFS::Resource
  context = internalise(context)

  # need unique identifier for this batch of triples (to detect occurrence of
  # same bnodes _:#1). uuidgen terminates its output with a newline, which
  # must not end up inside the generated bnode uris
  uuid = `uuidgen`.strip

  # add each triple to db
  @db.transaction do |tr|
    ntriples.each do |triple|
      nodes = triple.scan(Node)

      # skip blank or malformed lines instead of inserting rows of NULLs
      next if nodes.size < 3

      # handle bnodes if necessary (bnodes need to have uri generated);
      # $1 is the bnode label captured by the BNode match in the when clause
      subject = case nodes[0]
                when BNode
                  "<http://www.activerdf.org/bnode/#$1/#{uuid}>"
                else
                  nodes[0]
                end

      predicate = nodes[1]

      # handle bnodes and literals if necessary (literals need unicode fixing)
      object = case nodes[2]
               when BNode
                 "<http://www.activerdf.org/bnode/#$1/#{uuid}>"
               when Literal
                 fix_unicode(nodes[2])
               else
                 nodes[2]
               end

      add_internal(tr, subject, predicate, object, context)
    end
  end

  @db
end
|
181
195
|
|
182
196
|
# executes ActiveRDF query on datastore
|
183
197
|
def query(query)
|
184
|
-
# log received query
|
185
|
-
$log.debug "received query: #{query.to_sp}"
|
186
|
-
|
187
198
|
# construct query clauses
|
188
|
-
sql = translate(query)
|
199
|
+
sql, conditions = translate(query)
|
189
200
|
|
190
201
|
# executing query, passing all where-clause values as parameters (so that
|
191
202
|
# sqlite will encode quotes correctly)
|
192
|
-
constraints =
|
193
|
-
|
194
|
-
$log.debug format("executing: #{sql.gsub('?','"%s"')}", *constraints)
|
203
|
+
#constraints = right_hand_sides.collect { |value| value.to_s }
|
195
204
|
|
196
205
|
# executing query
|
197
|
-
results = @db.execute(sql, *
|
206
|
+
results = @db.execute(sql, *conditions)
|
198
207
|
|
199
208
|
# if ASK query, we check whether we received a positive result count
|
200
209
|
if query.ask?
|
201
|
-
return [results[0][0].to_i > 0]
|
210
|
+
return [[results[0][0].to_i > 0]]
|
211
|
+
elsif query.count?
|
212
|
+
return [[results[0][0].to_i]]
|
202
213
|
else
|
203
214
|
# otherwise we convert results to ActiveRDF nodes and return them
|
204
215
|
return wrap(query, results)
|
@@ -207,22 +218,23 @@ class RDFLite < ActiveRdfAdapter
|
|
207
218
|
|
208
219
|
# translates ActiveRDF query into internal sqlite query string
# returns a pair: the sql string, and the values that construct_where
# collected for the ? placeholders in that string
def translate(query)
  where_sql, bindings = construct_where(query)

  sql = construct_select(query)
  sql += construct_join(query)
  sql += where_sql
  sql += construct_sort(query)
  sql += construct_limit(query)

  [sql, bindings]
end
|
213
224
|
|
214
225
|
private
|
215
226
|
# constants for extracting resources/literals from sql results
# BNode is anchored at the start of the string, so that only values which
# actually begin with _: count as blank nodes; a literal or uri that merely
# contains "_:" somewhere must not be classified as one
BNode = /\A_:(\S*)/
Resource = /<([^>]*)>/
Literal = /"([^"]*)"/
# matches one node (bnode, resource or literal) when scanning an ntriples line
Node = Regexp.union(/_:\S*/,/<[^>]*>/,/"[^"]*"/)
# column names of the triple table, in positional order
SPOC = ['s','p','o','c']
|
220
232
|
|
221
233
|
# adds s,p,o into sqlite and ferret
|
222
234
|
# s,p,o should be in internal format: <uri> and "literal"
|
223
|
-
def add_internal(s,p,o)
|
235
|
+
def add_internal(db, s, p, o, c)
|
224
236
|
# insert the triple into the datastore
|
225
|
-
|
237
|
+
db.execute('insert into triple values (?,?,?,?)', s,p,o,c)
|
226
238
|
|
227
239
|
# if keyword-search available, insert the object into keyword search
|
228
240
|
@ferret << {:subject => s, :object => o} if keyword_search?
|
@@ -262,6 +274,16 @@ class RDFLite < ActiveRdfAdapter
|
|
262
274
|
clause
|
263
275
|
end
|
264
276
|
|
277
|
+
# sort query results on variable clause (optionally)
# returns "" when the query has no sort clauses, otherwise an " order by ..."
# fragment listing the column of every sort variable
def construct_sort(query)
  return "" if query.sort_clauses.empty?

  sort = query.sort_clauses.collect { |term| variable_name(query, term) }

  # no parentheses around the list: "order by (a,b)" is not a list of sort
  # terms in sqlite and fails as soon as there is more than one term
  " order by #{sort.join(',')}"
end
|
286
|
+
|
265
287
|
# construct join clause
|
266
288
|
# TODO: joins don't work this way, they have to be linear (in one direction
|
267
289
|
# only, and we should only alias tables we didn't alias yet)
|
@@ -298,8 +320,8 @@ class RDFLite < ActiveRdfAdapter
|
|
298
320
|
|
299
321
|
# construct t0,t1,... as aliases for term
|
300
322
|
# and construct join condition, e.g. t0.s
|
301
|
-
termalias = "t#{index /
|
302
|
-
termjoin = "#{termalias}.#{
|
323
|
+
termalias = "t#{index / 4}"
|
324
|
+
termjoin = "#{termalias}.#{SPOC[index % 4]}"
|
303
325
|
|
304
326
|
join = if join_stmt.include?(termalias)
|
305
327
|
""
|
@@ -313,8 +335,8 @@ class RDFLite < ActiveRdfAdapter
|
|
313
335
|
|
314
336
|
# construct t0,t1, etc. as aliases for buddy,
|
315
337
|
# and construct join condition, e.g. t0.s = t1.p
|
316
|
-
buddyalias = "t#{i/
|
317
|
-
buddyjoin = "#{buddyalias}.#{
|
338
|
+
buddyalias = "t#{i/4}"
|
339
|
+
buddyjoin = "#{buddyalias}.#{SPOC[i%4]}"
|
318
340
|
|
319
341
|
# TODO: fix reuse of same table names as aliases, e.g.
|
320
342
|
# "from triple as t1 join triple as t2 on ... join t1 on ..."
|
@@ -345,20 +367,22 @@ class RDFLite < ActiveRdfAdapter
|
|
345
367
|
# collecting all the right-hand sides of where clauses (e.g. where name =
|
346
368
|
# 'abc'), to add to query string later using ?-notation, because then
|
347
369
|
# sqlite will automatically encode quoted literals correctly
|
348
|
-
|
370
|
+
right_hand_sides = []
|
349
371
|
|
350
372
|
# convert each where clause to SQL:
|
351
373
|
# add where clause for each subclause, except if it's a variable
|
352
374
|
query.where_clauses.each_with_index do |clause,level|
|
375
|
+
raise ActiveRdfError, "where clause #{clause} is not a triple" unless clause.is_a?(Array)
|
353
376
|
clause.each_with_index do |subclause, i|
|
354
377
|
# don't add where clause for variables
|
355
378
|
unless subclause.is_a?(Symbol)
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
379
|
+
conditions = compute_where_condition(i, subclause, query.reasoning? && reasoning?)
|
380
|
+
if conditions.size == 1
|
381
|
+
where << "t#{level}.#{SPOC[i]} = ?"
|
382
|
+
right_hand_sides << conditions.first
|
360
383
|
else
|
361
|
-
|
384
|
+
conditions = conditions.collect {|c| "'#{c}'"}
|
385
|
+
where << "t#{level}.#{SPOC[i]} in (#{conditions.join(',')})"
|
362
386
|
end
|
363
387
|
end
|
364
388
|
end
|
@@ -371,18 +395,53 @@ class RDFLite < ActiveRdfAdapter
|
|
371
395
|
raise ActiveRdfError, "cannot do keyword search over multiple subjects" if select_subject.size > 1
|
372
396
|
|
373
397
|
keywords = query.keywords.collect {|subj,key| key}
|
374
|
-
@ferret.search_each("object
|
398
|
+
@ferret.search_each("object:#{keywords}") do |idx,score|
|
375
399
|
subjects << @ferret[idx][:subject]
|
376
400
|
end
|
377
401
|
subjects.uniq! if query.distinct?
|
378
402
|
where << "#{variable_name(query,select_subject.first)} in (#{subjects.collect {'?'}.join(',')})"
|
379
|
-
|
403
|
+
right_hand_sides += subjects
|
380
404
|
end
|
381
405
|
|
382
406
|
if where.empty?
|
383
|
-
''
|
407
|
+
['',[]]
|
408
|
+
else
|
409
|
+
["where " + where.join(' and '), right_hand_sides]
|
410
|
+
end
|
411
|
+
end
|
412
|
+
|
413
|
+
# computes the values that the term at position index of a where clause may
# take, in internal format
#
# index:: position of the term in the clause (0 = s, 1 = p, 2 = o, 3 = c)
# subclause:: the term itself (resource or plain value)
# reasoning:: if true, a predicate is expanded to itself plus its subproperties
#
# returns an array of strings: <uri> for everything that responds to uri,
# to_s for everything else
def compute_where_condition(index, subclause, reasoning)
  conditions = [subclause]

  # expand conditions with rdfs rules if reasoning enabled: there is only a
  # rule for properties (index 1); subjects, objects and contexts stay as-is
  if reasoning && index == 1 && subclause.respond_to?(:uri)
    conditions = subproperties(subclause)
  end

  # convert conditions into internal format
  conditions.collect { |c| c.respond_to?(:uri) ? "<#{c.uri}>" : c.to_s }
end
|
434
|
+
|
435
|
+
# returns resource together with all its direct and indirect subproperties,
# found by following rdfs:subPropertyOf statements in the data
#
# resource:: the property to expand
# seen:: properties already visited during this expansion; callers normally
#        leave it out, it is only passed along by the recursion
def subproperties(resource, seen = [])
  # a property can be reached twice (p subPropertyOf p, or a longer cycle);
  # without this check the recursion below would never terminate
  return [] if seen.include?(resource)
  seen << resource

  subproperty = Namespace.lookup(:rdfs,:subPropertyOf)
  children_query = Query.new.distinct(:sub).where(:sub, subproperty, resource)
  # the lookup itself must not trigger subproperty expansion again
  children_query.reasoning = false
  children = children_query.execute

  return [resource] if children.empty?

  [resource] + children.collect { |child| subproperties(child, seen) }.flatten.compact
end
|
388
447
|
|
@@ -412,28 +471,31 @@ class RDFLite < ActiveRdfAdapter
|
|
412
471
|
end
|
413
472
|
end
|
414
473
|
|
415
|
-
termtable = "t#{index /
|
416
|
-
termspo =
|
474
|
+
termtable = "t#{index / 4}"
|
475
|
+
termspo = SPOC[index % 4]
|
417
476
|
return "#{termtable}.#{termspo}"
|
418
477
|
end
|
419
478
|
|
420
479
|
# wrap resources into ActiveRDF resources, literals into Strings
# every value of every result row is converted with parse; the row structure
# is kept
def wrap(query, results)
  results.map { |row| row.map { |value| parse(value) } }
end
|
485
|
+
|
486
|
+
# converts one value from a sql result row:
# a value matching Resource becomes an RDFS::Resource of the captured uri, a
# value matching Literal becomes a String of the captured text, anything else
# is returned unchanged. Resource is tested before Literal.
def parse(result)
  case result
  when Resource then RDFS::Resource.new(Regexp.last_match(1))
  when Literal then String.new(Regexp.last_match(1))
  else
    # when we do a count(*) query we get a number, not a resource/literal
    result
  end
end
|
436
497
|
|
498
|
+
|
437
499
|
def create_indices(params)
|
438
500
|
sidx = params[:sidx] || false
|
439
501
|
pidx = params[:pidx] || false
|
@@ -457,15 +519,21 @@ class RDFLite < ActiveRdfAdapter
|
|
457
519
|
|
458
520
|
# transforms a single term into internal format:
# anything that responds to uri becomes <uri>, a symbol becomes nil, and
# everything else becomes "literal" (its to_s wrapped in double quotes)
def internalise(r)
  return "<#{r.uri}>" if r.respond_to?(:uri)
  return nil if r.is_a?(Symbol)

  "\"#{r.to_s}\""
end
|
531
|
+
|
532
|
+
# fixes unicode characters in literals: replaces the ntriples escapes \uXXXX
# (4 hex digits) and \UXXXXXXXX (8 hex digits) by the UTF-8 encoding of the
# character they denote. All other text, including text that merely looks
# like "U+XXXX", is left untouched.
def fix_unicode(str)
  str.gsub(/\\u([0-9a-fA-F]{4})|\\U([0-9a-fA-F]{8})/) do
    # exactly one of the two groups matched; both hold the code point in hex
    [(Regexp.last_match(1) || Regexp.last_match(2)).hex].pack('U')
  end
end
|
537
|
+
|
538
|
+
public :subproperties
|
471
539
|
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
require 'pp'
|
2
|
+
# Adapter that suggests predicates for a resource, based on how often
# predicates occur and co-occur (on the same subject) in the stored triples.
class SuggestingAdapter < FetchingAdapter
  ConnectionPool.register_adapter(:suggesting,self)

  # initialises the underlying adapter with params and (re)creates the two
  # sqlite views that the suggestions are computed from:
  # occurrence:: number of distinct subjects per predicate
  # cooccurrence:: number of distinct subjects per pair of different predicates
  def initialize(params = {})
    super(params)
    @db.execute('drop view if exists occurrence')
    @db.execute('create view occurrence as select p, count(distinct s) as count from triple group by p')

    @db.execute('drop view if exists cooccurrence')
    @db.execute('create view cooccurrence as select t0.p as p1,t1.p as p2, count(distinct t0.s) as count from triple as t0 join triple as t1 on t0.s=t1.s and t0.p!=t1.p group by t0.p, t1.p')
  end

  # suggests predicates for resource
  #
  # returns an array of [candidate, score] pairs; it is empty when none of the
  # resource's own predicates occurs on more than one subject, or when nothing
  # co-occurs with all of them
  def suggest(resource)
    $activerdflog.debug "starting suggestions for #{size} triples"
    time = Time.now

    own_predicates = resource.direct_predicates

    construct_occurrence_matrix
    construct_cooccurrence_matrix

    # only predicates used on more than one subject take part
    predicates = own_predicates.select { |p| occurrence(p) > 1 }

    # fetch all predicates co-occurring with our predicates
    candidates = predicates.collect { |p| cooccurring(p) }

    # perform set intersection; inject returns nil for an empty list, which
    # simply means there are no candidates
    candidates = (candidates.inject { |intersect, n| intersect & n } || []).flatten
    candidates = candidates - own_predicates

    suggestions = candidates.collect do |candidate|
      # product over all our predicates of cooccurrence / occurrence;
      # occurrence(p) is > 1 for every p here, so there is no division by zero
      score = predicates.inject(1.0) do |product, p|
        product * cooccurrence(candidate, p) / occurrence(p)
      end
      [candidate, score]
    end
    $activerdflog.debug "suggestions for #{resource} took #{Time.now-time}s"
    suggestions
  end

  # reads the occurrence view (predicates used by more than one subject) into
  # a hash from parsed predicate to count
  def construct_occurrence_matrix
    @occurrence = {}
    @db.execute('select * from occurrence where count > 1') do |p,count|
      @occurrence[parse(p)] = count.to_i
    end
  end

  # reads the cooccurrence view into a nested hash:
  # parsed predicate 1 => parsed predicate 2 => count
  def construct_cooccurrence_matrix
    @cooccurrence = {}
    @db.execute('select * from cooccurrence') do |p1, p2, count|
      row = (@cooccurrence[parse(p1)] ||= {})
      row[parse(p2)] = count.to_i
    end
  end

  # number of subjects using predicate; 0 if it is not in the occurrence matrix
  def occurrence(predicate)
    @occurrence[predicate] || 0
  end

  # number of subjects using both p1 and p2; 0 if the pair is unknown
  def cooccurrence(p1, p2)
    (@cooccurrence[p1] || {})[p2] || 0
  end

  # all predicates co-occurring with predicate; empty if there are none
  def cooccurring(predicate)
    (@cooccurrence[predicate] || {}).keys
  end
end
|
data/test/test_data.nt
CHANGED
@@ -27,3 +27,6 @@
|
|
27
27
|
<http://www.w3.org/1999/02/22-rdf-syntax-ns#Property> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2000/01/rdf-schema#Resource> .
|
28
28
|
<http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2000/01/rdf-schema#domain> <http://www.w3.org/2000/01/rdf-schema#Resource> .
|
29
29
|
<http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2000/01/rdf-schema#range> <http://www.w3.org/2000/01/rdf-schema#Class> .
|
30
|
+
_:#1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://activerdf.org/test/Person> .
|
31
|
+
_:#1 <http://activerdf.org/test/age> "29" .
|
32
|
+
_:#1 <http://activerdf.org/test/name> "Another Person" .
|
data/test/test_rdflite.rb
CHANGED
@@ -92,8 +92,45 @@ class TestRdfLiteAdapter < Test::Unit::TestCase
|
|
92
92
|
# loading the test file must result in 32 stored triples
def test_loading_data
  data_file = File.dirname(File.expand_path(__FILE__)) + '/test_data.nt'

  adapter = ConnectionPool.add_data_source :type => :rdflite
  adapter.load(data_file)

  assert_equal 32, adapter.size
end
|
97
|
+
|
98
|
+
# a count query over all triples must return the number of loaded triples
# as a plain integer
def test_count_query
  adapter = ConnectionPool.add_data_source :type => :rdflite
  adapter.load(File.dirname(File.expand_path(__FILE__)) + '/test_data.nt')
  # Integer instead of Fixnum: Fixnum no longer exists in current ruby, and
  # on old versions every Fixnum is an Integer as well
  assert_kind_of Integer, Query.new.count(:s).where(:s,:p,:o).execute
  assert_equal 32, Query.new.count(:s).where(:s,:p,:o).execute
end
|
104
|
+
|
105
|
+
# after loading one file, the only context in the store must be the resource
# file:FILENAME
def test_single_context
  file = File.dirname(File.expand_path(__FILE__)) + '/test_data.nt'
  adapter = ConnectionPool.add_data_source :type => :rdflite
  adapter.load(file)

  expected = RDFS::Resource.new("file:#{file}")
  context = Query.new.distinct(:c).where(:s,:p,:o,:c).execute

  assert_instance_of RDFS::Resource, context
  assert_equal expected, context
end
|
114
|
+
|
115
|
+
# triples loaded from a file and a triple added without context must end up
# in two different contexts, which can be queried separately
def test_multiple_context
  file = File.dirname(File.expand_path(__FILE__)) + '/test_data.nt'
  adapter = ConnectionPool.add_data_source :type => :rdflite
  adapter.load(file)
  file_context = RDFS::Resource.new("file:#{file}")

  # one extra triple, added without an explicit context
  adapter.add(RDFS::Resource.new('eyaloren.org'),
              RDFS::Resource.new('foaf:age'),
              RDFS::Resource.new('test'))

  contexts = Query.new.distinct(:c).where(:s,:p,:o,:c).execute
  assert_equal file_context, contexts[0]
  assert_equal '', contexts[1]

  without_context = Query.new.distinct(:s).where(:s,:p,:o,'').execute(:flatten => false)
  from_file = Query.new.distinct(:s).where(:s,:p,:o,file_context).execute(:flatten => false)
  assert_equal 1, without_context.size
  assert_equal 9, from_file.size
end
|
98
135
|
|
99
136
|
def test_person_data
|
@@ -119,11 +156,11 @@ class TestRdfLiteAdapter < Test::Unit::TestCase
|
|
119
156
|
def test_delete_data
|
120
157
|
adapter = ConnectionPool.add_data_source :type => :rdflite
|
121
158
|
adapter.load(File.dirname(File.expand_path(__FILE__)) + '/test_data.nt')
|
122
|
-
assert_equal
|
159
|
+
assert_equal 32, adapter.size
|
123
160
|
|
124
161
|
eyal = RDFS::Resource.new('http://activerdf.org/test/eyal')
|
125
162
|
adapter.delete(eyal, nil, nil)
|
126
|
-
assert_equal
|
163
|
+
assert_equal 27, adapter.size
|
127
164
|
|
128
165
|
adapter.delete(nil,nil,nil)
|
129
166
|
assert_equal 0, adapter.size
|
@@ -136,7 +173,17 @@ class TestRdfLiteAdapter < Test::Unit::TestCase
|
|
136
173
|
eyal = RDFS::Resource.new('http://activerdf.org/test/eyal')
|
137
174
|
assert_equal eyal, Query.new.distinct(:s).where(:s,:keyword,"blue").execute
|
138
175
|
assert_equal eyal, Query.new.distinct(:s).where(:s,:keyword,"27").execute
|
139
|
-
|
140
176
|
assert_equal eyal, Query.new.distinct(:s).where(:s,:keyword,"eyal oren").execute
|
141
177
|
end
|
178
|
+
|
179
|
+
# the person described with a blank node in the test data must be found as
# second TEST::Person, with the age and name given in the file
def test_bnodes
  data_file = File.dirname(File.expand_path(__FILE__)) + '/test_data.nt'
  adapter = ConnectionPool.add_data_source :type => :rdflite
  adapter.load(data_file)

  Namespace.register(:test, 'http://activerdf.org/test/')
  ObjectManager.construct_classes

  assert_equal 2, TEST::Person.find_all.size
  assert_equal 29, TEST::Person.find_all[1].age.to_i
  assert_equal "Another Person", TEST::Person.find_all[1].name
end
|
142
189
|
end
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
|
|
3
3
|
specification_version: 1
|
4
4
|
name: activerdf_rdflite
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: "1.
|
7
|
-
date: 2006-
|
6
|
+
version: "1.1"
|
7
|
+
date: 2006-12-08 00:00:00 +00:00
|
8
8
|
summary: an RDF database for usage in ActiveRDF (based on sqlite3)
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -37,6 +37,8 @@ files:
|
|
37
37
|
- lib/activerdf_rdflite
|
38
38
|
- lib/activerdf_rdflite/init.rb
|
39
39
|
- lib/activerdf_rdflite/rdflite.rb
|
40
|
+
- lib/activerdf_rdflite/fetching.rb
|
41
|
+
- lib/activerdf_rdflite/suggesting.rb
|
40
42
|
test_files: []
|
41
43
|
|
42
44
|
rdoc_options: []
|